In [6]:
#!pip install datasets
# pip install accelerate
from huggingface_hub import login         #1-Authentifiziert mich bei Hugging Face
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import torch

login() #2-Authentifiziert mich bei Hugging Face

##Vorbereitung für mich startet hier:
def check_gpu():                            #Checkt ob die GPU aktiv ist
   if torch.cuda.is_available():
       print("GPU is available")
       print(torch.cuda.get_device_name(0))
   else:
       print("GPU is not available")
check_gpu()


# Model und Tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/gemma-1.1-2b-it")   #Lädt das Model und Tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-1.1-2b-it",   #Model-typ (gemma instruction tuned)
    device_map="auto",          #Nutzt GPU automatisch
    torch_dtype=torch.bfloat16 ##entfernt aufgrund von Fehler (versucht Meta Tensor zu erschaffen ohne Daten)  #Spart Speicher
)
model = model.to("cuda") #Lädt Model auf die GPU, spezifisch Cuda
## Lora Einrichten
lora_config = LoraConfig(
    r=8,                          # LoRA-Rank = Anzahl der Low-Rank-Dimensionen (je kleiner, desto effizienter)
    lora_alpha=16,                #Skalierungsfaktor der LoRA-Gewichte – wie stark beeinflusst LoRA das Modell
    lora_dropout=0.1,             #Dropout auf LoRA-Layer -> hilft bei Regularisierung
    bias="none",                  #Trainiere keine Bias-Parameter (nur LoRA)
    task_type=TaskType.CAUSAL_LM, #Typ der Aufgabe -> wichtig, damit LoRA die richtigen Layer patcht (hier: Textgenerierung)
)

model = get_peft_model(model, lora_config) #Umwandlung in Fine-Tunebares Model
model.print_trainable_parameters()        #Kontrolliert ob nur die LoRA-Schichten trainieren

## Daten werden vorbereitet und geladen
dataset = load_dataset("yelp_polarity", split="train[:500]")  # Aktuell noch test daten, kein Datenset vorhanden!

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)

tokenized = dataset.map(tokenize, batched=True)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./gemma-lora",
    per_device_train_batch_size=4,
    num_train_epochs=1,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    save_total_limit=1,
)
#Training definieren
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator,
)
#Model trainieren
trainer.train()
#LoRA-Finetuning speichern
model.save_pretrained("gemma-lora-finetuned")
tokenizer.save_pretrained("gemma-lora-finetuned")


#Testet Model ohne Fine Tuning (glaube ich)
input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

#Ausgabe Anzeigen
outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

GPU is available
Tesla T4


tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/618 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

trainable params: 921,600 || all params: 2,507,094,016 || trainable%: 0.0368


README.md:   0%|          | 0.00/8.93k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/256M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/560000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/38000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mm-ramackers[0m ([33mm-ramackers-universit-t-des-saarlandes-saarland-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,5.5018
20,4.817
30,4.5601
40,4.2887
50,4.3403
60,3.8772
70,3.6505
80,3.6703
90,3.7246
100,3.6289


<bos>Write me a poem about Machine Learning.

In circuits deep, where data flows unseen,
A mind of metal, where algorithms reign.


# Neuer Abschnitt