**RESEARCH: Fine tuning**

In [None]:
from huggingface_hub import login  
login()

In [1]:
import torch
from transformers import pipeline, AutoTokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling, TextDataset
from datasets import load_dataset
from sklearn.model_selection import train_test_split

# Model name: AI-Sweden-Models/gpt-sw3-126m, AI-Sweden-Models/gpt-sw3-1.3b
model_name = "AI-Sweden-Models/gpt-sw3-126m"
device = "cuda:0" if torch.cuda.is_available() else "cpu" #in case i run this on my laptop or pc

In [2]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
model = GPT2LMHeadModel.from_pretrained(model_name)

In [3]:
# train = load_dataset("amcoff/skolmat", split = "train[:80%]")
# test = load_dataset("amcoff/skolmat", split = "train[-20%:]")

import pandas as pd

df = pd.read_csv("meals.csv")
train, test = train_test_split(df, test_size=0.2, random_state=42)
train.to_csv("train.csv", index=False)
test.to_csv("test.csv", index=False)

train_data = TextDataset(tokenizer=tokenizer, file_path="train.csv", block_size=256)
test_data = TextDataset(tokenizer=tokenizer, file_path="test.csv", block_size=256)




In [4]:
import evaluate
import numpy as np

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

def compute_metrics(eval_pred):
    metric = evaluate.load_metric("accuracy")
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [5]:

training_args = TrainingArguments(output_dir="./results" 
                                ,overwrite_output_dir=True
                                ,num_train_epochs=3
                                , per_device_train_batch_size= 4
                                , per_device_eval_batch_size= 8
                                , save_steps=800
                                , eval_steps=400
                                , warmup_steps=500
                                )

# initialize Trainer
trainer = Trainer(
    model = model,                         # the instantiated 🤗 Transformers model to be trained
    args = training_args,          #
    data_collator = data_collator,
    train_dataset = train_data,
    eval_dataset = test_data
)

trainer.train()
trainer.save_model()



  0%|          | 0/33 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 6.00 GiB total capacity; 5.11 GiB already allocated; 0 bytes free; 5.29 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
from transformers import pipeline

pipe = pipeline("text-generation", model = './results', tokenizer = tokenizer)
pipe("FOOD ", max_length=30)

In [None]:
def generate_text(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    output = model.generate(
        inputs = input_ids,
        max_new_tokens = 200,
        do_sample=True,
        top_p=1, temperature=0.7
        )
    return tokenizer.decode(output[0])

prompt = "Följande är en konversation mellan en besökare på museet och en guide. Guiden arbetar på museet. Guiden är hjälpsam, informativ och mycket vänlig. \n" \
            "Museet innehåller tre utställningar, den första utställningen heter 'Hitta nemo igen' av Hermann Gustafsson \n\n\n"\
            "Besökare: Hej, jag är här för att se utställningen 'Hitta nemo igen', och jag har en fråga!\n" \
            "Guide: Hej, vad kul! Vad vill du veta om utställningen?\n" \
            "Besökare: Vet du vem som skapade utställningen?\n" 
print(generate_text(prompt))         