**RESEARCH: Fine-tuning**

In [None]:
# Authenticate this session with the Hugging Face Hub. login() is called
# without arguments, so it asks for an access token interactively.
from huggingface_hub import login  
login()

In [2]:
import torch
from transformers import pipeline, AutoTokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling, TextDataset
from datasets import load_dataset
from sklearn.model_selection import train_test_split

# Checkpoint to fine-tune. Candidate sizes:
#   AI-Sweden-Models/gpt-sw3-126m, AI-Sweden-Models/gpt-sw3-1.3b
model_name = "AI-Sweden-Models/gpt-sw3-126m"

# Prefer the first CUDA GPU when one is visible; otherwise fall back to the
# CPU so the notebook still runs on a laptop or desktop without a GPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [3]:
# Load the tokenizer and the pretrained weights from the same Hub repository.
# Both downloads are authenticated with the token stored by login():
# previously only the tokenizer request carried it, so the weights request
# could be rejected if the repository requires authentication.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
model = GPT2LMHeadModel.from_pretrained(model_name, use_auth_token=True)

In [3]:
import pandas as pd

# Read the full meal data set and hold out 20% of the rows for evaluation.
# The fixed random_state keeps the split reproducible between runs.
df = pd.read_csv("meals.csv")
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Write each split back to disk, because TextDataset takes a file path.
for frame, csv_path in ((train, "train.csv"), (test, "test.csv")):
    frame.to_csv(csv_path, index=False)

# Build one dataset of 256-token blocks per split.
# NOTE(review): TextDataset appears to read the file as plain text, so the CSV
# header row and separators would end up in the training text - confirm that
# this is intended.
train_data, test_data = (
    TextDataset(tokenizer=tokenizer, file_path=csv_path, block_size=256)
    for csv_path in ("train.csv", "test.csv")
)



In [4]:
import evaluate
import numpy as np

# Batch builder for training and evaluation. mlm=False selects causal
# (next-token) language modelling rather than masked-token modelling.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

def compute_metrics(eval_pred):
    """Return next-token accuracy for a causal language model.

    The accuracy is computed directly with numpy, so no metric script has to
    be loaded on every evaluation call.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` is expected to be
            shaped ``(batch, seq_len, vocab)`` and ``labels``
            ``(batch, seq_len)``. If ``logits`` is a tuple of model outputs,
            its first element is used.

    Returns:
        ``{"accuracy": value}`` where ``value`` is the fraction of scored
        positions whose arg-max prediction equals the target token, or
        ``0.0`` when there is no scored position.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    # In a causal LM the logits at position i predict the token at i + 1, so
    # drop the last prediction and the first label to line the two up.
    predictions = logits.argmax(axis=-1)[..., :-1]
    targets = labels[..., 1:]

    # -100 marks positions excluded from the loss (e.g. padding); they must
    # not be scored either.
    scored = targets != -100
    total = int(scored.sum())
    if total == 0:
        return {"accuracy": 0.0}

    correct = int((predictions[scored] == targets[scored]).sum())
    return {"accuracy": correct / total}

In [None]:

# Hyper-parameters for the fine-tuning run. Checkpoints are written to
# ./results, replacing whatever an earlier run left there.
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    save_steps=800,
    # Evaluation is disabled by default, which left eval_steps (and the
    # eval_dataset passed below) without any effect. Enable step-based
    # evaluation so the held-out loss is reported every eval_steps steps.
    # NOTE(review): newer transformers releases call this `eval_strategy`.
    evaluation_strategy="steps",
    eval_steps=400,
    warmup_steps=500,
)

# Wire the model, the batches and both data splits into the Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_data,
    eval_dataset=test_data,
)

trainer.train()
# Called without a path, so the final model goes to output_dir ("./results"),
# the directory the text-generation pipeline loads from.
trainer.save_model()

In [None]:
from transformers import pipeline
# Text-generation pipeline over the fine-tuned weights stored in ./results,
# reusing the tokenizer that is already loaded in this session.
pipe = pipeline(
    "text-generation",
    model='./results',
    tokenizer=tokenizer,
)

In [15]:
# Smoke-test the fine-tuned model with a Swedish prompt ("Give me a recipe
# with pancakes"). max_length limits the total length in tokens, with the
# prompt counted as part of it.
pipe("Ge mig ett recept med pannkakor\n ", max_length=30)

[{'generated_text': 'Ge mig ett recept med pannkakor\n \nPannkakor med ägg och mjölk \n \nPannkakor med ägg och mjölk \n'}]