<a href="https://colab.research.google.com/github/gglchrm/tarot_NN/blob/main/GPT_introduction_HABR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages for this notebook via pip shell commands.
# NOTE(review): `transformers` is requested by the first command and again by
# the third one; the repeat looks redundant — confirm before consolidating.
!pip install transformers
!pip install accelerate
!pip install torch torchdata transformers datasets loralib peft pandas numpy

In [None]:
# Restart session here

In [1]:
from transformers import LlamaTokenizer, LlamaForCausalLM
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import pandas as pd
import torch

# Run configuration shared by the cells below.
model_name = "openlm-research/open_llama_3b_v2"  # Base model to use
training_file = "tarot_readings.csv.1"  # CSV file to use
num_epochs = 3  # Number of full passes over the training data
num_rows = 500  # Number of rows to use for training
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# instead of failing later when tensors are moved to a missing device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
def fine_tune_model(model, optimizer, batch, device):
    """Run a single optimisation step on ``batch`` and return its loss.

    Args:
        model: Causal language model whose forward pass accepts
            ``input_ids``, ``attention_mask`` and ``labels`` and returns an
            object exposing a ``loss`` attribute.
        optimizer: Optimizer that updates the model's parameters.
        batch: Mapping with tensor entries ``'input_ids'``,
            ``'attention_mask'`` and ``'target_ids'``.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()

    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # NOTE(review): a causal LM aligns labels with the input positions, so
    # 'target_ids' presumably has the same shape as 'input_ids' — confirm
    # against the dataset that produces the batches.
    labels = batch['target_ids'].to(device)

    # `decoder_attention_mask` is intentionally not forwarded: it is an
    # encoder-decoder argument and a decoder-only causal LM has no separate
    # decoder input to mask.
    optimizer.zero_grad()
    outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

    return loss.item()

In [3]:
def tacot_reading(model, tokenizer, card1, card2, card3):
    """Generate, print and return a one-paragraph tarot reading for three cards.

    Args:
        model: Generative model exposing ``device`` and ``generate``.
        tokenizer: Callable that turns the prompt into a mapping containing
            ``'input_ids'`` (and optionally ``'attention_mask'``) and that
            provides ``decode``.
        card1: Name of the first card.
        card2: Name of the second card.
        card3: Name of the third card.

    Returns:
        The decoded text of the first generated sequence.
    """
    prompt = "Give me a one paragraph tarot reading if I pull the cards {}, {} and {}.".format(card1, card2, card3)

    inputs = tokenizer(prompt, return_tensors="pt")

    # The tokenizer output is not placed on the model's device automatically;
    # move it there so generation does not fail with a device mismatch.
    target_device = model.device
    input_ids = inputs["input_ids"].to(target_device)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(target_device)

    generated = model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=1000)
    completion = tokenizer.decode(generated[0], skip_special_tokens=True)

    print("Prompt: {}".format(prompt))
    print("Response: {}".format(completion))
    print()

    return completion

In [None]:
# Load the tokenizer and the causal-LM weights for `model_name`, then place
# the model on `device`.
print("* Loading model [{}]...".format(model_name))
tokenizer = LlamaTokenizer.from_pretrained(
    model_name,
)
model = LlamaForCausalLM.from_pretrained(
    model_name,
)
model = model.to(device)

In [None]:
# Baseline: print three sample readings before the model is fine-tuned.
print("* Running 3 inferences (pre-training)...")
tacot_reading(
    model, tokenizer,
    card1="The moon", card2="Two of Swords", card3="Three of Wands",
)
tacot_reading(
    model, tokenizer,
    card1="The hermit", card2="Ace of Pentacles", card3="Judgement",
)
# Kept as a bare final expression so the notebook still displays its value.
tacot_reading(
    model, tokenizer,
    card1="Seven of Cups", card2="The chariot", card3="King of Swords",
)

In [None]:
# Build the training dataset and wrap it in a shuffling loader of 16-item batches.
# NOTE(review): `create_tarot_dataset` is not defined in the visible part of
# this notebook — confirm the cell that defines it runs before this one.
print("* Creating dataset from [{}]...".format(training_file))
dataset = create_tarot_dataset(
    training_file,
    tokenizer,
    num_rows,
)
data_loader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
)

In [None]:
# Fine-tune the model: one optimiser step per batch, reporting the mean
# per-batch loss after every epoch.
print("* Training model for {} epochs..".format(num_epochs))
optimizer = Adam(model.parameters(), lr=1e-4)
for epoch_number in range(1, num_epochs + 1):
    batch_losses = [
        fine_tune_model(model, optimizer, batch, device)
        for batch in data_loader
    ]
    average_loss = sum(batch_losses) / len(data_loader)
    print("Epoch {} average loss: {}".format(epoch_number, average_loss))

In [None]:
# Print the same three sample readings again, this time after fine-tuning.
print("* Running 3 inferences (post-training)...")
tacot_reading(
    model, tokenizer,
    card1="The moon", card2="Two of Swords", card3="Three of Wands",
)
tacot_reading(
    model, tokenizer,
    card1="The hermit", card2="Ace of Pentacles", card3="Judgement",
)
# Kept as a bare final expression so the notebook still displays its value.
tacot_reading(
    model, tokenizer,
    card1="Seven of Cups", card2="The chariot", card3="King of Swords",
)