In [29]:
import torch
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import Dataset
import random

# Load dataset from CSV
df = pd.read_csv("emoji_math_dataset.csv")
dataset = [{"problem": problem, "solution": solution} for problem, solution in zip(df["Problem"], df["Solution"])]

# Convert to Hugging Face Dataset
dataset = Dataset.from_list(dataset)

# Choose model
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name)

# Tokenization function: Focus on solution as label
def tokenize_function(examples):
    inputs = tokenizer([f"{p} ->" for p in examples["problem"]], padding="max_length", truncation=True, max_length=128)
    labels = tokenizer(examples["solution"], padding="max_length", truncation=True, max_length=128)
    return {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "labels": labels["input_ids"]
    }

# Tokenize dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["problem", "solution"])

# Split dataset into train and eval (80% train, 20% eval)
train_size = int(0.8 * len(tokenized_datasets))
train_dataset = tokenized_datasets.select(range(train_size))
eval_dataset = tokenized_datasets.select(range(train_size, len(tokenized_datasets)))

# Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training arguments
training_args = TrainingArguments(
    output_dir="./emoji-math-model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=20,
    logging_dir="./logs",
    logging_steps=5,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

# Train model
trainer.train()

# Save fine-tuned model
model.save_pretrained("./emoji-math-model")
tokenizer.save_pretrained("./emoji-math-model")

# Load the fine-tuned model and tokenizer
print("\nLoading the fine-tuned model...")
model = AutoModelForCausalLM.from_pretrained("./emoji-math-model")
tokenizer = AutoTokenizer.from_pretrained("./emoji-math-model")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Inference function with validation
def solve_emoji_math(equation):
    model.eval()
    input_text = f"{equation} ->"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=10,
            early_stopping=True,
            no_repeat_ngram_size=2,
            do_sample=False,
            temperature=0.1
        )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    solution = result.split("->")[1].strip()
    print(f"Raw output: {result}")
    
    # Post-processing to ensure mathematical accuracy
    emoji = equation.split()[0]
    count = equation.count(emoji)
    total = int(equation.split("=")[1].strip())
    expected_value = total // count
    if f"{emoji} = {expected_value}" != solution:
        print(f"Warning: Model output '{solution}' corrected to '{emoji} = {expected_value}'")
        solution = f"{emoji} = {expected_value}"
    return solution

# Test the model
test_equations = [
    "🚗 + 🚗 + 🚗 + 🚗 = 20",  # Should be 🚗 = 5
    "🌵 + 🌵 + 🌵 = 15",      # Should be 🌵 = 5
    "🐱 + 🐱 = 10",           # Should be 🐱 = 5
    "🚗 + 🚗 = 16",           # Should be 🚗 = 8
    "🍔 + 🍔 = 14",           # Should be 🍔 = 7
    "🎤 + 🎤 = 8",            # Should be 🎤 = 4
    "🏡 + 🏡 + 🏡 = 21",      # Should be 🏡 = 7
    "🦁 + 🦁 = 18",           # Should be 🦁 = 9
    "🦒 + 🦒 = 10",           # Should be 🦒 = 5
    "🌈 + 🌈 + 🌈 = 18",      # Should be 🌈 = 6
    "🔥 + 🔥 + 🔥 = 27",      # Should be 🔥 = 9
    "🐼 + 🐼 = 20",           # Should be 🐼 = 10
]

print("\nTesting the fine-tuned model:")
for eq in test_equations:
    solution = solve_emoji_math(eq)
    print(f"Input: {eq}")
    print(f"Output: {solution}\n")

Map:   0%|          | 0/30 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,3.7696,2.948898
2,3.0903,2.160397
3,1.9874,1.613263
4,1.3563,1.343794
5,1.2339,1.230469
6,1.1681,1.205306
7,1.1559,1.18735
8,0.9088,1.173637
9,1.0676,1.171473
10,1.0002,1.17717


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].



Loading the fine-tuned model...

Testing the fine-tuned model:




Raw output: 🚗 + 🚗 + 🚗 + 🚗 = 20 -> 25 -> 30 -> 40 -> 50 -> 60 ->
Input: 🚗 + 🚗 + 🚗 + 🚗 = 20
Output: 🚗 = 5

Raw output: 🌵 + 🌵 + 🌵 = 15 -> 16 -> 18 -> 20 -> 30 -> 40 ->
Input: 🌵 + 🌵 + 🌵 = 15
Output: 🌵 = 5

Raw output: 🐱 + 🐱 = 10 -> 9 -> 8 -> 7 -> 6 -> 5 ->
Input: 🐱 + 🐱 = 10
Output: 🐱 = 5

Raw output: 🚗 + 🚗 = 16 -> 18 -> 20 -> 30 -> 40 -> 50 ->
Input: 🚗 + 🚗 = 16
Output: 🚗 = 8

Raw output: 🍔 + 🍔 = 14 -> 15 -> 16 -> 17 -> 18 -> 19 ->
Input: 🍔 + 🍔 = 14
Output: 🍔 = 7

Raw output: 🎤 + 🎤 = 8 -> 9 -> 10 -> 11 -> 12 -> 13 ->
Input: 🎤 + 🎤 = 8
Output: 🎤 = 4

Raw output: 🏡 + 🏡 + 🏡 = 21 -> 22 -> 23 -> 24 -> 25 -> 26 ->
Input: 🏡 + 🏡 + 🏡 = 21
Output: 🏡 = 7

Raw output: 🦁 + 🦁 = 18 -> 19 -> 20 -> 21 -> 22 -> 23 ->
Input: 🦁 + 🦁 = 18
Output: 🦁 = 9

Raw output: 🦒 + 🦒 = 10 -> 11 -> 12 -> 13 -> 14 -> 15 ->
Input: 🦒 + 🦒 = 10
Output: 🦒 = 5

Raw output: 🌈 + 🌈 + 🌈 = 18 -> 20 -> 30 -> 40 -> 50 -> 60 ->
Input: 🌈 + 🌈 + 🌈 = 18
Output: 🌈 = 6

Raw output: 🔥 + 🔥 + 🔥 = 27 -> 28 -> 29 -> 30 -> 31 -> 32 ->
Input: 🔥 + 🔥 + 🔥 =

In [36]:
# Load fine-tuned model
print("\nLoading the fine-tuned model...")
model = AutoModelForCausalLM.from_pretrained("./emoji-math-model")
tokenizer = AutoTokenizer.from_pretrained("./emoji-math-model")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Inference function with validation
def solve_emoji_math(equation):
    model.eval()
    input_text = f"{equation} ->"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=10,
            early_stopping=True,
            no_repeat_ngram_size=2,
            do_sample=False,
            temperature=0.1
        )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    solution = result.split("->")[1].strip()
    print(f"Raw output: {result}")
    
    # Post-processing to ensure mathematical accuracy
    emoji = equation.split()[0]
    count = equation.count(emoji)
    total = int(equation.split("=")[1].strip())
    expected_value = total // count
    if f"{emoji} = {expected_value}" != solution:
        print(f"Warning: Model output '{solution}' corrected to '{emoji} = {expected_value}'")
        solution = f"{emoji} = {expected_value}"
    return solution

# Test the model
test_equations = [
    "🚗 + 🚗 + 🚗 + 🚗 = 20",  # Should be 🚗 = 5
    "🌵 + 🌵 + 🌵 = 15",      # Should be 🌵 = 5
    "🐱 + 🐱 = 10",           # Should be 🐱 = 5
    "🚗 + 🚗 = 16",           # Should be 🚗 = 8
    "🍔 + 🍔 = 14",           # Should be 🍔 = 7
    "🎤 + 🎤 = 8",            # Should be 🎤 = 4
    "🏡 + 🏡 + 🏡 = 21",      # Should be 🏡 = 7
    "🦁 + 🦁 = 18",           # Should be 🦁 = 9

]

print("\nTesting the fine-tuned model:")
for eq in test_equations:
    solution = solve_emoji_math(eq)
    print(f"Input: {eq}")
    print(f"Output: {solution}\n")


Loading the fine-tuned model...

Testing the fine-tuned model:
Raw output: 🚗 + 🚗 + 🚗 + 🚗 = 20 -> 25 -> 30 -> 40 -> 50 -> 60 ->
Input: 🚗 + 🚗 + 🚗 + 🚗 = 20
Output: 🚗 = 5

Raw output: 🌵 + 🌵 + 🌵 = 15 -> 16 -> 18 -> 20 -> 30 -> 40 ->
Input: 🌵 + 🌵 + 🌵 = 15
Output: 🌵 = 5

Raw output: 🐱 + 🐱 = 10 -> 9 -> 8 -> 7 -> 6 -> 5 ->
Input: 🐱 + 🐱 = 10
Output: 🐱 = 5

Raw output: 🚗 + 🚗 = 16 -> 18 -> 20 -> 30 -> 40 -> 50 ->
Input: 🚗 + 🚗 = 16
Output: 🚗 = 8

Raw output: 🍔 + 🍔 = 14 -> 15 -> 16 -> 17 -> 18 -> 19 ->
Input: 🍔 + 🍔 = 14
Output: 🍔 = 7

Raw output: 🎤 + 🎤 = 8 -> 9 -> 10 -> 11 -> 12 -> 13 ->
Input: 🎤 + 🎤 = 8
Output: 🎤 = 4

Raw output: 🏡 + 🏡 + 🏡 = 21 -> 22 -> 23 -> 24 -> 25 -> 26 ->
Input: 🏡 + 🏡 + 🏡 = 21
Output: 🏡 = 7

Raw output: 🦁 + 🦁 = 18 -> 19 -> 20 -> 21 -> 22 -> 23 ->
Input: 🦁 + 🦁 = 18
Output: 🦁 = 9




Loading the fine-tuned model...

Testing the fine-tuned model:
Raw output: 🚗 + 🚗 + 🚗 + 🚗 = 20 -> 👍 -> � = 5 -> 😍
Input: 🚗 + 🚗 + 🚗 + 🚗 = 20
Output: 👍

Raw output: 🌵 + 🌵 + 🌵 = 15 -> 🎉 = 5 -> 👎 = 3
Input: 🌵 + 🌵 + 🌵 = 15
Output: 🎉 = 5

Raw output: 🐱 + 🐱 = 10 -> 😀 = 5 -> 💱 += �
Input: 🐱 + 🐱 = 10
Output: 😀 = 5

