In [1]:
import csv

from data_preparation import all_datasets_exist, unpack_datasets, get_datasets, split_datasets, add_dangerous_data, get_test_datasets_only, load_existing_model
from training_and_evaluation_functions import train_model, eval_perplexity, save_scores, backup_file
from visualization_functions import read_csv_into_matrix, plot_perplexity_matrix

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
"""
## 8 LMs to use for prediction
1 - untrained small local model
2 - everything (small dataset of 2000 random recipes)
3 - everything (large dataset of 1M random recipes)
4 - All drinks** (Note down how many for the report)
5 - All bakery**
6 - All meals**
7 - First everything, then bakery*
8 - ChatGPT

*This could be done for meals and drinks, not necessary for the report.
**Excludes test

## 6 datasets to test on (should have 1000 recipes each)
1 - everything mixed
2 - drinks
3 - bakery
4 - meal
5 - shuffled recipe steps
6 - dangerous steps
"""

# Training CSV paths, keyed by dataset name.
# The "LM n" tags refer to the numbered list of language models in the notes above.
train_datasets = {
    "mixed_subset": "train_datasets/mixed_subset.csv",      # LM 2
    "all": "train_datasets/all.csv",                        # LM 3
    "drinks": "train_datasets/drinks.csv",                  # LM 4
    "bakery": "train_datasets/bakery.csv",                  # LM 5
    "meal": "train_datasets/meal.csv",                      # LM 6
    "all_but_bakery": "train_datasets/all_but_bakery.csv",  # LM 7
}

# Model directories, keyed by model name (ChatGPT, LM 8, has no local directory here).
trained_models = {
    "untrained": "train_models/untrained/",                            # LM 1
    "mixed_subset": "train_models/mixed_subset/",                      # LM 2
    "all": "train_models/all/",                                        # LM 3
    "drinks": "train_models/drinks/",                                  # LM 4
    "bakery": "train_models/bakery/",                                  # LM 5
    "meal": "train_models/meal/",                                      # LM 6
    "everything_then_bakery": "train_models/everything_then_bakery/",  # LM 7
}

# Test CSV paths, keyed by test-set name.
# The "test n" tags refer to the numbered list of test datasets in the notes above.
test_datasets = {
    "all": "test_datasets/all.csv",                        # test 1
    "drinks": "test_datasets/drinks.csv",                  # test 2
    "bakery": "test_datasets/bakery.csv",                  # test 3
    "meal": "test_datasets/meal.csv",                      # test 4
    "shuffled_steps": "test_datasets/shuffled_steps.csv",  # test 5
    "bad": "test_datasets/bad.csv",                        # test 6
}


In [3]:
# Each dict below describes one results matrix: "train" lists the models
# and "test" lists the test sets they are scored against.

# Complete grid: every trained model against every test set.
annex = {
    "train": [
        "untrained",
        "mixed_subset",
        "all",
        "drinks",
        "bakery",
        "meal",
        "everything_then_bakery",
        "ChatGPT",
    ],
    "test": ["all", "drinks", "bakery", "meal", "shuffled_steps", "bad"],
}

# Question 1: how does fine-tuning on recipes impact the quality of
# autocompleted instruction steps?
matrix1 = {
    "train": ["untrained", "ChatGPT", "mixed_subset", "all", "drinks", "bakery", "meal"],
    "test": ["all", "drinks", "bakery", "meal"],
}

# Question 2: for specific categories of recipes, what is the best training strategy?
matrix2 = {
    "train": ["untrained", "all", "bakery", "everything_then_bakery"],
    "test": ["bakery"],
}

# Question 3: does the fine-tuning process increase or decrease the chance of
# dangerous instruction steps being generated?
matrix3 = {
    "train": ["untrained", "ChatGPT", "mixed_subset", "all", "bakery"],
    "test": ["all", "shuffled_steps", "bad"],
}

In [4]:
# Create the CSV files listed in train_datasets and test_datasets from the
# original dataset, but only when they don't all already exist.

original_dataset = "full_dataset.csv"

# The guard keeps "Restart & Run All" from regenerating every CSV on each run.
# To force a rebuild, delete the generated CSVs (or call unpack_datasets directly).
if not all_datasets_exist(train_datasets, test_datasets):
    unpack_datasets(train_datasets, test_datasets, original_dataset)

In [5]:
# --- Run configuration -------------------------------------------------------

# When True, the evaluation cell loads already-saved models instead of training.
Use_existing_models = True

# Base model identifier. The end goal is a matrix of perplexity scores indexed by
# (train_dataset, test_dataset).
# NOTE(review): "something" looks like a placeholder — confirm before enabling training.
base_model = "something"

# The order of this list is currently the only record of which test set each
# perplexity score belongs to. The "dangerous data" is a separate kind of dataset
# and always takes the last index, which is why the results may appear to have
# one more category than is listed here.
# NOTE(review): other cells key the same category as "meal", not "main" — confirm
# which name the helper functions expect.
recipe_categories = [
    "main",
    "drinks",
    "bakery",
]

# CSV file that accumulates the perplexity scores.
saved_perplexity_file = "saved_perplexity.csv"

In [6]:
# Column names (the CSV header) for the perplexity file. Despite its name,
# `csv_rows` holds columns: the training-dataset name, a timestamp, then one
# column per dataset the perplexity score is calculated on.
# Those test-set columns are the X axis of the final matrix; the training dataset
# (one CSV row each) is presumably the Y axis.
# Needless to say, when X = Y the test and train data come from the same
# category, but still use different recipes.
csv_rows = ["recipe_train_dataset", "datetime"] + recipe_categories + ["bad"]

# Opening in "w" mode truncates the file, so this single open both discards any
# previous results and writes the fresh header (no separate "clear" step needed).
# newline="" is what the csv module expects for files it writes to.
with open(saved_perplexity_file, mode="w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=csv_rows)
    writer.writeheader()  # Write the column names


In [7]:
# Evaluate every model on every test set and record one row of perplexity
# scores per model in the perplexity CSV.

# One entry per model to evaluate: the untrained baseline plus one per recipe category.
dataset_names = ["untrained"] + recipe_categories

# NOTE(review): this rebinds `test_datasets`, which an earlier cell defines as a
# name -> CSV-path dict. Re-running earlier cells after this one will see the
# value assigned here instead — consider a distinct name.
test_datasets = get_test_datasets_only(recipe_categories)
# TODO: the dangerous-steps test set is currently disabled; re-enable with
# test_datasets = add_dangerous_data(test_datasets)
# NOTE(review): the CSV header written earlier includes a "bad" column, so rows
# saved here may have one score fewer than the header — confirm against save_scores.

if not Use_existing_models:
    # The training path was only ever sketched (base_model for "untrained",
    # train_model(base_model, train_datasets) otherwise). Fail fast rather than
    # fall through with `trained_model`/`tokenizer` undefined or left over from
    # a previous run, which would silently score the wrong model.
    raise NotImplementedError(
        "Training inside this notebook is not implemented; "
        "set Use_existing_models = True to evaluate saved models."
    )

for name in dataset_names:
    print(f"OMG!!! training model on {name}")
    trained_model, tokenizer = load_existing_model(name)

    # One perplexity score per test set, in the same order as `test_datasets`.
    scores = [
        eval_perplexity(trained_model, tokenizer, test_data)
        for test_data in test_datasets
    ]
    save_scores(name, scores)

backup_file(saved_perplexity_file, "saved_perplexities")

OMG!!! training model on untrained




Measuring perplexity on test recipe number:  0
substep:  0
You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] Petite Lasagna [INGREDIENTS_LIST] ["5 lasagna noodles (I used 6 and did not use the trimmed pieces)", "14 lb ground round", "14 lb ground sausage", "14 cup chopped onion", "1 12 teaspoons minced garlic", "12 teaspoon crushed red pepper flakes", "1 (4 ounce) can sliced mushrooms, drained", "1 12 cups spaghetti sauce", "1 egg, slightly beaten", "34 cup ricotta cheese", "2 tablespoons grated parmesan cheese", "2 teaspoons parsley flakes", "1 12 teaspoons italian seasoning", "1 14 cups shredded Italian cheese blend", "fresh ground pepper", "kosher salt"] [STEPS]  1 - Preheat oven 
substep:  1
You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] Petite Lasagna [INGREDIENTS_LIST] ["5 lasagna noodles (I used 6 and did not use the trimmed pieces)", "14 lb ground round", "14 lb ground sausage", "14 cup cho

KeyboardInterrupt: 

In [None]:
# Read the saved perplexity CSV back in, then hand the matrix and its two axes
# to the plotting helper.
perplexity_table = read_csv_into_matrix(saved_perplexity_file)
matrix, Y_axis, X_axis = perplexity_table
plot_perplexity_matrix(matrix, Y_axis, X_axis)