If using google drive please edit this line to connect to drive location

In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from google.colab import drive
import os, sys

# Mount the google colab
drive.mount("/content/drive/")
GOOGLE_DRIVE_PATH_POST_MYDRIVE = 'DeepLearning_GroupProject/'
GOOGLE_DRIVE_PATH = os.path.join('/content', 'drive', 'MyDrive', GOOGLE_DRIVE_PATH_POST_MYDRIVE)
!ls {GOOGLE_DRIVE_PATH}
sys.path.append(GOOGLE_DRIVE_PATH)

# GOOGLE_DRIVE_PATH = '.'

# relative paths
models_dir = 'models'
csv_path = 'Data/updated_data_with_lists.csv'
tensors_dir = 'Data/tensor_batches'

In [3]:
import pandas as pd
from ast import literal_eval
import numpy as np
import torch

# Explicitly adding models to the search path
models_path = os.path.join(GOOGLE_DRIVE_PATH, models_dir)
if models_path not in sys.path:
    sys.path.insert(0, models_path)

from models import recipe_encoder

csv_file = os.path.join(GOOGLE_DRIVE_PATH,csv_path)
df = pd.read_csv(csv_file, converters={"Cleaned_Ingredients": literal_eval, "Instructions": literal_eval})
print(len(df))
df.head()

13496


Unnamed: 0.1,Unnamed: 0,Title,Instructions,Image_Name,Cleaned_Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"[Pat chicken dry with paper towels, season all...",miso-butter-roast-chicken-acorn-squash-panzanella,"[1 (3½–4-lb.) whole chicken, 2¾ tsp. kosher sa..."
1,1,Crispy Salt and Pepper Potatoes,[Preheat oven to 400°F and line a rimmed bakin...,crispy-salt-and-pepper-potatoes-dan-kluger,"[2 large egg whites, 1 pound new potatoes (abo..."
2,2,Thanksgiving Mac and Cheese,[Place a rack in middle of oven; preheat to 40...,thanksgiving-mac-and-cheese-erick-williams,"[1 cup evaporated milk, 1 cup whole milk, 1 ts..."
3,3,Italian Sausage and Bread Stuffing,[Preheat oven to 350°F with rack in middle. Ge...,italian-sausage-and-bread-stuffing-240559,"[1 (¾- to 1-pound) round Italian loaf, cut int..."
4,4,Newton's Law,[Stir together brown sugar and hot water in a ...,newtons-law-apple-bourbon-cocktail,"[1 teaspoon dark brown sugar, 1 teaspoon hot w..."


Concatenate the batches of preprocessed images into 1 tensor

In [4]:
pt_files = os.listdir(os.path.join(GOOGLE_DRIVE_PATH,tensors_dir))
all_image_tensors = []
all_image_labels = []

# Load and combine all .pt files
for pt_file in pt_files[:5]:
    pt_filepath = os.path.join(GOOGLE_DRIVE_PATH,tensors_dir,pt_file)
    image_tensors, image_labels = torch.load(pt_filepath)
    all_image_tensors.append(image_tensors)
    all_image_labels.extend(image_labels)
    print(f"Loaded {pt_file}")

# Concatenate tensors
all_image_tensors = torch.cat(all_image_tensors)
print(f"Number of images: {all_image_tensors.size(0)}")
print(f"Number of labels: {len(all_image_labels)}")
assert all_image_tensors.size(0) == len(all_image_labels), "Mismatch between images and labels!"

  image_tensors, image_labels = torch.load(pt_filepath)


Loaded batch_6.pt
Loaded batch_7.pt
Loaded batch_0.pt
Loaded batch_11.pt
Loaded batch_10.pt
Number of images: 5000
Number of labels: 5000


Reorganize dataframe to be in the same order as the Image Tensors using the image_label as the matching key

In [5]:
##Reset order of dataframe to match the image labels orders
all_image_labels_cleaned = [label.split(".")[0] for label in all_image_labels] #remove the .png and .jgp
print(all_image_labels_cleaned[:10]) #print to see if at the end the df matches

filtered_df = df[df["Image_Name"].isin(all_image_labels_cleaned)] #filter the df to only have values from the images
print(filtered_df["Image_Name"][:10])
print(df.shape)
print(filtered_df.shape, len(all_image_labels_cleaned))

valid_labels = set(filtered_df['Image_Name'])

# Filter labels and tensors
filtered_labels_and_tensors = [
    (tensor, label) for tensor, label in zip(all_image_tensors, all_image_labels_cleaned) if label in valid_labels
]

# Unpack the filtered data
filtered_tensors, filtered_labels = zip(*filtered_labels_and_tensors)

# Convert back to tensors
filtered_tensors = torch.stack(filtered_tensors)
filtered_labels = list(filtered_labels)

# Verify alignment
print(f"Number of filtered tensors: {len(filtered_tensors)}")
print(f"Number of filtered labels: {len(filtered_labels)}")
print(f"Number of rows in filtered_df: {len(filtered_df)}")

# Finally reorganize the df to be in the same order as the image tensors
filtered_df = filtered_df.set_index("Image_Name").loc[filtered_labels].reset_index()
print(filtered_df["Image_Name"][:10])

['lemon-rhubarb-chicken-237100', 'maple-soy-barbecue-grilled-chicken', 'little-gem-wedge-salad-with-tahini-ranch', 'lemon-balm-honeysuckle-366697', 'nectarine-and-peach-salad-with-pecans-blue-cheese-and-lavender-syrup', 'old-fashioned-fruitcake-cookies-350832', 'kona-swizzle-394668', 'lemon-souffles-with-boysenberries-241606', 'lemony-green-beans-and-peas-368550', 'miso-cured-black-cod-with-chilled-cucumbers-56389995']
1      crispy-salt-and-pepper-potatoes-dan-kluger
3       italian-sausage-and-bread-stuffing-240559
4              newtons-law-apple-bourbon-cocktail
5            warm-comfort-tequila-chamomile-toddy
7               turmeric-hot-toddy-claire-sprouse
10       hot-pimento-cheese-dip-polina-chesnakova
12             butternut-squash-apple-soup-365210
13                     caesar-salad-roast-chicken
14    chicken-and-rice-with-leeks-and-salsa-verde
17                   caramelized-plantain-parfait
Name: Image_Name, dtype: object
(13496, 5)
(4969, 5) 5000
Number of filtered 

Reformat Ingredients, Recipes, and Image titles

In [6]:
column = filtered_df["Cleaned_Ingredients"]
# item_list = [word
#                     for w_list in column
#                     for str_list in w_list
#                     # Make period and comma separate words
#                     # # Remove parentheses and quotes
#                     for word in str_list.replace('.', ' . ').replace(',',' , ').replace('(','').replace(')','').replace('"','').split()]

item_list = []
element_list = []
ingr_max = 0
print("Column length", len(column))
for w_list in column:
    tmp =[]
    for str_list in w_list:
        # for word in str_list:
        formatted = str_list.replace('.', ' . ').replace(',',' , ').replace('(',' ( ').replace(')',' ) ').replace('"','').split()
        item_list.extend(formatted)
        new_len = len(formatted)
        tmp.extend(formatted)
        if new_len>ingr_max:
            ingr_max=new_len
    element_list.append(tmp)

print(item_list[:10])
print(element_list[0])
print("Len Elements:", len(element_list))
ingr_vocab = set(item_list) #We need to keep these even if they are redundant so image tensors stay matching
ingr_vocab_size = len(ingr_vocab)
print("Size:", ingr_vocab_size)
print("Max:", ingr_max)
print(ingr_vocab)

Column length 4969
['5', 'tablespoons', 'olive', 'oil', ',', 'divided', '2', 'tablespoons', 'plus', '1/4']
['5', 'tablespoons', 'olive', 'oil', ',', 'divided', '2', 'tablespoons', 'plus', '1/4', 'cup', 'chopped', 'shallots', '4', '1/2', 'cups', 'diced', 'rhubarb', ',', 'divided', '1', 'tablespoon', 'fresh', 'lemon', 'juice', '2', 'teaspoons', 'finely', 'grated', 'lemon', 'peel', ',', 'divided', '1/4', 'cup', '(', '1/2', 'stick', ')', 'butter', '1/2', 'cup', 'sliced', 'unpeeled', 'fresh', 'ginger', '3/4', 'cup', 'sugar', '6', 'tablespoons', 'brandy', '4', 'cups', 'low-salt', 'chicken', 'broth', '1', 'whole', 'star', 'anise', '1', 'bay', 'leaf', '6', 'boneless', 'chicken', 'breast', 'halves', 'with', 'skin']
Len Elements: 4969
Size: 8110
Max: 110
{'cordyceps', 'unless', 'Noir', '*Passion', 'tablespoons/75', 'endive', 'Mochiko', 'OR', '266', 'Ore', 'Napoléon', 'quick-rising', 'dash', 'Ginger', 'filet', 'leg*', '9x5', 'clear', 'rindless', '2C/', 'restaurant-style', '2-thick', 'spicier', 'h

In [7]:
column = filtered_df["Instructions"]
# item_list = [word
#                     for w_list in column
#                     for str_list in w_list
#                     # Make period and comma separate words
#                     # # Remove parentheses and quotes
#                     for word in str_list.replace('.', ' . ').replace(',',' , ').replace('(','').replace(')','').replace('"','').split()]

item_list = []
inst_max = 0
for w_list in column:
    for str_list in w_list:
        # for word in str_list:
        formatted = str_list.replace('.', ' . ').replace(',',' , ').replace('(','').replace(')','').replace('"','').split()
        item_list.extend(formatted)
        new_len = len(formatted)
        if new_len>inst_max:
            inst_max=new_len

print(item_list[:10])
inst_vocab = set(item_list)
inst_vocab_size = len(inst_vocab)
print("Size:",inst_vocab_size)
print("Max:", inst_max)

['Heat', '2', 'tablespoons', 'olive', 'oil', 'in', 'heavy', 'large', 'skillet', 'over']
Size: 13246
Max: 238


In [8]:
column = filtered_df["Title"].to_list()
# item_list = [word
#                 for str_list in column
#     #                     # Make period and comma separate words
#     #                     # # Remove parentheses and quotes
#                 for word in str_list.replace('.', ' . ').replace(',',' , ').replace('(','').replace(')','').replace('"','').split()
#                 if word]

item_list = []
title_max = 0
for str_list in column:
        # for word in str_list:
        formatted = str_list.replace('.', ' . ').replace(',',' , ').replace('(','').replace(')','').replace('"','').split()
        item_list.extend(formatted)
        new_len = len(formatted)
        if new_len>title_max:
            title_max=new_len


print(item_list[:10])
title_vocab = set(item_list)
title_vocab_size = len(title_vocab)
print("Size:", title_vocab_size)
print("Max:", title_max)

['Lemon-Rhubarb', 'Chicken', 'Maple', 'Barbecue', 'Grilled', 'Chicken', 'Little', 'Gem', 'Wedge', 'Salad']
Size: 3767
Max: 17


In [9]:
total_vocab = set(list(inst_vocab) + list(ingr_vocab) + list(title_vocab))
total_vocab_size = len(total_vocab)
print("Size:",total_vocab_size)
total_max = max([inst_max, ingr_max, title_max])
print("Max:",total_max)

Size: 19381
Max: 238


Tokenize Recipes, Ingredients, and Image Titles

In [10]:
from collections import Counter
# word_to_ix = {word: i for i, word in enumerate(total_vocab)}

# word_to_ix_ingr = {word: i for i, word in enumerate(ingr_vocab)}
# word_to_ix_inst = {word: i for i, word in enumerate(inst_vocab)}
# word_to_ix_title = {word: i for i, word in enumerate(title_vocab)}

word_to_ix = {word: i for i, (word,empty) in enumerate(Counter(total_vocab).most_common(), start=1)}
word_to_ix['<PAD>'] = 0
print(word_to_ix)



In [11]:
total_max = 128

In [12]:
from transformers import AutoTokenizer

# Initialize the tokenizer
tokenizer_recipes = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_nested_list(nested_list, max_length=128):
    """
    Tokenizes a nested list of strings (list of ingredients per recipe).
    Each inner list is tokenized into a list of token IDs.
    """
    tokenized_list = []
    for sublist in nested_list:
        # Join the inner list into a string
        # text = " ".join(sublist)
        text = str(sublist)
        # Tokenize the string
        tokens = tokenizer_recipes(
            text,
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # Append tokenized input_ids to the result list
        tokenized_list.append(tokens["input_ids"].squeeze(0).tolist())
    return tokenized_list

filtered_df['Title_List'] = df['Title'].apply(lambda x: [x])
filtered_df['tokenized_ingredients'] = filtered_df['Cleaned_Ingredients'].apply(lambda x: tokenize_nested_list(x, total_max))
filtered_df['tokenized_instructions'] = filtered_df['Instructions'].apply(lambda x: tokenize_nested_list(x, total_max))
filtered_df['tokenized_titles'] = filtered_df['Title_List'].apply(lambda x: tokenize_nested_list(x, total_max))


# # Store tokenized data in a dictionary for your DataLoader
# filtered_df["tokenized_titles"] = title_tokens["input_ids"]
# filtered_df["title_attention_mask"] = title_tokens["attention_mask"]

# filtered_df["tokenized_ingredients"] = ingredients_tokens["input_ids"]
# filtered_df["ingredients_attention_mask"] = ingredients_tokens["attention_mask"]

# filtered_df["tokenized_instructions"] = instructions_tokens["input_ids"]
# filtered_df["instructions_attention_mask"] = instructions_tokens["attention_mask"]
print(len(filtered_df["tokenized_titles"][0]))
print(filtered_df["tokenized_ingredients"][0])
print(filtered_df["tokenized_instructions"][0])


  from .autonotebook import tqdm as notebook_tqdm


1
[[101, 1019, 7251, 24667, 3619, 9724, 3514, 1010, 4055, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1016, 7251, 24667, 3619, 4606, 1015, 1013, 1018, 2452, 24881, 4618, 12868, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1018, 1015, 1013, 1016, 10268, 18740, 2094, 1054, 6979, 8237, 2497, 1010, 4055, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [13]:
print(len(filtered_df["tokenized_titles"][1]))
print(len(filtered_df["tokenized_ingredients"][3]))
print(len(filtered_df["tokenized_instructions"][1]))

1
5
26


In [14]:
# def tokenize_text(list_object, vocab, max_length):
#     tokenized_list = []
#     # Tokenize each sentence
#     for sentence in list_object:
#       formatted = sentence.replace('.', ' . ').replace(',',' , ').replace('(','').replace(')','').replace('"','').split()
#       tokens = [vocab.get(word, vocab['<PAD>']) for word in formatted]
#       # Pad or truncate to max_length
#       if len(tokens) < max_length:
#           tokens.extend([vocab['<PAD>']] * (max_length - len(tokens)))
#       else:
#           tokens = tokens[:max_length]
#       # Append the tokenized sentence to the list
#       tokenized_list.append(tokens)

#     return tokenized_list
# # Tokenize all columns
# filtered_df['Title_List'] = df['Title'].apply(lambda x: [x])
# filtered_df['tokenized_ingredients'] = filtered_df['Cleaned_Ingredients'].apply(lambda x: tokenize_text(x, word_to_ix,total_max))
# filtered_df['tokenized_instructions'] = filtered_df['Instructions'].apply(lambda x: tokenize_text(x, word_to_ix,total_max))
# filtered_df['tokenized_titles'] = filtered_df['Title_List'].apply(lambda x: tokenize_text(x, word_to_ix,total_max))
# # print(filtered_df['Title_List'])
# # print(filtered_df[['tokenized_ingredients', 'tokenized_instructions', 'tokenized_titles', 'Cleaned_Ingredients', 'Instructions', 'Title']])

# print(max_length_ing, max_length_inst, max_length_title)

In [15]:
def pad_or_truncate_nested(sublist, target_length, max_length, pad_token=0):
        """
            Pad or truncate the outer list of a nested list to match the target_length.
            Each inner list remains untouched.
        """
        # Pad with [pad_token] or truncate the outer list
        if len(sublist) < target_length:
            sublist.extend([[pad_token]* max_length] * (target_length - len(sublist)))
        else:
            sublist = sublist[:target_length]
        return sublist

max_length_ing = filtered_df['tokenized_ingredients'].apply(len).max()
max_length_inst = filtered_df['tokenized_instructions'].apply(len).max()//4
max_length_title = filtered_df['tokenized_titles'].apply(len).max()
filtered_df['tokenized_ingredients'] = filtered_df['tokenized_ingredients'].apply(
    lambda ing: pad_or_truncate_nested(ing, max_length_ing,total_max))

filtered_df['tokenized_instructions'] = filtered_df['tokenized_instructions'].apply(
    lambda inst: pad_or_truncate_nested(inst, max_length_inst, total_max))
# new_token_ing = [pad_or_truncate_nested(ing, max_length_title) for ing in tokenized_ingredients] #titles were all list length of 1
max_length_ing = filtered_df['tokenized_ingredients'].apply(len).max()
max_length_inst = filtered_df['tokenized_instructions'].apply(len).max()
max_length_title = filtered_df['tokenized_titles'].apply(len).max()
print(max_length_ing, max_length_inst, max_length_title)
print(len(filtered_df['tokenized_ingredients'][5]))
print(filtered_df['tokenized_ingredients'][100])

51 22 1
51
[[101, 1015, 9044, 2598, 4977, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1017, 1013, 1018, 2452, 4840, 7852, 13675, 25438, 2015, 1006, 2013, 1016, 25609, 2422, 2878, 1011, 10500, 7852, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1015, 1013, 1016, 2452, 22126, 24881, 29259, 2401, 2030, 2060, 4086, 24444, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

Tokenize the Image Labels for the Image Encoder

In [16]:
from transformers import AutoTokenizer
from transformers import CLIPModel
model_name = "openai/clip-vit-base-patch16"
clip_model = CLIPModel.from_pretrained(model_name)
tokenizer_images = AutoTokenizer.from_pretrained(model_name)

tokenized_labels = tokenizer_images(
    filtered_labels,
    padding="max_length",
    truncation=True,
    max_length=tokenizer_images.model_max_length,
    return_tensors="pt"
)

In [17]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [18]:
from torch.utils.data import Dataset, DataLoader, random_split
import torch
from models.image_2_recipe import Image2Recipe
from models.image_encoder import Image_Encoder
from models.recipe_encoder import RecipeEncoder
from models.MMR import MMR
from models.MMR import MMR_losses
import matplotlib.pyplot as plt

class Data_Loading(Dataset):
    """
    Class to combine the Images, Labels, Recipes together to be used in combination when inputted into Model
    """
    def __init__(self, tokenized_ingredients, tokenized_instructions, tokenized_titles, image_tensors, tokenized_labels):
        self.ingredients = torch.tensor(tokenized_ingredients, dtype=torch.int16)
        self.instructions = torch.tensor(tokenized_instructions, dtype=torch.int16)
        self.titles = torch.tensor(tokenized_titles, dtype=torch.int16)
        self.images = image_tensors
        self.tokenized_labels = tokenized_labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return {
            "ingredients": self.ingredients[idx],
            "instructions": self.instructions[idx],
            "titles": self.titles[idx],
            "images": self.images[idx],
            "tokenized_labels": {
                "input_ids": self.tokenized_labels['input_ids'][idx].to(dtype=torch.long),
                "attention_mask": self.tokenized_labels['attention_mask'][idx].to(dtype=torch.uint8)
            }
        }



class Trainer(object):
    """
    Class designed to run ViT (train, evaluate, plot)
    """

    def __init__(self, **kwargs):
        """
        Initialize ViT
        """
        self.epochs = kwargs['epochs']
        self.optimizer_name = kwargs['optimizer']
        self.device = kwargs['device']
        self.batch_size = kwargs['batch_size']
        self.lr = kwargs['learning_rate']

        self.tokenized_ingredients = kwargs['ingredient_tokens']
        self.tokenized_instructions = kwargs['instruction_tokens']
        self.tokenized_title = kwargs['title_tokens']
        self.image_tensor = kwargs['image_tensors']
        self.image_labels = kwargs['image_labels']
        self.clip_model = kwargs['clip_model']
        self.vocab_size = kwargs['vocab_size']
        self.max_len = kwargs['max_len']
        self.instance_weight = kwargs['instance_weight']
        self.sem_weight = kwargs['sem_weight']
        self.itm_weight = kwargs['itm_weight']
        # Pending variable margin calc
        self.loss_calcs = MMR_losses(margin=1.0, instance_weight=self.instance_weight, sem_weight=self.sem_weight, itm_weight=self.itm_weight)
        num_classes = len(self.image_labels)


        self.image_encoder = Image_Encoder(self.device, self.clip_model, num_classes).to(self.device)
        self.recipe_encoder = RecipeEncoder(self.device, self.vocab_size, self.max_len).to(self.device)
        self.mmr = MMR(hidden_dim=self.image_encoder.clip_model.config.projection_dim).to(self.device)
        # MMR varaibles: num_heads=num_heads, ITEM_lyrs=ITEM_lyrs, MTD_lyrs=MTD_lyrs, projection_dim=projection_dim
        self.model = Image2Recipe(self.image_encoder, self.recipe_encoder, self.mmr).to(self.device)


        ##DO we want to tune each of these learning rates for each model?
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        # self.optimizer = torch.optim.AdamW([
        #     {"params": self.model.image_encoder.parent_model.parameters(), "lr": 1e-6},
        #     {"params": self.model.recipe_encoder.parameters(), "lr": 1e-5},
        #     {"params": self.model.image_encoder.fc1.parameters(), "lr": 1e-5},
        #     {"params": self.model.recipe_encoder.ll_e.parameters(), "lr": 1e-5},
        # ])


        #Combine Images, Recipes, Instructions in training and eval datasets
        self.data_total = Data_Loading(
            self.tokenized_ingredients,
            self.tokenized_instructions,
            self.tokenized_title,
            self.image_tensor,
            self.image_labels
        )
        training_perc = .9
        train_size = int(training_perc * len(self.data_total))
        eval_size = len(self.data_total) - train_size
        train_dataset, eval_dataset = random_split(self.data_total, [train_size, eval_size])
        self.dataloader = {}
        self.dataloader['train'] = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        self.dataloader['eval'] = DataLoader(eval_dataset, batch_size=self.batch_size, shuffle=False)

        #Lists to fill up during training and plotted later for learning curves
        self.train_loss_list = []
        self.eval_loss_list = []
        self.eval_acc_list = []
        self.eval_acc_list = []
        print("finished initializing")


    def train(self):
        """
        Train ViT, image encoder, recipe encoder, MMR
        """

        for epoch in range(self.epochs):

            for phase in ['train', 'eval']:
                total_loss = 0
                total_accuracy = 0
                if phase == 'train':
                    self.model.train()
                else:
                    self.model.eval()
                print("updated")
                for i, batch_data in enumerate(self.dataloader[phase]):
                    #Looping through batches of training data then eval data each epoch
                    #TODO: Add how the recipe, instructions, and titles will be tokenized
                    ingredients, instructions, titles, images, image_labels = (
                        batch_data['ingredients'].to(self.device),
                        batch_data['instructions'].to(self.device),
                        batch_data['titles'].to(self.device),
                        batch_data['images'].to(self.device),
                        batch_data['tokenized_labels']
                    )

                    recipe_enc_src = [titles, ingredients, instructions]
                    self.optimizer.zero_grad()

                    if phase == 'train':
                        output = self.model(images, image_labels, recipe_enc_src)
                        ##Combine the Recipe Encoder Losses and Image Encoder Losses based on TFOOD
                        mmr_logits = output["mmr_logits"]
                        image_logits = output["image_logits"]
                        image_embeddings_proj = output["image_embeddings"]
                        recipe_embeddings_proj = output["recipe_embeddings"]
                        # print("MMR_Logits", mmr_logits)
                        # print("image_logits", image_logits)
                        # print("image_embeddings_proj", image_embeddings_proj)
                        # print("recipe_embeddings_proj", recipe_embeddings_proj)

                        # I am assuming that the image and recipe logits give you a classification score that correspond to the label?
                        # Do we want to add classification to the loss?
                        # Using baseline all image-recipe pairs match btw.. maybe dont wat to do that?
                        loss = self.loss_calcs.total_loss(image_logits, image_embeddings_proj, recipe_embeddings_proj, mmr_logits)
                        self.train_loss_list.append(loss.item())
                        loss.backward()
                        self.optimizer.step()

                    else: ##Eval mode
                        with torch.no_grad():
                            output = self.model(images, image_labels, recipe_enc_src)
                            loss = self.loss_calcs.total_eval_loss(image_logits, image_embeddings_proj, recipe_embeddings_proj)
                            self.eval_loss_list.append(loss.item())


                    # del unused_tensor
                    torch.cuda.empty_cache() #clear cache after each batch
                    print(i)
                    # print(output)
                    # total_loss += loss.item()

                print(f"{phase}: Epoch {epoch+1}, Loss: {total_loss / len(self.dataloader[phase])}")
                break


    ##Waiting on training code to finish
    def plot_learning_loss_curves(self):
        """
        Plot accuracy and loss curves for training and eval accuracy/loss lists (item/epoch)
        """
        plt.figure(figsize=(10, 5))
        plt.plot(self.train_loss_list, label='Training Loss')
        plt.plot(self.eval_loss_list, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Loss Curve')
        plt.legend()
        plt.show()

        plt.figure(figsize=(10, 5))
        plt.plot(self.train_acc_list, label='Training Accuracy')
        plt.plot(self.eval_acc_list, label='Validation Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Accuracy Curve')
        plt.legend()
        plt.show()


In [19]:
print(len(filtered_df['tokenized_ingredients'][1][0]))
print(len(filtered_df['tokenized_instructions'][1][0]))
print(len(filtered_df['tokenized_titles'][1][0]))

128
128
128


In [20]:
# !export CUDA_LAUNCH_BLOCKING=1


In [21]:
# torch.cuda.empty_cache()

In [None]:
# Cuda issues
print(torch.__version__)
print(torch.cuda.is_available())
vocab_size = tokenizer_recipes.vocab_size
kwargs = {
    'epochs': 10,
    'ingredient_tokens': filtered_df['tokenized_ingredients'].to_list(),
    'instruction_tokens': filtered_df['tokenized_instructions'].to_list(),
    'title_tokens': filtered_df['tokenized_titles'].to_list(),
    'image_tensors': filtered_tensors,
    'image_labels': tokenized_labels,
    'device': device,
    'vocab_size': vocab_size,
    'max_len': total_max,
    'clip_model': clip_model,
    'optimizer': 'adam',
    'learning_rate': 1e-3,
    'batch_size': 2,
    'instance_weight': 1,
    'sem_weight': 1,
    'itm_weight': 1
    # 'max_lengths': {
    #     'ingredient_tokens': max_length_ing,
    #     'instruction_tokens': max_length_inst,
    #     'title_tokens': max_length_title
    # }

}
image2recipe = Trainer(**kwargs)
image2recipe.train()


2.4.0
True
finished initializing
updated


  positives_mask = (labels == labels.T)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27