In [12]:
# read txt file
with open(r'/content/dish_metadata_cafe1.csv', 'r') as f:
    data1 = f.read()

with open(r'/content/dish_metadata_cafe2.csv', 'r') as f:
    data2 = f.read()

import pandas as pd
nutrition1 = pd.DataFrame({})
nutrition1['ingredients'] = ''
for idx, dish in enumerate(data2.split('\n')[:-1]):
    lista = dish.split(',')
    # concat row in df
    nutrition1 = pd.concat([nutrition1, pd.DataFrame([lista[0:6]])], axis=0, ignore_index = True)
    diz  = {}
    for idx2, ingredient in enumerate(lista[6:]):
        if ingredient[0:4] == 'ingr':
            diz[ingredient] = lista[6:][idx2+1:idx2+7]
        else:
            pass
    # add diz to df
    nutrition1['ingredients'][idx] = diz


nutrition2 = pd.DataFrame({})
nutrition2['ingredients'] = ''
for idx, dish in enumerate(data1.split('\n')[:-1]):   # ultima riga è vuota
    lista = dish.split(',')
    # concat row in df: dish_id, total_calories, total_mass, total_fat, total_carb, total_protein
    nutrition2 = pd.concat([nutrition2, pd.DataFrame([lista[0:6]])], axis=0, ignore_index = True)
    diz  = {}
    for idx2, ingredient in enumerate(lista[6:]):
        if ingredient[0:4] == 'ingr':
            diz[ingredient] = lista[6:][idx2+1:idx2+7]
        else:
            pass
    # add diz to df: (ingr_1_id, ingr_1_name, ingr_1_grams, ingr_1_calories, ingr_1_fat, ingr_1_carb, ingr_1_protein
    nutrition2['ingredients'][idx] = diz



# concat nutrition1 and nutrition2
nutrition = pd.concat([nutrition1, nutrition2], axis=0, ignore_index = True)
# rename the first 6 columns
nutrition.columns = ['dish_id', 'total_calories', 'total_mass', 'total_fat', 'total_carb', 'total_protein', 'ingredients']


# convert to int from second column to the end
nutrition.iloc[:, 1:-1] = nutrition.iloc[:, 1:-1].astype(float).astype(int)
nutrition.dtypes

dish_id           object
total_calories     int64
total_mass         int64
total_fat          int64
total_carb         int64
total_protein      int64
ingredients       object
dtype: object

In [13]:
nutrition

Unnamed: 0,dish_id,total_calories,total_mass,total_fat,total_carb,total_protein,ingredients
0,dish_1572974428,0,74,0,0,0,"{'ingr_0000000032': ['tomatoes', '15.572240', ..."
1,dish_1572464692,0,56,0,0,0,"{'ingr_0000000442': ['hummus', '56.000000', '9..."
2,dish_1571931594,0,66,0,0,0,"{'ingr_0000000433': ['roasted potatoes', '66.0..."
3,dish_1575478635,0,132,0,0,0,"{'ingr_0000000008': ['scrambled eggs', '132.00..."
4,dish_1572887017,0,274,0,0,0,"{'ingr_0000000015': ['hash browns', '43.000000..."
...,...,...,...,...,...,...,...
5001,dish_1551139192,31,63,0,8,0,"{'ingr_0000000038': ['pineapple', '63.000000',..."
5002,dish_1563382911,182,123,13,1,12,"{'ingr_0000000008': ['scrambled eggs', '123.00..."
5003,dish_1562691737,485,700,11,70,27,"{'ingr_0000000543': ['chia seeds', '9.725404',..."
5004,dish_1558458496,53,77,0,13,0,"{'ingr_0000000440': ['grapes', '77.000000', '5..."


In [14]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset
import numpy as np
import pickle
import os
import random
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import pairwise_distances
from collections import defaultdict
import shutil
import torchvision.models as models
from collections import OrderedDict




class Nutrition5k(Dataset):
    def __init__(self, image_paths, nutrition5k_df):
        self.transform = transforms.Compose([transforms.Resize((512, 512)), transforms.ToTensor()])#, transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
        self.image_paths = image_paths   # embedding images folder
        self.images = []
        self.kcal = []
        self.carb = []
        self.protein = []
        self.fat = []
        self.mass = []       
        self.ingredients = []   # how many ingredients in the recipe

        for path in image_paths:     # random shuffle the images
            filename = path[0:15]
            nutritional_info = nutrition5k_df[nutrition5k_df['dish_id'] == filename]
            self.images.append(os.path.join(image_path,filename + ".jpeg"))
            self.kcal.append(nutritional_info['total_calories'].values[0])
            self.carb.append(nutritional_info['total_carb'].values[0])
            self.protein.append(nutritional_info['total_protein'].values[0])
            self.fat.append(nutritional_info['total_fat'].values[0])
            self.mass.append(nutritional_info['total_mass'].values[0])
            self.ingredients.append(len(nutritional_info['ingredients'].values[0])-1)
            
                

    def __len__(self):
         return len(self.images)

    def __getitem__(self, index):
            img = Image.open(self.images[index]).convert('RGB')
            img = self.transform(img)
          
            kcal = self.kcal[index]
            carb = self.carb[index]
            protein = self.protein[index]
            fat = self.fat[index]
            mass = self.mass[index]
            ingredients = self.ingredients[index]

            # get image name from filename
            #image_name = self.images[index].split('\\')[-1]
            sample = {"image": img ,'calories':kcal, 'carb': carb, 'protein': protein, 'fat':fat, 'mass':mass, 'ingredients':ingredients}
            
            return sample




class HydraNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = models.resnet18(pretrained=True)
        self.n_features = self.net.fc.in_features
        self.net.fc = nn.Identity()

        self.net.fc1 = nn.Sequential(OrderedDict(
            [('linear', nn.Linear(self.n_features,self.n_features)),
            ('relu1', nn.ReLU()),
            ('final', nn.Linear(self.n_features, 1))]))

        self.net.fc2 = nn.Sequential(OrderedDict(
            [('linear', nn.Linear(self.n_features,self.n_features)),
            ('relu1', nn.ReLU()),
            ('final', nn.Linear(self.n_features, 1))]))

        self.net.fc3 = nn.Sequential(OrderedDict(
            [('linear', nn.Linear(self.n_features,self.n_features)),
            ('relu1', nn.ReLU()),
            ('final', nn.Linear(self.n_features, 1))]))
        
        self.net.fc4 = nn.Sequential(OrderedDict(
            [('linear', nn.Linear(self.n_features,self.n_features)),
            ('relu1', nn.ReLU()),
            ('final', nn.Linear(self.n_features, 1))]))

        self.net.fc5 = nn.Sequential(OrderedDict(
            [('linear', nn.Linear(self.n_features,self.n_features)),
            ('relu1', nn.ReLU()),
            ('final', nn.Linear(self.n_features, 1))]))

        self.net.fc6 = nn.Sequential(OrderedDict(
            [('linear', nn.Linear(self.n_features,self.n_features)),
            ('relu1', nn.ReLU()),
            ('final', nn.Linear(self.n_features, 34))]))
        
    def forward(self, x):
        kcal_head = self.net.fc1(self.net(x))
        carbo_head = self.net.fc2(self.net(x))
        protein_head = self.net.fc3(self.net(x))
        fat_head = self.net.fc4(self.net(x))
        mass_head = self.net.fc5(self.net(x))
        num_ingr_head = self.net.fc6(self.net(x))

        return kcal_head, carbo_head, protein_head, fat_head, mass_head, num_ingr_head




class HydraTraining(object):

    def __init__(self, model, n_epochs, train_loader, val_loader, optimizer, criterion, device,
                    num_ingr_loss, kcal_loss, carbo_loss, protein_loss, fat_loss, mass_loss):
        self.model = model
        self.n_epochs = n_epochs
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device
        self.num_ingr_loss = num_ingr_loss
        self.kcal_loss = kcal_loss
        self.carbo_loss = carbo_loss
        self.protein_loss = protein_loss
        self.fat_loss = fat_loss
        self.mass_loss = mass_loss

        self.accuracy_train = []
        self.loss_train = []





    def train(self):
        
        for epoch in range(self.n_epochs):
            self.model.train()
            total_training_loss = 0
            correct_prediction = 0
            total_prediction = 0
        
            for i, data in enumerate(tqdm(self.train_loader)):
                inputs = data["image"]#.to(device=device)
                
                kcal_label = torch.tensor(data["calories"])#.to(device=device)
                carbo_label = data["carb"]#.to(device=device)
                protein_label = data["protein"]#.to(device=device)
                fat_label = data["fat"]
                mass_label = data["mass"]
                num_ingr_label = data["ingredients"]
                
                self.optimizer.zero_grad()
                kcal_output, carbo_output, protein_output, fat_output, mass_output, num_ingr_output = self.model(inputs)
                print(kcal_label, kcal_output)
                loss_1 = self.kcal_loss(kcal_output, kcal_label.unsqueeze(1).float())
                loss_2 = self.carbo_loss(carbo_output, carbo_label.unsqueeze(1).float())
                loss_3 = self.protein_loss(protein_output, protein_label.unsqueeze(1).float())
                loss_4 = self.fat_loss(fat_output, fat_label.unsqueeze(1).float())
                loss_5 = self.mass_loss(mass_output, mass_label.unsqueeze(1).float())
                loss_6 = self.num_ingr_loss(num_ingr_output, num_ingr_label)
                
                loss = loss_1 + loss_2 + loss_3 + loss_4 + loss_5 + loss_6
                loss.backward()
                self.optimizer.step()
                
                total_training_loss += loss

                # accuracy
                _, prediction = torch.max(kcal_output.data, 1)
                total_prediction += prediction.shape[0]
                correct_prediction += (prediction == kcal_label).sum().item()

            # Print stats at the end of the epoch
            num_batches = i
            avg_loss = total_training_loss / num_batches
            acc = correct_prediction/total_prediction

            self.accuracy_train.append(acc)
            self.loss_train.append(avg_loss)

            print('Epoch:', epoch+1, 'Loss:', f'{avg_loss:.2f}', 'Accuracy:', acc)

        return self.accuracy_train, self.loss_train

In [15]:
import torch
from torch.utils.data import DataLoader
import random
image_path = "/content/drive/MyDrive/embedding_images"

In [16]:
import random
def train_test_split(image_path):
    image_list = os.listdir(image_path)
    random.shuffle(image_list)
    train_data = DataLoader(Nutrition5k(image_list[0:int(len(image_list)*3/4)], nutrition), shuffle=True, batch_size=16)
    test_data = DataLoader(Nutrition5k(image_list[int(len(image_list)*3/4):], nutrition), shuffle=False, batch_size=16)
    return train_data, test_data

train_data, test_data = train_test_split(image_path)

In [17]:
model = HydraNet()

num_ingr_loss = nn.CrossEntropyLoss()
kcal_loss = nn.L1Loss()
carbo_loss = nn.L1Loss()
protein_loss = nn.L1Loss()
fat_loss = nn.L1Loss()
mass_loss = nn.L1Loss()

sig = nn.Sigmoid()

optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.09)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


# 6 ore per un epoca ... 24 ore per 4 epoche

In [18]:
accuracy_train = []
loss_train = []

for epoch in range(5):
            model.train()
            total_training_loss = 0
            correct_prediction = 0
            total_prediction = 0
        
            for i, data in enumerate(tqdm(train_data)):
                inputs = data["image"]#.to(device=device)
                #print(data["calories"])
                kcal_label = torch.tensor(data["calories"])#.to(device=device)
                carbo_label = data["carb"]#.to(device=device)
                protein_label = data["protein"]#.to(device=device)
                fat_label = data["fat"]
                mass_label = data["mass"]
                num_ingr_label = data["ingredients"]
                #print(inputs, kcal_label, carbo_label, protein_label, fat_label, mass_label, num_ingr_label)
                optimizer.zero_grad()
                kcal_output, carbo_output, protein_output, fat_output, mass_output, num_ingr_output = model(inputs)
                #print(kcal_output, kcal_label)
                loss_1 = kcal_loss(kcal_output, kcal_label.unsqueeze(1).float())
                loss_2 = carbo_loss(carbo_output, carbo_label.unsqueeze(1).float())
                loss_3 = protein_loss(protein_output, protein_label.unsqueeze(1).float())
                loss_4 = fat_loss(fat_output, fat_label.unsqueeze(1).float())
                loss_5 = mass_loss(mass_output, mass_label.unsqueeze(1).float())
                #print( 'LABELS', num_ingr_label.shape, num_ingr_label, 'PREDICTED', num_ingr_output.shape)
                loss_6 = num_ingr_loss(num_ingr_output, num_ingr_label)
                
                loss = loss_1 + loss_2 + loss_3 + loss_4 + loss_5 + loss_6
                loss.backward()
                optimizer.step()
                
                total_training_loss += loss

                # accuracy
                _, prediction = torch.max(kcal_output.data, 1)
                total_prediction += prediction.shape[0]
                correct_prediction += (prediction == kcal_label).sum().item()

            # Print stats at the end of the epoch
            num_batches = i
            avg_loss = total_training_loss / num_batches
            acc = correct_prediction/total_prediction

            accuracy_train.append(acc)
            loss_train.append(avg_loss)

            print('Epoch:', epoch+1, 'Loss:', f'{avg_loss:.2f}', 'Accuracy:', acc)

  del sys.path[0]
  2%|▏         | 5/225 [09:22<6:52:50, 112.59s/it]


KeyboardInterrupt: ignored