In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.autograd import Variable

import glob
import os
import matplotlib.pyplot as plt
import torchvision.models as models
import cv2
import pickle
import random

In [3]:
class Classifier(nn.Module):
    """Fully connected classifier: two tanh hidden layers and a softmax output.

    Args:
        input_size: Number of features in each input row.
        hidden_layers: Width shared by both hidden layers. Despite the plural
            name this is a single integer, not a number of layers.
        output_size: Number of classes in the returned distribution.
    """

    def __init__(self, input_size, hidden_layers, output_size):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_layers)
        self.fc2 = nn.Linear(hidden_layers, hidden_layers)
        self.fc3 = nn.Linear(hidden_layers, output_size)

    def forward(self, data):
        """Return class probabilities for ``data``.

        The softmax is taken over ``dim=1``, so ``data`` needs a leading batch
        dimension, i.e. shape ``(batch, input_size)``.
        """
        # torch.tanh replaces the deprecated F.tanh; the values are identical.
        x = torch.tanh(self.fc1(data))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return F.softmax(x, dim=1)

In [18]:
def label2onehot(label_id, num_classes=101):
    """Encode a class index as a one-hot vector.

    Args:
        label_id: Index of the entry to set to 1.
        num_classes: Length of the returned vector. Defaults to 101, the
            value that used to be hard-coded.

    Returns:
        A float ``numpy.ndarray`` of shape ``(num_classes,)`` that is zero
        everywhere except at ``label_id``.

    Raises:
        IndexError: If ``label_id`` is outside the vector's index range.
    """
    vector = np.zeros(num_classes)
    vector[label_id] = 1
    return vector

def onehot2label(vector):
    """Return the index of the largest entry of ``vector`` (flattened)."""
    return np.asarray(vector).argmax()

In [4]:
# Load the saved bundle; later cells read its "records" and "net" entries.
# `pickle` comes from the notebook's import cell, so it is not re-imported here.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only open a "vgg16_data.pkl" that you created yourself or otherwise trust.
with open("vgg16_data.pkl","rb") as f:
    temp = pickle.load(f)

In [14]:
# Restore the cached training samples; their stored order is kept as-is.
with open("training_data.pkl","rb") as train_file:
    training_data = pickle.load(train_file)

# Restore the cached test samples.
with open("test_data.pkl","rb") as test_file:
    test_data = pickle.load(test_file)

In [16]:
# Unpack the loaded bundle into the history records and the saved network.
records, net = temp["records"], temp["net"]
# Move the network to the GPU. As the cell's last expression, the value
# returned by .cuda() is also what the notebook displays.
net.cuda()

Classifier(
  (fc1): Linear(in_features=4096, out_features=2048, bias=True)
  (fc2): Linear(in_features=2048, out_features=2048, bias=True)
  (fc3): Linear(in_features=2048, out_features=101, bias=True)
)

In [20]:
def _read_clean_lines(path):
    """Return the lines of the text file at ``path``, stripped, skipping blanks.

    Using ``strip()`` instead of slicing off the last character keeps the final
    entry intact when the file has no trailing newline, and also removes a
    stray ``\\r`` from files with Windows line endings.
    """
    with open(path, "r") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


# Image paths for the train/test splits, built from the entries in the meta files.
training_list = ["./food-101/images/" + name + ".jpg"
                 for name in _read_clean_lines("./food-101/meta/train.txt")]
test_list = ["./food-101/images/" + name + ".jpg"
             for name in _read_clean_lines("./food-101/meta/test.txt")]

# Class names normalised to lower case with underscores instead of spaces;
# a label's position in this list is used as its class index by later cells.
labels = [label.replace(" ",'_').lower()
          for label in _read_clean_lines('./food-101/meta/labels.txt')]

In [21]:
def validate(test_data,net):
    """Return the fraction of samples in ``test_data`` that ``net`` gets right.

    Args:
        test_data: Sized collection of ``(label, feature)`` pairs. Each feature
            is converted to a float32 tensor and handed to ``net`` unchanged,
            so it must already have the shape ``net`` expects
            (presumably ``(1, n_features)`` -- TODO confirm against the data).
        net: Model called on each feature. The predicted class is the index of
            the largest value in its flattened output.

    Returns:
        Accuracy in ``[0, 1]``, or ``nan`` when ``test_data`` is empty
        (previously this raised ``ZeroDivisionError``).
    """
    if len(test_data) == 0:
        return float("nan")

    # Run on whatever device the model lives on instead of forcing CUDA.
    parameters = getattr(net, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    correct = 0
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for label, feature in test_data:
            x = torch.as_tensor(feature, dtype=torch.float32).to(device)
            # Flattened argmax, the same index np.argmax gave on the numpy copy.
            if net(x).argmax().item() == label:
                correct += 1
    return correct / len(test_data)

In [64]:
# Group samples by class index (the first element of each sample). Every
# class gets an entry up front so classes without samples map to an empty list.
test_data_dict = {class_id: [] for class_id in range(len(labels))}
# The per-class error cells further down index `training_data_dict`, which was
# not defined anywhere in the notebook; build it here the same way.
# NOTE(review): assumes training samples store the label first, like test samples.
training_data_dict = {class_id: [] for class_id in range(len(labels))}

for grouped, samples in ((test_data_dict, test_data), (training_data_dict, training_data)):
    for sample in samples:
        grouped[sample[0]].append(sample)

In [65]:
# Column-oriented table: one class name and one test accuracy per class index.
class_ids = range(len(labels))
test_acc_of_class = {
    "food_name": [labels[class_id] for class_id in class_ids],
    "accuracy": [validate(test_data_dict[class_id], net) for class_id in class_ids],
}

In [66]:
import seaborn as sns
import pandas as pd
# Use seaborn's "whitegrid" look for any plots drawn later.
sns.set_style(style="whitegrid")

In [67]:
# One row per class, with columns "food_name" and "accuracy".
pd_test_acc_of_class = pd.DataFrame(test_acc_of_class)

In [68]:
# The 25 classes with the highest test accuracy, best first.
(
    pd_test_acc_of_class
    .sort_values("accuracy", ascending=False)
    .loc[:, ["food_name", "accuracy"]]
    .iloc[:25]
)

Unnamed: 0,food_name,accuracy
33,edamame,0.944
63,macarons,0.832
32,dumplings,0.828
64,miso_soup,0.784
54,hot_and_sour_soup,0.784
70,pad_thai,0.752
51,guacamole,0.748
40,french_fries,0.744
86,sashimi,0.736
69,oysters,0.732


In [69]:
# Classes ranked 26-50 by test accuracy, best first.
(
    pd_test_acc_of_class
    .sort_values("accuracy", ascending=False)
    .loc[:, ["food_name", "accuracy"]]
    .iloc[25:50]
)

Unnamed: 0,food_name,accuracy
52,gyoza,0.628
83,red_velvet_cake,0.624
41,french_onion_soup,0.62
27,creme_brulee,0.62
23,churros,0.616
7,bibimbap,0.608
55,hot_dog,0.604
29,cup_cakes,0.6
43,fried_calamari,0.596
78,poutine,0.592


In [70]:
# Classes ranked 51-75 by test accuracy, best first.
(
    pd_test_acc_of_class
    .sort_values("accuracy", ascending=False)
    .loc[:, ["food_name", "accuracy"]]
    .iloc[50:75]
)

Unnamed: 0,food_name,accuracy
19,chicken_quesadilla,0.48
17,cheese_plate,0.48
20,chicken_wings,0.48
59,lasagna,0.468
21,chocolate_cake,0.468
3,beef_carpaccio,0.456
48,greek_salad,0.452
38,fish_and_chips,0.452
53,hamburger,0.444
85,samosa,0.44


In [71]:
# Remaining classes (rank 76 onwards) by test accuracy, best first.
(
    pd_test_acc_of_class
    .sort_values("accuracy", ascending=False)
    .loc[:, ["food_name", "accuracy"]]
    .iloc[75:]
)

Unnamed: 0,food_name,accuracy
50,grilled_salmon,0.368
67,omelette,0.368
47,gnocchi,0.36
36,falafel,0.348
39,foie_gras,0.34
26,crab_cakes,0.336
9,breakfast_burrito,0.328
96,tacos,0.324
18,chicken_curry,0.316
77,pork_chop,0.312


In [61]:
# Last row of the history stored under entry 10 of `records`.
pd.DataFrame(records[10]).tail(1)

Unnamed: 0,test,train
99,0.82804,0.82662


In [63]:
# Display entry 5 of `records` in full; nothing here truncates the output.
records[5]

{'test': [0.5158811881188119,
  0.5815841584158415,
  0.6029306930693069,
  0.6145346534653465,
  0.6195247524752475,
  0.6286336633663366,
  0.6293465346534654,
  0.6280792079207921,
  0.6256237623762376,
  0.6285544554455446,
  0.6383366336633663,
  0.6413465346534654,
  0.6455841584158416,
  0.6527524752475248,
  0.6507722772277228,
  0.6570693069306931,
  0.6653069306930693,
  0.6683168316831684,
  0.6725148514851486,
  0.6752871287128713,
  0.6790099009900991,
  0.680990099009901,
  0.6854257425742575,
  0.6884752475247524,
  0.6939405940594059,
  0.6995643564356435,
  0.6991287128712871,
  0.7023762376237623,
  0.7053069306930693,
  0.7066930693069307,
  0.7094257425742574,
  0.7093069306930693,
  0.7075247524752475,
  0.707089108910891,
  0.706059405940594,
  0.7096633663366336,
  0.7072475247524752,
  0.7084356435643564,
  0.7104554455445544,
  0.7179405940594059,
  0.7220990099009901,
  0.7236039603960396,
  0.7243960396039604,
  0.7257425742574257,
  0.7266534653465346,
  0.7

In [74]:
def show_error_result(test_data, net,error_labels=None):
    """Collect the predicted class index of every misclassified sample.

    Despite its name, this function displays nothing; it only gathers the
    wrong predictions so the caller can tabulate them.

    Args:
        test_data: Iterable of ``(label, feature)`` pairs. Each feature is
            converted to a float32 tensor and handed to ``net`` unchanged.
        net: Model called on each feature. The predicted class is the index of
            the largest value in its flattened output.
        error_labels: Optional list to append the wrong predictions to. When
            omitted, a new list is created.

    Returns:
        The list of wrongly predicted class indices (``error_labels`` itself
        when one was passed in).
    """
    if error_labels is None:
        error_labels = []

    # Run on whatever device the model lives on instead of forcing CUDA.
    parameters = getattr(net, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for label, feature in test_data:
            x = torch.as_tensor(feature, dtype=torch.float32).to(device)
            predict_label_index = net(x).argmax().item()
            if predict_label_index != label:
                error_labels.append(predict_label_index)
    return error_labels

In [102]:
# Steak (class index 93): count which classes its misclassified samples were assigned to.
steak_error_labels = []
show_error_result(training_data_dict[93], net, steak_error_labels)
a = (
    pd.Series([labels[wrong_id] for wrong_id in steak_error_labels])
    .value_counts()
    .to_frame("Steak")
)
a

Unnamed: 0,Steak
pork_chop,112
baby_back_ribs,94
filet_mignon,70
grilled_salmon,45
french_toast,41
foie_gras,21
crab_cakes,20
prime_rib,19
peking_duck,13
hamburger,13


In [103]:
# Grilled cheese sandwich (class index 49): count which classes its
# misclassified samples were assigned to.
grilled_cheese_sandwich_error_labels = []
show_error_result(training_data_dict[49], net, grilled_cheese_sandwich_error_labels)
a = (
    pd.Series([labels[wrong_id] for wrong_id in grilled_cheese_sandwich_error_labels])
    .value_counts()
    .to_frame("Grilled_cheese_sandwich")
)
a

Unnamed: 0,Grilled_cheese_sandwich
garlic_bread,62
pulled_pork_sandwich,59
club_sandwich,56
hamburger,48
french_toast,44
grilled_salmon,39
gyoza,32
fish_and_chips,28
omelette,21
hot_dog,20


In [104]:
# Cheesecake (class index 16): count which classes its misclassified samples were assigned to.
cheesecake_error_labels = []
show_error_result(training_data_dict[16], net, cheesecake_error_labels)
a = (
    pd.Series([labels[wrong_id] for wrong_id in cheesecake_error_labels])
    .value_counts()
    .to_frame("Cheesecake")
)
a

Unnamed: 0,Cheesecake
tiramisu,85
chocolate_cake,71
panna_cotta,64
strawberry_shortcake,56
red_velvet_cake,52
foie_gras,40
carrot_cake,32
donuts,24
chocolate_mousse,23
waffles,22


In [107]:
# Bruschetta (class index 10): count which classes its misclassified samples were assigned to.
bruschetta_error_labels = []
show_error_result(training_data_dict[10], net, bruschetta_error_labels)
a = (
    pd.Series([labels[wrong_id] for wrong_id in bruschetta_error_labels])
    .value_counts()
    .to_frame("Bruschetta")
)
a

Unnamed: 0,Bruschetta
lobster_roll_sandwich,61
beef_carpaccio,39
caprese_salad,38
sushi,27
takoyaki,27
tuna_tartare,26
ceviche,26
pizza,25
tacos,21
grilled_salmon,18


In [109]:
# Tuna tartare (class index 99): count which classes its misclassified samples were assigned to.
tuna_tartare_error_labels = []
show_error_result(training_data_dict[99], net, tuna_tartare_error_labels)
a = (
    pd.Series([labels[wrong_id] for wrong_id in tuna_tartare_error_labels])
    .value_counts()
    .to_frame("Tuna_tartare")
)
a

Unnamed: 0,Tuna_tartare
ceviche,18
beef_tartare,18
panna_cotta,13
foie_gras,12
guacamole,12
crab_cakes,12
bread_pudding,9
beet_salad,8
sashimi,8
gyoza,7
