# Food Classification
## 1. Data Preprocessing

This dataset is collected from Internet. It has in total 11 classes: Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit. There are 9866 images in Training set, 3430 images in Validation set and 3347 images in Testing set.

In [1]:
import cv2
import os
import numpy as np

In [2]:
def read_file(path,label_print): 
    #label_print is a boolean_value, if True, return label value
    img_dir=sorted(os.listdir(path))
    x=np.zeros((len(img_dir),64,64,3),dtype=np.uint8)
    y=np.zeros((len(img_dir)),dtype=np.uint8)
    for i,file in enumerate(img_dir):
        img=cv2.imread(os.path.join(path,file))
        x[i,:,:]=cv2.resize(img,(64,64))
        if label_print:
            y[i]=int(file.split('_')[0])
    if label_print:
        return x,y
    else:
        return x

In [3]:
base_path='/kaggle/input/food11'
X_train,y_train=read_file(os.path.join(base_path,'training'),True)
X_val,y_val=read_file(os.path.join(base_path,'validation'),True)
X_test=read_file(os.path.join(base_path,'evaluation'),False)

## 2. Data Loader
Using PyTorch to load dataset

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,Dataset

In [5]:
class Food(Dataset):
    def __init__(self,x,y=None,transform=None):
        self.x=x
        self.y=y
        if y is not None:
            self.y=torch.LongTensor(y)
        self.transform=transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self,index):
        X=self.x[index]
        if self.transform is not None:
            X=self.transform(X)
        if self.y is not None:
            Y=self.y[index]
            return X,Y
        else:
            return X
    

In [6]:
train_transform=transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5],std=[0.2,0.2,0.2])
])
val_transform=transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5],std=[0.2,0.2,0.2])
])
training_set=Food(X_train,y_train,train_transform)
val_set=Food(X_val,y_val,val_transform)

In [7]:
training_loader=DataLoader(training_set,batch_size=64,shuffle=True)
val_loader=DataLoader(val_set,batch_size=64,shuffle=False)

## 3. Define VGG16 Model

In [8]:
import torch.nn as nn
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16,self).__init__()
        
        self.layer1=nn.Sequential(
        nn.Conv2d(in_channels=3,out_channels=64,kernel_size=3,padding='same'),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3,padding='same'),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2) #[64,16,16]
        )
        self.layer2=nn.Sequential(
        nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,padding='same'),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=128,out_channels=128,kernel_size=3,padding='same'),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2)) #[128,8,8]
        
        self.layer3=nn.Sequential(
        nn.Conv2d(in_channels=128,out_channels=256,kernel_size=3,padding='same'),
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,padding='same'),
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,padding='same'),
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2)) #[256,4,4]
        
        self.layer4=nn.Sequential(
        nn.Conv2d(in_channels=256,out_channels=512,kernel_size=3,padding='same'),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,padding='same'),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,padding='same'),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2)) #[512,2,2]
        
        self.layer5=nn.Sequential(
        nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,padding='same'),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,padding='same'),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,padding='same'),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2)) #[512,1,1]
        
        self.conv=nn.Sequential(
        self.layer1,
        self.layer2,
        self.layer3,
        self.layer4,
        self.layer5)
        
        self.fc=nn.Sequential(
        nn.Linear(512*2*2,1024),
        nn.ReLU(inplace=True),
        #nn.Dropout(),
        nn.Linear(1024,512),
        nn.ReLU(inplace=True),
        #nn.Dropout(),
        nn.Linear(512,11))
        
    def forward(self,x):
        x=self.conv(x)
        #print(x.shape)
        x=x.view(x.size()[0],-1)
        #print(x.shape)
        x=self.fc(x)
        
        return x
    

## Define Lenet-5 Network

In [9]:
class Lenet(nn.Module):
    def __init__(self):
        super(Lenet,self).__init__()
        #[32,32,3]
        self.layer1=nn.Sequential(
        nn.Conv2d(in_channels=3,out_channels=6,kernel_size=5,padding=2),
        nn.BatchNorm2d(6),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2)) #[6,16,16]
        
        self.layer2=nn.Sequential(
        nn.Conv2d(in_channels=6,out_channels=16,kernel_size=5),
        nn.BatchNorm2d(16),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2,2)) #[16,6,6]
        
        self.conv_layer=nn.Sequential(
        self.layer1,
        self.layer2)
        
        self.fc=nn.Sequential(
        nn.Linear(16*14*14,120),
        nn.ReLU(inplace=True),
        nn.Linear(120,84),
        nn.ReLU(inplace=True),
        nn.Linear(84,11)
        )
        
    def forward(self,x):
        x=self.conv_layer(x)
        x=x.view(x.size()[0],-1)
        x=self.fc(x)
        return x

In [10]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #torch.nn.MaxPool2d(kernel_size, stride, padding)
        #input 維度 [3, 64,64]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 64,64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32,32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32,32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16,16]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 16,16
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 8,8]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 8,8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4,4]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 4,4]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 2,2]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*2*2, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [11]:
# if GPU avaliable
device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [12]:
model=Classifier()
model=model.to(device)

## 4. Train process

In [13]:
cri=torch.nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=0.001)

In [14]:
len(training_set)

9866

In [15]:

for epoch in range(50):
    train_acc=0.0
    train_loss=0.0
    val_acc=0.0
    val_loss=0.0
    model.train() #open dropout layers
    for i,data in enumerate(training_loader):
        optimizer.zero_grad()
        inputs,labels=data
        inputs = inputs.to(device)
        labels = labels.to(device)
        train_pred=model(inputs)
        batch_loss=cri(train_pred,labels)
        batch_loss.backward()
        optimizer.step()
        #train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_acc+=np.sum(np.argmax(train_pred.cpu().data.numpy(),axis=1)==labels.cpu().numpy())
        train_loss+=batch_loss.item()
    #if epoch%9==0:
    print('epoch:',epoch+1,'train Accuracy:{:.4f} train loss:{:.4f}'.format(train_acc/len(traing_loader),train_loss/len(training_loader)))
    #print('finish train!')
    
    model.eval()
    with torch.no_grad():
        for i,data in enumerate(val_loader):
            inputs,labels=data
            inputs = inputs.to(device)
            labels = labels.to(device)
            val_pred=model(inputs)
            batch_loss=cri(val_pred,labels)
        #train_acc += np.sum(np.argmax(train_pred.data.numpy(), axis=1) == data[1].numpy())
            val_acc+=np.sum(np.argmax(val_pred.cpu().data.numpy(),axis=1)==labels.cpu().numpy())
            val_loss+=batch_loss.item()
    #if epoch%9==0:
        print('epoch:',epoch+1,'val Accuracy:{:.4f} val loss:{:.4f}'.format(val_acc/len(val_loader),val_loss/len(val_loader)))


epoch: 1 train Accuracy:0.2823 train loss:0.0316
epoch: 1 val Accuracy:0.3108 val loss:0.0304
epoch: 2 train Accuracy:0.3869 train loss:0.0273
epoch: 2 val Accuracy:0.3965 val loss:0.0281
epoch: 3 train Accuracy:0.4572 train loss:0.0245
epoch: 3 val Accuracy:0.4484 val loss:0.0255
epoch: 4 train Accuracy:0.4989 train loss:0.0226
epoch: 4 val Accuracy:0.4329 val loss:0.0267
epoch: 5 train Accuracy:0.5359 train loss:0.0210
epoch: 5 val Accuracy:0.5012 val loss:0.0234
epoch: 6 train Accuracy:0.5767 train loss:0.0193
epoch: 6 val Accuracy:0.5370 val loss:0.0220
epoch: 7 train Accuracy:0.5956 train loss:0.0185
epoch: 7 val Accuracy:0.5913 val loss:0.0198
epoch: 8 train Accuracy:0.6261 train loss:0.0169
epoch: 8 val Accuracy:0.5061 val loss:0.0242
epoch: 9 train Accuracy:0.6416 train loss:0.0162
epoch: 9 val Accuracy:0.5350 val loss:0.0224
epoch: 10 train Accuracy:0.6721 train loss:0.0148
epoch: 10 val Accuracy:0.5810 val loss:0.0203
epoch: 11 train Accuracy:0.6882 train loss:0.0142
epoch: 1

In [16]:
# concat train set and val set to increase the dataset and get a better performance
train_val_x = np.concatenate((X_train, X_val), axis=0)
train_val_y = np.concatenate((y_train, y_val), axis=0)
train_val_set = Food(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=32, shuffle=True)

In [17]:
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) 


for epoch in range(50):
    train_acc=0.0
    train_loss=0.0
    model_best.train()
    for i,data in enumerate(train_val_loader):
        optimizer.zero_grad()
        inputs,labels=data
        inputs=inputs.to(device)
        labels=labels.to(device)
        train_pred=model_best(inputs)
        batch_loss=cri(train_pred,labels)
        batch_loss.backward()
        optimizer.step()

        train_acc+=np.sum(np.argmax(train_pred.cpu().data.numpy(),axis=1)==labels.cpu().numpy())
        train_loss+=batch_loss.item()

  #if epoch%9==0:
    print('epoch:',epoch+1,'train Accuracy:{:.4f} train loss:{:.4f}'.format(train_acc/len(train_val_loader),train_loss/len(train_val_loader)))


epoch: 1 train Accuracy:0.3160 train loss:0.0614
epoch: 2 train Accuracy:0.4058 train loss:0.0527
epoch: 3 train Accuracy:0.4768 train loss:0.0472
epoch: 4 train Accuracy:0.5233 train loss:0.0431
epoch: 5 train Accuracy:0.5688 train loss:0.0392
epoch: 6 train Accuracy:0.5967 train loss:0.0364
epoch: 7 train Accuracy:0.6304 train loss:0.0334
epoch: 8 train Accuracy:0.6532 train loss:0.0315
epoch: 9 train Accuracy:0.6755 train loss:0.0293
epoch: 10 train Accuracy:0.7019 train loss:0.0271
epoch: 11 train Accuracy:0.7170 train loss:0.0260
epoch: 12 train Accuracy:0.7311 train loss:0.0242
epoch: 13 train Accuracy:0.7519 train loss:0.0224
epoch: 14 train Accuracy:0.7715 train loss:0.0209
epoch: 15 train Accuracy:0.7917 train loss:0.0193
epoch: 16 train Accuracy:0.8034 train loss:0.0180
epoch: 17 train Accuracy:0.8221 train loss:0.0162
epoch: 18 train Accuracy:0.8315 train loss:0.0154
epoch: 19 train Accuracy:0.8505 train loss:0.0137
epoch: 20 train Accuracy:0.8594 train loss:0.0129
epoch: 21

## 5. Test process

In [18]:
test_set=Food(X_test,y=None,transform=val_transform)
test_loader=DataLoader(test_set,batch_size=32,shuffle=False)

In [19]:
model_best.eval()
pred=[]
with torch.no_grad():
    for data in test_loader:
        inputs=data.to(device)
        test_pred=model_best(inputs)
        test_label=np.argmax(test_pred.cpu().data.numpy(),axis=1)

        for y in test_label:

            pred.append(y)

In [20]:
# save resulted label to csv file

with open("./predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(pred):
        f.write('{},{}\n'.format(i, y))

In [1]:
import torch

In [8]:
a=torch.tensor([[1,2,3]])
a.size(0)

1

In [9]:
a.squeeze().size(0)

3