In [0]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [0]:
# License: BSD
# Author: Sasank Chilamkurthy

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import numpy as np
import pandas as pd
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
from google.colab import drive
import time
import os
import copy
import random
from functools import reduce
from skimage import io, transform
from sklearn.preprocessing import MinMaxScaler

plt.ion()   # interactive mode: figures are drawn/updated without blocking on plt.show()

In [129]:
# Mount Google Drive under /content/drive. force_remount=True re-mounts even
# when a mount already exists, so this cell can be re-run safely.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [0]:
# Project directory inside the mounted Drive (must contain data1.csv and downloads/).
# The path is relative, so it only resolves to the mount at /content/drive while
# the working directory is /content -- presumably the Colab default; confirm
# before adding any os.chdir / %cd to the notebook.
root_folder = 'drive/My Drive/computer-vision/project'

In [0]:
def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
def clear_non_ascii(word):
    """Return ``word`` with every character whose code point is 128 or above removed."""
    return ''.join(char for char in word if ord(char) < 128)

class ToTensor(object):
    """Transform that converts the numpy arrays of a sample dict into torch tensors.

    The sample must provide the keys 'image', 'ingredients' and 'nutritions';
    the returned dict has the same three keys holding tensors.
    """

    def __call__(self, sample):
        # Read all three entries up front so a missing key fails before any work is done.
        arrays = {key: sample[key] for key in ('image', 'ingredients', 'nutritions')}

        # numpy image layout is H x W x C, torch expects C x H x W.
        arrays['image'] = arrays['image'].transpose((2, 0, 1))

        return {key: torch.from_numpy(array) for key, array in arrays.items()}

class FoodDataset(Dataset):
    """Dataset pairing food images with recipe ingredients and nutrition values.

    Layout read from ``root_dir``:

    * ``data1.csv`` -- one row per recipe with (at least) the columns ``name``,
      ``ingredients`` ('|'-separated) and the four ``NUTRITION_COLUMNS``.
    * ``downloads/<recipe name>/<image file>`` -- one folder of images per
      recipe name; folders whose name matches no recipe are ignored.

    Every item is a dict with the keys ``'image'`` (array returned by
    ``skimage.io.imread``), ``'nutritions'`` (min-max scaled values ordered as
    ``NUTRITION_COLUMNS``) and ``'ingredients'`` (multi-hot int vector over
    ``self.ingredients``). If ``transform`` is given it is applied to that dict.
    """

    # Single source of truth for the nutrition column order. The scaler is fit
    # in this order AND the target vectors are emitted in this order, so that
    # ``self.scaler.inverse_transform`` can be applied directly to a target or
    # prediction without mixing up columns.
    NUTRITION_COLUMNS = ('calories', 'carbs', 'fat', 'protein')

    def __init__(self, root_dir, transform=None):
        """Index the images found under ``root_dir``.

        Args:
            root_dir: directory holding ``data1.csv`` and ``downloads/``.
            transform: optional callable applied to every sample dict.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.images = None       # DataFrame with columns 'image_name', 'class'
        self.metadata = None     # cleaned and scaled recipe table
        self.ingredients = None  # sorted list of all distinct ingredients
        self.scaler = None       # MinMaxScaler fitted on NUTRITION_COLUMNS
        self.prepare_metadata()

    def __len__(self):
        """Number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` together with the targets of a matching recipe."""
        entry = self.images.iloc[idx]
        im_name, im_class = entry['image_name'], entry['class']
        image = io.imread(os.path.join(self.root_dir, 'downloads', im_class, im_name))

        # Several recipes may share one name; pick one of them at random.
        candidates = self.metadata[self.metadata['name'] == im_class.strip()]
        recipe = candidates.iloc[random.choice(range(len(candidates)))].to_dict()

        # Both helpers read only the keys they need, so the remaining recipe
        # columns do not have to be deleted first.
        sample = {
            'image': image,
            'nutritions': self.nutritions_to_array(recipe),
            'ingredients': self.ingredient_to_sparse(recipe['ingredients']),
        }
        if self.transform:
            sample = self.transform(sample)
        return sample

    def ingredient_to_sparse(self, target_ingredients):
        """Encode ``target_ingredients`` as a multi-hot vector.

        Returns an int array of length ``len(self.ingredients)`` holding 1 at
        the position of every listed ingredient and 0 elsewhere.

        Raises:
            ValueError: if an ingredient is not part of ``self.ingredients``.
        """
        encoded = np.zeros(len(self.ingredients), dtype=int)
        for ingredient in target_ingredients:
            encoded[self.ingredients.index(ingredient)] = 1
        return encoded

    def nutritions_to_array(self, row):
        """Return the nutrition values of ``row`` ordered as ``NUTRITION_COLUMNS``."""
        return np.array([row[column] for column in self.NUTRITION_COLUMNS])

    @staticmethod
    def _clean_name(name):
        """Normalise a recipe name so it can be compared with a folder name."""
        name = name.replace('"', '')    # quotes cause problems with the file system
        name = name.replace('/', '_')   # so do path separators
        name = name.replace('  ', ' ')  # some names contain two spaces instead of one
        return clear_non_ascii(name)

    def prepare_metadata(self):
        """Read the recipe table, scale the nutrition columns and index the images.

        Populates ``self.metadata``, ``self.scaler``, ``self.ingredients`` and
        ``self.images``.
        """
        food = pd.read_csv(os.path.join(self.root_dir, 'data1.csv'))
        food['ingredients'] = food['ingredients'].apply(lambda cell: cell.split('|'))
        food['name'] = food['name'].apply(self._clean_name)

        # Fit the scaler in the same column order that nutritions_to_array
        # emits; otherwise inverse_transform would swap carbs and fat.
        columns = list(self.NUTRITION_COLUMNS)
        scaler = MinMaxScaler()
        food[columns] = scaler.fit_transform(food[columns])
        self.scaler = scaler

        self.metadata = food
        self.ingredients = sorted(set(flatten(list(food['ingredients']))))

        # Keep only the folders that correspond to a known recipe. Listings are
        # sorted because os.listdir returns entries in arbitrary order, which
        # would make sample indices differ from run to run.
        downloads = os.path.join(self.root_dir, 'downloads')
        known_names = set(food['name'])
        rows = []
        for name in sorted(os.listdir(downloads)):
            if name.strip() not in known_names:
                continue
            for im in sorted(os.listdir(os.path.join(downloads, name))):
                rows.append([im, name])

        self.images = pd.DataFrame(rows, columns=['image_name', 'class'])

In [0]:
# Build the dataset with ToTensor as its only transform, so every sample's
# arrays come back as torch tensors.
image_dataset = FoodDataset(root_folder, transform=transforms.Compose([
    ToTensor()
]))

In [174]:
# Fetch sample 1 and show it as the cell output (quick check of __getitem__ + ToTensor).
image_dataset[1]

{'image': tensor([[[  8,   0,  28,  ..., 231, 230, 230],
          [ 27,  22,  16,  ..., 231, 230, 230],
          [ 47,  49,  16,  ..., 231, 230, 230],
          ...,
          [ 59,  31,  10,  ...,  24,  26,  24],
          [  1,   8,  47,  ...,  34,  46,  13],
          [ 10,   2, 101,  ...,  70, 118,  80]],
 
         [[ 20,   8,  40,  ..., 231, 230, 230],
          [ 41,  36,  30,  ..., 231, 230, 230],
          [ 62,  64,  31,  ..., 231, 230, 230],
          ...,
          [ 46,  20,   0,  ...,  19,  21,  19],
          [  0,   3,  42,  ...,  29,  41,   8],
          [  7,   0,  99,  ...,  65, 113,  75]],
 
         [[ 46,  32,  64,  ..., 229, 228, 228],
          [ 68,  62,  56,  ..., 229, 228, 228],
          [ 91,  93,  60,  ..., 229, 228, 228],
          ...,
          [ 64,  37,  16,  ...,  39,  41,  39],
          [ 19,  26,  64,  ...,  51,  63,  30],
          [ 34,  26, 123,  ...,  87, 135,  98]]], dtype=torch.uint8),
 'ingredients': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [0]:
# NOTE(review): this rebinds image_dataset WITHOUT a transform, so from here on
# samples hold raw numpy arrays instead of tensors and the dataset built with
# ToTensor is discarded -- confirm this is intended, or use a distinct name.
image_dataset = FoodDataset(root_folder)