In [None]:
# Mount Google Drive inside the Colab runtime so the project files under
# /content/drive can be read by the cells below. force_remount=True requests
# a fresh mount even when a drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
# for data cleaning
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.utils.data as Data
import matplotlib.pyplot as plt
import matplotlib.image as mpimg 
from sklearn.model_selection import train_test_split
import PIL
from PIL import Image
import queue
import torchvision.transforms.functional as TF
from torch.utils.data import IterableDataset
import random

# Drive locations of the recipe metadata CSV and of the folder of recipe
# images. IMAGE_PATH ends with "/" because file names are appended to it by
# plain string concatenation.
CSV_PATH = '/content/drive/My Drive/ECE324 Project/data/images/usable_data.csv'
IMAGE_PATH = "/content/drive/My Drive/ECE324 Project/data/images/images/"

# Seeding is currently disabled, so the random augmentations are not
# reproducible from one run to the next.
#torch.manual_seed(17)
#random.seed(1)

# With a GPU present, make CUDA float tensors the process-wide default type
# for newly created tensors. This is global state that affects every later cell.
if torch.cuda.is_available():
  torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [None]:
# Recipe metadata table; its "code" column is the per-recipe identifier that
# is later used to build image file names.
data = pd.read_csv(CSV_PATH)
code = data["code"]

# Augmentation pipeline for the extra image variants: mild colour jitter,
# an edge-padded random 200px crop, a random resized crop to 224px, and
# finally conversion to a tensor.
train_transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.03),
    transforms.RandomCrop(size=200, padding=4, padding_mode='edge'),
    transforms.RandomResizedCrop(size=224, scale=(0.9, 1.0), ratio=(0.8, 1.2)),
    transforms.ToTensor(),
])

# Plain tensor conversion (no augmentation) for the untouched original image.
tensor_transform = transforms.Compose([transforms.ToTensor()])

print(data)

                                   filtered_token  ...    code
0                      ['wheat', 'pita', 'bread']  ...   70006
1                      ['baked', 'mac', 'cheese']  ...   70012
2                            ['blonde', 'bobbie']  ...   70014
3                               ['egg', 'butter']  ...   70019
4                               ['beer', 'pizza']  ...   70032
...                                           ...  ...     ...
42850                          ['love', 'potion']  ...  269894
42851    ['pimiento', 'cheese', 'tomato', 'bite']  ...  269895
42852          ['puck', 'cheeseburger', 'slider']  ...  269896
42853  ['instant', 'pot', 'sweet', 'baby', 'rib']  ...  269898
42854      ['instant', 'pot', 'zuppa', 'toscana']  ...  269899

[42855 rows x 5 columns]


In [None]:
class Identity(nn.Module):
  """Pass-through module: ``forward`` hands its input back unchanged.

  NOTE(review): presumably intended as a drop-in replacement for layers of a
  pretrained network that should be disabled; it is not referenced in the
  visible cells, so confirm it is still needed.
  """

  def __init__(self):
    super().__init__()

  def forward(self, x):
    # No computation at all; the very same object is returned.
    return x

class MyDataset(IterableDataset):
    """Iterable dataset that drains items from a queue.

    Iterating yields items in whatever order the queue hands them out and
    stops as soon as the queue reports that it is empty. Items are consumed:
    a second iteration over the same queue only sees items added since.

    Args:
        image_queue: a ``queue.Queue``-compatible object, i.e. one providing
            ``get_nowait()`` that raises ``queue.Empty`` when nothing is
            available.
    """

    def __init__(self, image_queue):
        self.queue = image_queue

    def read_next_image(self):
        """Generator yielding queued items until the queue is exhausted."""
        while True:
            # A `qsize() > 0` check followed by a blocking `get()` is racy:
            # another consumer can take the last item in between and `get()`
            # would then block forever. A non-blocking get that treats
            # `queue.Empty` as end-of-data has no such window.
            try:
                item = self.queue.get_nowait()
            except queue.Empty:
                return
            # you can add transform here
            yield item

    def __iter__(self):
        return self.read_next_image()

# Drive folder for the per-recipe VGG feature-vector files.
filepath = "/content/drive/My Drive/ECE324 Project/data/vgg_feature_vectors/"

# Pretrained VGG16, used here only as a fixed feature extractor.
model = torchvision.models.vgg16(pretrained=True)

# Freeze every weight: nothing in this notebook trains the network.
for param in model.parameters():
  param.requires_grad = False

# Keep only the first two layers of the classifier head so the model outputs
# an intermediate feature vector instead of class scores.
# NOTE(review): presumably Linear + ReLU giving a 4096-d vector; confirm
# against the printed architecture below.
model.classifier = nn.Sequential(*list(model.classifier.children())[:2])

if torch.cuda.is_available():
  model.cuda()

print(model)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=

In [None]:
#some_tensor = torch.load(filepath+"vgg_70006.pt")
#print(some_tensor[5].dtype)
#print(some_tensor[0].shape)

In [None]:

# Number of augmented recipe images per recipe
num_variants = 5 # does not include original

# Set to True to write each recipe's vectors under `filepath`. Off by
# default, matching the previously commented-out save call.
save_vectors = False

# Rows are walked in 100 equal chunks plus one remainder chunk; the chunking
# only drives the progress printout, images are still processed one by one.
step = data.shape[0]//100
recipes = []  # NOTE(review): not used in this cell; kept in case a later cell reads it

# Send inputs to whichever device the model actually lives on. The model is
# only moved to the GPU when one is available, so an unconditional `.cuda()`
# on the inputs would crash on CPU-only runtimes.
device = next(model.parameters()).device

for batch in range(101):
  print("batch: ",batch, " out of 100")
  start = batch*step
  # The extra 101st pass picks up the rows left over by the integer division.
  end = data.shape[0] if batch == 100 else (batch+1)*step

  for i in range(start,end):
    if i % 100 == 0:
      print(i)

    # Positional lookup: `i` is a row position, not an index label.
    recipe_code = code.iloc[i]
    imagepath = IMAGE_PATH + str(recipe_code) + ".jpg"

    # Close the file handle deterministically and force 3-channel RGB so
    # grayscale / CMYK / RGBA files do not break the 3-channel VGG input.
    with Image.open(imagepath) as image_file:
      image = image_file.convert("RGB")

    # Key 0 holds the vector of the original image; keys 1..num_variants hold
    # the vectors of the randomly augmented variants.
    vector_dict = {}

    # Pure inference: make sure no autograd bookkeeping is done.
    with torch.no_grad():
      raw_image = tensor_transform(image).unsqueeze(0)  # add batch dimension
      vector_dict[0] = model(raw_image.to(device))

      for j in range(num_variants):
        img = train_transform(image).unsqueeze(0)
        vector_dict[j+1] = model(img.to(device))

    if save_vectors:
      torch.save(vector_dict, filepath+"vgg_"+str(recipe_code)+".pt")


batch:  0  out of 100
0
100
200
300
400
batch:  1  out of 100
500
600
700
800
batch:  2  out of 100
900
1000
1100
1200
batch:  3  out of 100
1300
1400
1500
1600
1700
batch:  4  out of 100
1800
1900
2000
2100
batch:  5  out of 100
2200
2300
2400
2500
batch:  6  out of 100
2600
2700
2800
2900
batch:  7  out of 100
3000
3100
3200
3300
3400
batch:  8  out of 100
3500
3600
3700
3800
batch:  9  out of 100
3900
4000
4100
4200
batch:  10  out of 100
4300
4400
4500
4600
4700
batch:  11  out of 100
4800
4900
5000
5100
batch:  12  out of 100
5200
5300
5400
5500
batch:  13  out of 100
5600
5700
5800
5900
batch:  14  out of 100
6000
6100
6200
6300
6400
batch:  15  out of 100
6500
6600
6700
6800
batch:  16  out of 100
6900
7000
7100
7200
batch:  17  out of 100
7300
7400
7500
7600
7700
batch:  18  out of 100
7800
7900
8000
8100
batch:  19  out of 100
8200
8300
8400
8500
batch:  20  out of 100
8600
8700
8800
8900
batch:  21  out of 100
9000
9100
9200
9300
9400
batch:  22  out of 100
9500
9600
9700
980

In [None]:
# Flush pending writes to Google Drive and unmount it now that processing is done.
drive.flush_and_unmount()