In [1]:
# standard library
import os
import pickle
from random import randint

# numerics, data handling and plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# clustering and dimension reduction
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for loading/processing the images
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img

# models, layers and regularizers
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2

In [46]:
# NOTE(review): hard-coded absolute path; point this at the local copy of the food images
path= '/Users/darialaslo/Documents/CBB/CBB SEM2/IML/GIT/task-4/food'
# later cells open files by bare/relative name, so work from inside the image directory
os.chdir(path)

# this list holds all the image filenames (only the .jpg entries of the directory);
# os.scandir yields entries in arbitrary order, so sort them to make the order
# of `food` reproducible between runs and machines
with os.scandir(path) as files:
    food = sorted(file.name for file in files if file.name.endswith('.jpg'))
    


In [10]:
# read the first image resized to 224x224 and turn the
# 'PIL.Image.Image' object into a numpy array in one step
img = np.array(load_img(food[0], target_size=(224, 224)))

# sanity check of the array dimensions
print(img.shape)


(224, 224, 3)


In [11]:
# add a leading batch axis: (num_of_samples, dim 1, dim 2, channels)
reshaped_img = np.reshape(img, (1, 224, 224, 3))
print(reshaped_img.shape)


(1, 224, 224, 3)


In [12]:
# feature-extraction model

# full VGG16 network as provided by keras.applications
vgg16_full = VGG16()
# drop the output layer: the new model ends at the second-to-last layer of VGG16
model = Model(inputs=vgg16_full.inputs, outputs=vgg16_full.layers[-2].output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [13]:
def extract_features(file, model):
    """Return the features `model` predicts for a single image file.

    Parameters
    ----------
    file : path or filename of the image, handed to `load_img`.
    model : model whose `predict` method is applied to the preprocessed image.

    Returns
    -------
    The result of `model.predict` for a batch that contains only this image.
    """
    # load the image as a 224x224 array
    img = load_img(file, target_size=(224,224))
    # convert from 'PIL.Image.Image' to numpy array
    img = np.array(img)
    # reshape the data for the model: (num_of_samples, dim 1, dim 2, channels)
    reshaped_img = img.reshape(1,224,224,3)
    # prepare image for model
    imgx = preprocess_input(reshaped_img)
    # get the feature vector; `use_multiprocessing` only ever applied to
    # generator/Sequence input and newer Keras versions reject it, so it is not
    # passed. verbose=0 keeps one progress bar per image out of the notebook.
    features = model.predict(imgx, verbose=0)
    return features

In [47]:
# number of .jpg filenames collected in `food`
len(food)

10000

In [48]:
# DO NOT RUN unless changes have been made (load pickle file: dict.pkl)

# maps each image filename to the features extracted for it
data = {}

# loop through each image in the dataset, extract its features
# and store them in the dictionary under the image's filename
for image_name in food:
    data[image_name] = extract_features(image_name, model)
        
        

In [49]:
# save dict as pickle file
# features don't have to be extracted again, you can just load this as a pickle file
# (the with-block closes the file even if pickling raises)
with open("dict.pkl", "wb") as a_file:
    pickle.dump(data, a_file)

In [50]:
# array of the filenames (the dictionary keys, in insertion order)
filenames = np.array(list(data))

# array of the features, in the same order as `filenames`
feat = np.array(list(data.values()))
feat.shape


(10000, 1, 4096)

In [51]:
# flatten to one row of 4096 values per sample (drops the singleton batch axis)
feat = np.reshape(feat, (-1, 4096))
feat.shape


(10000, 4096)

In [None]:
#function for getting anchor, pos and neg
def get_image_path (file):
    """Turn an array of triplet ids into three lists of image filenames.

    Parameters
    ----------
    file : array-like whose rows hold (anchor, positive, negative) ids in
        columns 0, 1 and 2. A single triplet given as a 1-D array is accepted
        as well (np.loadtxt returns a 1-D array for a one-row file).

    Returns
    -------
    (anchor_list, positive_list, negative_list) : three lists with one
        "<id>.jpg" string per row; three empty lists for empty input.
    """
    triplets = np.asarray(file)
    # nothing to convert: keep returning three empty lists
    if triplets.size == 0:
        return [], [], []
    # promote a single 1-D triplet to one row so column indexing works
    triplets = np.atleast_2d(triplets)

    anchor_list = ["{}.jpg".format(name) for name in triplets[:, 0]]
    positive_list = ["{}.jpg".format(name) for name in triplets[:, 1]]
    negative_list = ["{}.jpg".format(name) for name in triplets[:, 2]]

    return anchor_list, positive_list, negative_list


In [None]:
#if using another neural net after extracting features, the opposite triplets and labels are needed

# use the get image name to know which one is which
# (the with-block makes sure the triplet file is closed after reading)
with open("./train_triplets.txt", "r") as train_file:
    train = np.loadtxt(train_file, dtype=str)

anchor_list1, positive_list1, negative_list1 = get_image_path (train)

#need to append the opposite order anchor, negative positive, that will have label 0
# unpacking in (anchor, negative, positive) order swaps the roles of columns 1 and 2
anchor_list2, negative_list2, positive_list2 = get_image_path (train)
#append to the previous: extend concatenates the filenames element by element
#(append would nest the whole second list as one single element)
anchor_list1.extend(anchor_list2)
positive_list1.extend(positive_list2)
negative_list1.extend(negative_list2)

#TODO: create the dataset by taking the feature vectors of each of the three images in each triplet and adding them
#for the train set it should be twice the size (original triplets plus the opposite-order triplets)
#doing this in a function would be helpful so that we can reuse it for the test set


#create labels for the train set


In [None]:
#copied from previous model
#dimensions/shapes need to be adjusted if it is to be used
# NOTE(review): this cell rebinds `model`, replacing the VGG16 feature extractor
# defined earlier; re-create that model before calling extract_features again.

#define parameters
aminoacids=4
features=20
fnode_1=16
fnode_2=12
fnode_3=10

# input: batches of (aminoacids, features) arrays
pin = Input(batch_shape=(None, aminoacids, features))

# three relu hidden layers, each with an l2 kernel penalty of 1.0e-4
layer_1 = Dense(fnode_1, kernel_initializer='he_uniform', activation='relu', kernel_regularizer=l2(1.0e-4))(pin)
layer_2 = Dense(fnode_2, kernel_initializer='he_uniform', activation='relu', kernel_regularizer=l2(1.0e-4))(layer_1)
layer_3 = Dense(fnode_3, kernel_initializer='he_uniform', activation='relu', kernel_regularizer=l2(1.0e-4))(layer_2)

# dropout followed by a single sigmoid unit for the binary label
out = Dropout(0.5)(layer_3)
out = Dense(1, kernel_initializer='he_uniform', activation='sigmoid')(out)

model = Model(inputs=[pin], outputs=[out])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

#summarise the model
model.summary()