In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)
  

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

/bin/bash: nvidia-smi: command not found
Your runtime has 1081.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [None]:
#Importing the Required Libraries
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob

def load_dataset(path, num_classes=120):
  """Collect image file paths and one-hot class labels from a folder tree.

  Args:
    path: Root directory passed to sklearn's ``load_files``; each
      sub-folder is treated as one class.
    num_classes: Width of the one-hot encoding. Defaults to 120, the value
      that was previously hard-coded here.

  Returns:
    A tuple ``(dog_files, dog_targets)``: a NumPy array of file paths and
    the matching one-hot label matrix with ``num_classes`` columns.
  """
  # Only the file names and integer targets are used, so skip reading the
  # content of every image into memory (load_files does that by default).
  data = load_files(path, load_content=False)
  dog_files = np.array(data['filenames'])
  dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
  return dog_files, dog_targets



2023-05-01 11:44:30.665142: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Load the image file paths (X) and their one-hot labels (Y) from the
# class-per-folder tree rooted at "images/Images".
X, Y = load_dataset("images/Images") 

In [None]:
import os

# dog_names[k] is the breed name for class index k, recovered from the name
# of the folder each image lives in.
dog_names = np.zeros(Y.shape[1], np.dtype('U200'))
for image_path, one_hot in zip(X, Y):
    # The class folder is the image's immediate parent directory. Reading it
    # via os.path (instead of searching the whole path for the first "-")
    # keeps this correct when a parent directory contains a hyphen and when
    # the platform's path separator is not "/".
    class_dir = os.path.basename(os.path.dirname(image_path))
    # Assumes folders are named "<id>-<breed>": keep everything after the
    # first hyphen (breed names may contain hyphens themselves). Fall back to
    # the full folder name when there is no hyphen.
    breed = class_dir.partition("-")[2] or class_dir
    dog_names[np.argmax(one_hot)] = breed

In [None]:
import pandas as pd

def load_mixed_breed_dataset(image_path, percentages_path):
    """Load mixed-breed image paths and their per-breed percentage targets.

    Args:
        image_path: Root directory passed to sklearn's ``load_files``.
        percentages_path: CSV file with an "Image Name" column and one
            column per breed name.

    Returns:
        A tuple ``(dog_files, dog_targets)``: a NumPy array with the paths of
        all ``.jpg`` files found, and a DataFrame with one row per file (in
        the same order) whose columns follow the module-level ``dog_names``.

    Raises:
        KeyError: if an image name or a breed name is missing from the CSV.
    """
    import os

    # Only file names are needed; do not read every file's bytes into memory.
    images = load_files(image_path, load_content=False)
    percentages = pd.read_csv(percentages_path).set_index("Image Name")

    # Keep only the ".jpg" files found under the image root.
    all_files = np.array(images['filenames'])
    is_jpg = np.array([f.endswith(".jpg") for f in all_files], dtype=bool)
    dog_files = all_files[is_jpg]

    # The CSV is keyed by file name without directory or extension.
    dog_image_names = np.array(
        [os.path.splitext(os.path.basename(f))[0] for f in dog_files]
    )

    # Align rows with dog_files, then order columns like the class indices.
    dog_targets = percentages.loc[dog_image_names]
    dog_targets = dog_targets[dog_names]
    return dog_files, dog_targets

In [None]:
# Turn the NumPy string array of breed names into a plain Python list; it is
# used below as a DataFrame column selector and indexed by predicted class id.
dog_names = list(dog_names)

In [None]:
# Training with mixedbreed dataset setting
# To be segmented into training, validation, and test sets
files, targets = load_mixed_breed_dataset("JP_Images_Single", "JP_Percentages_Normalized.csv")

# Kept as all files
# The complete set comes from exactly the same folder and CSV as above, so
# copy the result instead of scanning the image directory and parsing the CSV
# a second time. Copies keep the two sets independent of each other.
allfiles, alltargets = files.copy(), targets.copy()

# Training with purebreed dataset setting
#files, targets = X,Y

In [None]:
from sklearn.model_selection import train_test_split
# Stage 1: hold out 25% of all samples as the test set.
files_train, files_test, targets_train, targets_test = train_test_split(
    files,
    targets,
    test_size=0.25,
    random_state=87,
)

# Stage 2: take 25% of the remaining 75% as the validation set
# (0.25 x 0.75 = 0.1875 of all samples); the rest is used for training.
files_train, files_val, targets_train, targets_val = train_test_split(
    files_train,
    targets_train,
    test_size=0.25,
    random_state=87,
)

In [None]:
# Report how many images ended up in each split and how many breeds exist.
split_report = (
    ('There are %s total purebred dog images.\n', str(len(files))),
    ('There are %d training purebred dog images.', len(files_train)),
    ('There are %d validation purebred dog images.', len(files_val)),
    ('There are %d test purebred dog images.', len(files_test)),
)
for template, count in split_report:
    print(template % count)
print('There are total {0} dog breeds'.format(len(dog_names)))

There are 60 total purebred dog images.

There are 33 training purebred dog images.
There are 12 validation purebred dog images.
There are 15 test purebred dog images.
There are total 120 dog breeds


In [None]:
import matplotlib.pyplot as plt
import cv2 

In [None]:
#Visualize some training examples

def displayImage(img_path,ax):
  """Draw the image stored at ``img_path`` on the matplotlib axes ``ax``.

  Args:
    img_path: Path of the image file to read with OpenCV.
    ax: Axes object whose ``imshow`` is used for drawing.

  Raises:
    FileNotFoundError: if OpenCV cannot read an image from ``img_path``.
  """
  image = cv2.imread(img_path)
  if image is None:
    # cv2.imread returns None for a missing or undecodable file instead of
    # raising; fail here with the offending path rather than inside cvtColor.
    raise FileNotFoundError("Could not read image: {}".format(img_path))
  # OpenCV decodes to BGR channel order; convert to RGB for display.
  ax.imshow(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))

#sample = list(np.random.choice(x_train.shape[0],8))
#fig = plt.figure(figsize=(20,10))
#for index,im in enumerate(sample):
#  ax = fig.add_subplot(3,4,index+1,xticks=[], yticks=[])
#  displayImage(x_train[im],ax)
#  ax.set_title(dog_names[int(np.argmax(y_train[im]))])

In [None]:
def createdatasets(dataset):
  """Read every image in ``dataset`` and stack them as 299x299 arrays.

  Args:
    dataset: Sequence (list or NumPy array) of image file paths.

  Returns:
    A float NumPy array of shape ``(len(dataset), 299, 299, 3)`` holding the
    resized pixel values.

  Raises:
    FileNotFoundError: if OpenCV cannot read one of the images.
  """
  # len() instead of .shape[0] so plain lists of paths are accepted too.
  data_set = np.zeros([len(dataset),299,299,3])
  for i,image_path in enumerate(dataset):
    img = cv2.imread(image_path)
    if img is None:
      # cv2.imread returns None for a missing or undecodable file; report
      # which path failed instead of crashing inside cv2.resize.
      raise FileNotFoundError("Could not read image: {}".format(image_path))
    # NOTE(review): pixels stay in OpenCV's BGR channel order here. The
    # prediction helper in this notebook feeds BGR as well, so the pipeline
    # is self-consistent -- confirm before switching either side to RGB.
    data_set[i,:,:,:] = cv2.resize(img, dsize=(299, 299), interpolation=cv2.INTER_CUBIC)
  return data_set


In [None]:
# Build the Xception network with Keras as a feature extractor.
# include_top=False leaves out the network's final classification layers, so
# the input tensor is supplied explicitly to fix the input at 299x299x3.
from keras.applications.xception import Xception
from keras.layers import Input
newinput = Input(shape=(299,299,3))
model = Xception(include_top=False,input_tensor=newinput)
model.summary()

2023-05-01 11:45:57.674548: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-01 11:45:57.684037: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Model: "xception"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 149, 149, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 block1_conv1_bn (BatchNormaliz  (None, 149, 149, 32  128        ['block1_conv1[0][0]']           
 ation)                         )                                                          

In [None]:
# Freeze the Xception base: mark every layer as non-trainable so that only
# the separately defined classification head is updated during training.
for layer in model.layers:
  layer.trainable = False


In [None]:
import gc

In [None]:
from keras.applications.xception import preprocess_input
from keras.backend import expand_dims
import tensorflow as tf

In [None]:
import math

In [None]:
from keras.layers import Input,Conv2D,MaxPooling2D,Dense,Dropout,BatchNormalization
from keras.models import Sequential

In [None]:
#defining the new last layer of the model 
from keras.layers import GlobalAveragePooling2D

# Classification head that consumes the pre-computed Xception features:
# average each of the 2048 feature maps over its 10x10 grid, then map the
# resulting vector to 120 softmax outputs (one per breed).
model1 = Sequential([
    GlobalAveragePooling2D(input_shape=(10,10,2048)),
    Dense(120,activation='softmax'),
])



In [None]:
# Print the layers, output shapes and parameter counts of the classification head.
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 120)               245880    
                                                                 
Total params: 245,880
Trainable params: 245,880
Non-trainable params: 0
_________________________________________________________________


In [None]:
# bottleneck to reduce excess computation - pass all the images once through
# the frozen Xception base and reuse those features during training.

#bottleneck for segmented mixed breed dataset
bottle_neck_set_segmented = model.predict(preprocess_input(createdatasets(files)),verbose=1)

#bottleneck for complete mixed breed dataset
if np.array_equal(files, allfiles):
    # Both sets list exactly the same images in the same order, so the
    # features are identical: reuse them instead of decoding every image and
    # running the network a second time. Nothing below modifies these arrays
    # in place, so sharing one array is safe.
    bottle_neck_set_complete = bottle_neck_set_segmented
else:
    bottle_neck_set_complete = model.predict(preprocess_input(createdatasets(allfiles)),verbose=1)



In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd 
# Hoisted out of the loop body. No checkpoint callback is attached to fit():
# saving to the weights file that every run reloads would leak one run's
# training into the next.
from keras.callbacks import ModelCheckpoint

# Number of independent train/evaluate repetitions (used for the p-value).
N_RUNS = 10
# Weights of the head trained on the purebred data; every run starts from them.
PRETRAINED_WEIGHTS = "purebreed.best.hdf5"
# True  -> "Purebred setting": evaluate on the complete mixed-breed set.
# False -> "Mixedbreed setting": evaluate on this run's held-out test split.
# NOTE(review): when the model is trained on the mixed-breed split (the
# setting currently active above), the complete set contains the samples
# this run was just trained on, so the reported numbers are not held-out.
# Confirm which setting is intended before using the results.
EVALUATE_ON_COMPLETE_SET = True


def _top1_accuracy(pred, truth):
    """Percentage of rows whose highest-scoring class is the same in both arrays."""
    hits = np.argmax(pred, axis=1) == np.argmax(truth, axis=1)
    return float(np.mean(hits) * 100)


def _top5_overlap(pred, truth):
    """Average count of classes shared by the five highest entries of each row."""
    pred_top5 = np.argsort(pred, axis=1)[:, -5:]
    truth_top5 = np.argsort(truth, axis=1)[:, -5:]
    shared = [np.isin(t, p).sum() for t, p in zip(truth_top5, pred_top5)]
    return float(np.mean(shared))


def _mean_l1_distance(pred, truth):
    """Average over rows of the summed absolute difference between both arrays."""
    return float(np.mean(np.sum(np.abs(truth - pred), axis=1)))


# One row per run: [top-1 accuracy (%), top-5 overlap, mean L1 distance].
results = [[0 for _ in range(3)] for _ in range(N_RUNS)]

print(bottle_neck_set_segmented.shape)
print(targets.shape)

#loop N_RUNS times to get multiple results in order to calculate p-value
for run in range(N_RUNS):

    # Fresh random split every run (no random_state on purpose):
    # 25% test, then 25% of the remaining 75% (= 18.75% overall) validation.
    (bottle_neck_train_set_segmented, bottle_neck_test_set_segmented,
     targets_train, targets_test) = train_test_split(
        bottle_neck_set_segmented, targets, test_size=0.25)

    (bottle_neck_train_set_segmented, bottle_neck_val_set_segmented,
     targets_train, targets_val) = train_test_split(
        bottle_neck_train_set_segmented, targets_train, test_size=0.25)

    # Re-compiling gives each run a fresh optimizer; reloading the weights
    # resets the head to the pretrained purebred model.
    model1.compile(loss='categorical_crossentropy',optimizer='adam'
                     ,metrics=['accuracy'])
    model1.load_weights(PRETRAINED_WEIGHTS)

    model1.fit(bottle_neck_train_set_segmented,targets_train,
              batch_size=64,epochs=50,verbose=1,
             validation_data=(bottle_neck_val_set_segmented,targets_val),
               shuffle=True)

    # Held-out accuracy of this run, predicted in one batched call instead of
    # one model invocation per sample.
    held_out_pred = model1.predict(bottle_neck_test_set_segmented, verbose=0)
    held_out_truth = np.array(targets_test, dtype=float)
    print("Test Set Accuracy {}%".format(_top1_accuracy(held_out_pred, held_out_truth)))

    if EVALUATE_ON_COMPLETE_SET:
        # Purebred setting
        test_targets = alltargets
        test_set = bottle_neck_set_complete
    else:
        # Mixedbreed setting
        test_targets = targets_test
        test_set = bottle_neck_test_set_segmented

    _pred = model1.predict(test_set, verbose=0)
    original_pred = np.array(test_targets, dtype=float)

    #metric 1: top-1 agreement between predicted and labelled dominant breed
    results[run][0] = _top1_accuracy(_pred, original_pred)
    print("Mixed Breed Test Set Accuracy {}%".format(results[run][0]))

    #metric 2: how many of the five highest labelled breeds are also in the
    # five highest predicted breeds
    results[run][1] = _top5_overlap(_pred, original_pred)
    print("Mixed Breed Test Set Top-5 Set Average {}".format(results[run][1]))

    #metric 3: L1 distance between labelled and predicted breed distributions
    results[run][2] = _mean_l1_distance(_pred, original_pred)
    print("Mixed Breed Test Set Average Distance {}".format(results[run][2]))


pd.DataFrame(results).to_csv("resultsMixedSingleDataset.csv")

(60, 10, 10, 2048)
(60, 120)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Set Accuracy 33.33333333333333%
Mixed Breed Test Set Accuracy 51.66666666666667%
Mixed Breed Test Set Top-5 Set Average 2.566666666666667
Mixed Breed Test Set Average Distance 0.9414169474208764
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/

In [None]:
#test your own image
def Breed(imagePath):
  """Predict the breed(s) of the dog in an image and plot the result.

  The top panel shows the image titled with the most likely breed; the
  bottom panel is a bar chart of every breed whose predicted share exceeds 3%.

  Args:
    imagePath: Path of the image file to classify.

  Raises:
    FileNotFoundError: if OpenCV cannot read an image from ``imagePath``.
  """
  image = cv2.imread(imagePath)
  if image is None:
    # cv2.imread returns None for a missing or undecodable file.
    raise FileNotFoundError("Could not read image: {}".format(imagePath))

  figure = plt.figure(figsize=(20,10))
  # Fixed: the grid spec must come from this function's own figure; the old
  # code referenced an undefined name `fig`.
  gs = figure.add_gridspec(2, 3)
  ax = figure.add_subplot(gs[0,0])
  ax.imshow(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))

  # Resize to the 299x299 network input and add a leading batch dimension.
  res = cv2.resize(image,dsize=(299,299),interpolation=cv2.INTER_CUBIC)
  res1 = np.zeros([1,299,299,3])
  res1[0,:,:,:] = res
  print(res.shape)

  # Extract features with the frozen base, then run the head once and reuse
  # the probabilities for both the title and the bar chart.
  bottle_neck = model.predict(preprocess_input(res1),verbose=1)
  probabilities = model1.predict(bottle_neck)[0]

  prediction = int(np.argmax(probabilities))
  ax.set_title("I think it is "+dog_names[prediction])

  ax1 = figure.add_subplot(gs[1,:])
  # Only chart breeds with more than a 3% predicted share.
  mask = probabilities > 0.03
  predicted_dogs = np.array(dog_names)[mask]
  predictions = probabilities[mask]
  ax1.bar(predicted_dogs, predictions)
