The dataset Tiny ImageNet has to be in the 'data/' folder

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
from tensorflow.python.client import device_lib
def get_available_devices():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos]
print(get_available_devices()) 

import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

['/device:CPU:0', '/device:GPU:0']


Instruction to download and extract the data:

wget http://cs231n.stanford.edu/tiny-imagenet-200.zip

unzip tiny-imagenet-200.zip

rm tiny-imagenet-200.zip

# 0. Loading the data

### TinyImageNet does not have labels for the testing data. We will use the 10k validation images for the testing data.

In [2]:
#Generate dataset

###### TRAINING DATA #######
#Load Training images and labels
train_directory = "./data/tiny-imagenet-200/train" 
image_list=[]
label_list=[]

label_dic={} #convert label str to int from 0 



for l,sub_dir in enumerate(os.listdir(train_directory)):
    if not sub_dir in label_dic:
        label_dic[sub_dir]=l
    sub_dir_name=os.path.join(train_directory,sub_dir,'images')
    n=0
    for file in os.listdir(sub_dir_name):
        filename = os.fsdecode(file)
        if filename.endswith(".JPEG")  or filename.endswith(".jpg") or filename.endswith(".png"):
            img=np.array(Image.open(os.path.join(sub_dir_name,file)))
            if(img.shape == (64, 64, 3)): #removing images without 3 channels
                image_list.append(img)
                label_list.append(int(label_dic[sub_dir]))
                n+=1
    print(n,'pictures in folder',sub_dir,sep=' ' )
            
X_tot=np.array(image_list)
y_tot=np.array(label_list)

###### TEST DATA #######

#getting the labels from the txt file
df = pd.read_table('./data/tiny-imagenet-200/val/val_annotations.txt', header=None)
test_labels={} 
for index, row in df.iterrows():
    test_labels[str(row[0])]= row[1]


test_directory = "./data/tiny-imagenet-200/val/images" 

test_image_list=[]
test_label_list=[]

for file in os.listdir(test_directory):
    filename = os.fsdecode(file)
  
    if filename.endswith(".JPEG")  or filename.endswith(".jpg") or filename.endswith(".png"):
        img=np.array(Image.open(os.path.join(test_directory,file)))
        if(img.shape == (64, 64, 3)): #removing images without 3 channels
            test_image_list.append(img)
            test_label_list.append(label_dic[test_labels[filename]])
            
X_test=np.array(test_image_list)
y_test=tf.keras.utils.to_categorical(np.array(test_label_list))

497 pictures in folder n03617480
499 pictures in folder n09332890
499 pictures in folder n01917289
497 pictures in folder n04398044
496 pictures in folder n03977966
491 pictures in folder n04486054
485 pictures in folder n02481823
499 pictures in folder n01910747
500 pictures in folder n01443537
495 pictures in folder n02823428
497 pictures in folder n04254777
487 pictures in folder n03837869
500 pictures in folder n07695742
493 pictures in folder n03983396
500 pictures in folder n04597913
495 pictures in folder n02395406
500 pictures in folder n02132136
492 pictures in folder n04259630
500 pictures in folder n02279972
493 pictures in folder n04371430
500 pictures in folder n03980874
454 pictures in folder n03763968
499 pictures in folder n07768694
497 pictures in folder n02948072
497 pictures in folder n02231487
497 pictures in folder n02206856
494 pictures in folder n09428293
498 pictures in folder n03100240
473 pictures in folder n04532670
495 pictures in folder n01944390
495 pictur

In [4]:
shuffler=np.random.RandomState(seed=10).permutation(len(X_tot))
X_tot = X_tot[shuffler]
y_tot = y_tot[shuffler]
y_tot=tf.keras.utils.to_categorical(y_tot)

X_train = X_tot[5000:]
y_train =  y_tot[5000:]
X_val = X_tot[:5000]
y_val =  y_tot[:5000]


print('test data:')
print(X_test.shape)
print(y_test.shape)

print('train data:')
print(X_train.shape)
print(y_train.shape)

print('val data:')
print(X_val.shape)
print(y_val.shape)

test data:
(9832, 64, 64, 3)
(9832, 200)
train data:
(93179, 64, 64, 3)
(93179, 200)
val data:
(5000, 64, 64, 3)
(5000, 200)


#### Saving the variables allows faster loading in the future

In [5]:
#save for future use without having to reload from the images folders
np.save('data/X_test.npy', X_test)
np.save('data/y_test.npy', y_test)
np.save('data/X_train.npy', X_train)
np.save('data/y_train.npy', y_train)
np.save('data/X_val.npy', X_val)
np.save('data/y_val.npy', y_val) 

In [5]:
print('script running in '+os.path.abspath("."))
X_test = np.load('./data/X_test.npy')
y_test = np.load('./data/y_test.npy')
X_train = np.load('./data/X_train.npy')
y_train = np.load('./data/y_train.npy')
X_val = np.load('./data/X_val.npy')
y_val = np.load('./data/y_val.npy')

script running in /home/anton/e4040-2021fall-project-fren-an3078-wab2138-av3023


## 3.1 ResNeXt50 without data augmentation

In [3]:
from utils.custom_ResNeXt import custom_ResNeXt50
from utils.train_TinyImageNet_SE_ResNeXt import tinyImageNet_ResNeXt_train
# tinyImageNet_resnet_train(model,path,X_train,y_train,X_val,y_val,data_aug=False,learning_rate=0.6,steps=14,epochs=45,batch_size=128,es_patience=8)
ResNeXt_50=custom_ResNeXt50(input_shape=(64,64,3),n_classes=200)
ResNeXt_50.summary()

Model: "custom_ResNeXt50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 32, 32, 64)   9472        input_1[0][0]                    
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 32, 32, 64)   256         conv1_conv[0][0]                 
__________________________________________________________________________________________________
conv1_relu (ReLU)               (None, 32, 32, 64)   0           conv1_bn[0][0]                   
___________________________________________________________________________________

In [6]:
hist_ResNeXt_50= tinyImageNet_ResNeXt_train(ResNeXt_50,
                          './models/tinyImageNet/ResNeXt_50_best.hdf5', #path for weights of best model
                          X_train,y_train,
                          X_val,
                          y_val,
                          data_aug=False, #no data aug
                          learning_rate=0.6, #starting learning rate
                          steps=20, #number of epochs between learning rate modification (* 0.1)
                          epochs=60,
                          batch_size=128,
                          es_patience=20) #early stopping

Epoch 1/60
steps before lr change:  20

Epoch 00001: LearningRateScheduler reducing learning rate to 0.6000000238418579.
