In [None]:
!unzip IMagenet.zip

Archive:  IMagenet.zip
replace IMagenet/.git/config? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
import time
import imageio
import numpy as np

path = 'IMagenet/tiny-imagenet-200/'

def get_id_dictionary():
    id_dict = {}
    for i, line in enumerate(open( path + 'wnids.txt', 'r')):
        id_dict[line.replace('\n', '')] = i
    return id_dict

def get_class_to_id_dict():
    id_dict = get_id_dictionary()
    all_classes = {}
    result = {}
    for i, line in enumerate(open( path + 'words.txt', 'r')):
        n_id, word = line.split('\t')[:2]
        all_classes[n_id] = word
    for key, value in id_dict.items():
        result[value] = (key, all_classes[key])
    return result

def get_data(id_dict):
    print('starting loading data')
    train_data, test_data = [], []
    train_labels, test_labels = [], []
    t = time.time()
    for key, value in id_dict.items():
        train_data += [imageio.imread( path + 'train/{}/images/{}_{}.JPEG'.format(key, key, str(i)), mode='RGB') for i in range(500)]
        train_labels_ = np.array([[0]*200]*500)
        train_labels_[:, value] = 1
        train_labels += train_labels_.tolist()

    for line in open( path + 'val/val_annotations.txt'):
        img_name, class_id = line.split('\t')[:2]
        test_data.append(imageio.imread( path + 'val/images/{}'.format(img_name) ,mode='RGB'))
        test_labels_ = np.array([[0]*200])
        test_labels_[0, id_dict[class_id]] = 1
        test_labels += test_labels_.tolist()

    print('finished loading data, in {} seconds'.format(time.time() - t))
    return np.array(train_data), np.array(train_labels), np.array(test_data), np.array(test_labels)

train_data, train_labels, test_data, test_labels = get_data(get_id_dictionary())

print( "train data shape: ",  train_data.shape )
print( "train label shape: ", train_labels.shape )
print( "test data shape: ",   test_data.shape )
print( "test_labels.shape: ", test_labels.shape )

starting loading data


  train_data += [imageio.imread( path + 'train/{}/images/{}_{}.JPEG'.format(key, key, str(i)), mode='RGB') for i in range(500)]
  test_data.append(imageio.imread( path + 'val/images/{}'.format(img_name) ,mode='RGB'))


finished loading data, in 46.45710849761963 seconds
train data shape:  (100000, 64, 64, 3)
train label shape:  (100000, 200)
test data shape:  (10000, 64, 64, 3)
test_labels.shape:  (10000, 200)


In [None]:
def shuffle_data(train_data, train_labels ):
    size = len(train_data)
    train_idx = np.arange(size)
    np.random.shuffle(train_idx)

    return train_data[train_idx], train_labels[train_idx]

train_data, train_labels = shuffle_data(train_data, train_labels)

In [None]:
# Keras, dataset, and VGG19 imports
import keras
from keras.datasets import cifar100, cifar10
from keras.applications import VGG19

In [None]:
# Loading VGG19 with imagenet weights
from keras.layers import Input

vgg19_model = VGG19(include_top = True, weights='imagenet')
vgg19_model.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout

# define new empty model
model = Sequential()

# add all layers except output from VGG19 to new model
for layer in vgg19_model.layers[:-1]:
  model.add(layer)

# freeze all weights
for layer in model.layers:
  layer.trainable = False

# add dropout layer and new output layer
model.add(Dropout(0.5))
model.add(Dense(200, activation='softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)      

In [None]:
# load dataset
#(x_train, y_train) , (x_val, y_val) = cifar10.load_data()

In [None]:
import numpy as np
import cv2

In [None]:
import matplotlib.pyplot as plt

NUM_CLASSES = 200

# Onehot encode labels

train_labels = keras.utils.to_categorical(train_labels, NUM_CLASSES)
test_labels = keras.utils.to_categorical(test_labels, NUM_CLASSES)

In [None]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["categorical_accuracy"])

In [None]:
# returns batch_size random samples from either training set or validation set
# resizes each image to (224, 244, 3), the native input size for VGG19
def getBatch(batch_size, train_or_val='train'):
  x_batch = []
  y_batch = []
  if train_or_val == 'train':
    idx = np.random.randint(0, len(train_data), (batch_size))

    for i in idx:
      img = cv2.resize(train_data[i], (224, 224), interpolation=cv2.INTER_CUBIC)
      x_batch.append(img)
      y_batch.append(train_labels[i] if np.isscalar(train_labels[i]) else train_labels[i][0])
  elif train_or_val == 'val':
    idx = np.random.randint(0, len(test_data), (batch_size))

    for i in idx:
      img = cv2.resize(test_data[i], (224, 224), interpolation=cv2.INTER_CUBIC)
      x_batch.append(img)
      y_batch.append(test_labels[i] if np.isscalar(test_labels[i]) else test_labels[i][0])
  else:
    print("error, please specify train or val")

  x_batch = np.array(x_batch)
  y_batch = np.array(y_batch)
  #print(x_batch.shape)
  #print(y_batch.shape)
  return x_batch, y_batch

In [None]:
import pandas as pd
EPOCHS = 20
BATCH_SIZE = 128
VAL_SIZE = 500
STEPS = 50

df = pd.DataFrame(columns=['Epoch', 'Training Loss', 'Training Acc', 'Validation Loss', 'Validation Acc'])



for e in range(EPOCHS):
  train_loss = 0
  train_acc = 0

  for s in range(STEPS):
    x_batch, y_batch = getBatch(BATCH_SIZE, "train")
    out = model.train_on_batch(x_batch, y_batch)
    train_loss += out[0]
    train_acc += out[1]

  print(f"Epoch: {e}\nTraining Loss = {train_loss / STEPS}\tTraining Acc = {train_acc / STEPS}")

  x_v, y_v = getBatch(VAL_SIZE, "val")
  eval = model.evaluate(x_v, y_v)
  print(f"Validation loss: {eval[0]}\tValidation Acc: {eval[1]}\n")
  df.loc[len(df)] = [e, train_loss / STEPS, train_acc / STEPS, eval[0], eval[1]]


df.to_csv("vgg19_training_history_ImageNet.csv")




Epoch: 0
Training Loss = 0.35254057401226757	Training Acc = 0.95015625
Validation loss: 0.0010624699061736465	Validation Acc: 1.0

Epoch: 1
Training Loss = 0.042299407917744246	Training Acc = 0.99296875
Validation loss: 0.032177481800317764	Validation Acc: 0.9900000095367432

Epoch: 2
Training Loss = 0.02629288557334803	Training Acc = 0.99265625
Validation loss: 0.0002757782058324665	Validation Acc: 1.0

Epoch: 3
Training Loss = 0.016375793664701634	Training Acc = 0.99625
Validation loss: 0.018314167857170105	Validation Acc: 0.9940000176429749

Epoch: 4
Training Loss = 0.030866612774771054	Training Acc = 0.99421875
Validation loss: 0.020182672888040543	Validation Acc: 0.9959999918937683

Epoch: 5
Training Loss = 0.0220845567014112	Training Acc = 0.99578125
Validation loss: 0.03969494625926018	Validation Acc: 0.9940000176429749

Epoch: 6
Training Loss = 0.033713894347711174	Training Acc = 0.9940625
Validation loss: 0.002224444644525647	Validation Acc: 0.9980000257492065

Epoch: 7
Traini

In [None]:
x_v, y_v = getBatch(VAL_SIZE, "val")
eval1 = model.evaluate(x_v, y_v)
print(f"Validation loss: {eval1[0]}\tValidation Acc: {eval1[1]}\n")

Validation loss: 0.03979633376002312	Validation Acc: 0.9940000176429749

