In [0]:
import hashlib
import math
import numpy as np
import os
import six
import sys
import tarfile
import requests
from logging import getLogger, StreamHandler, DEBUG
# Module-level logger with its own stream handler.
logger = getLogger(__name__)
handler = StreamHandler()  # no stream argument: StreamHandler writes to sys.stderr
logger.addHandler(handler)
# Do not pass records on to ancestor loggers' handlers (avoids duplicated lines).
logger.propagate = False
try:
    import cPickle as pickle
except:
    import pickle

try:
    from itertools import zip_longest
except:
    from itertools import izip_longest as zip_longest

from itertools import product
from collections import defaultdict
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import sys
import time
import threading

class Spinner:
    """Console spinner animated from a background thread.

    ``start()`` launches a thread that repeatedly prints ``prefix`` followed
    by one rotating cursor glyph; ``stop()`` ends the animation and blanks
    the line.

    Args:
        delay: seconds between frames; falls back to the class default
            (0.1) when omitted or zero.
        prefix: text printed before the cursor glyph; ``None`` means no text.
    """
    busy = False
    delay = 0.1

    @staticmethod
    def spinning_cursor():
        """Yield the four spinner glyphs in an endless cycle."""
        while 1:
            for cursor in '|/-\\': yield cursor

    def __init__(self, delay=None, prefix=None):
        # A missing prefix is treated as empty text. Calling len() on the
        # default None used to raise TypeError before the spinner existed.
        self.prefix = '' if prefix is None else prefix
        self.prefix_len = len(self.prefix)
        self.spinner_generator = self.spinning_cursor()
        self._thread = None
        if delay and float(delay): self.delay = delay

    def spinner_task(self):
        """Draw frames until ``busy`` is cleared (runs in the worker thread)."""
        while self.busy:
            sys.stdout.write(self.prefix)
            sys.stdout.write(next(self.spinner_generator))
            sys.stdout.flush()
            time.sleep(self.delay)
            sys.stdout.write('\r')
            sys.stdout.flush()

    def start(self):
        """Begin animating; does nothing if the spinner is already running."""
        if self._thread is not None and self._thread.is_alive():
            return
        self.busy = True
        self._thread = threading.Thread(target=self.spinner_task)
        # Daemon thread: a crash in the main thread must not leave the
        # interpreter hanging on a spinner that nobody will ever stop.
        self._thread.daemon = True
        self._thread.start()

    def stop(self):
        """Stop animating and erase the spinner line."""
        self.busy = False
        worker, self._thread = self._thread, None
        if worker is not None and worker is not threading.current_thread():
            # Wait for the last frame so it cannot be drawn after the
            # line has been blanked below.
            worker.join()
        else:
            time.sleep(self.delay)
        sys.stdout.write('\r')
        sys.stdout.write(' ' * (self.prefix_len + 1))
        sys.stdout.write('\r')
        sys.stdout.flush()


# Download location and local file name of the CIFAR-100 (python version) archive.
CIFAR100_URL = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
CIFAR100_TAR_FILENAME = 'cifar-100-python.tar.gz'
# MD5 of the archive, compared by check_cifar().
# NOTE(review): confirm against the checksum published on the dataset page.
CIFAR100_TAR_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'

# Archive members holding the pickled training / test batches.
CIFAR100_TRAIN_DATA_NAMES = ['cifar-100-python/train']
CIFAR100_TEST_DATA_NAMES  = ['cifar-100-python/test']

# Fine label names; save_cifar() indexes this list with the numeric fine label.
CIFAR100_LABELS_LIST = [
    'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle',
    'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel',
    'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock',
    'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
    'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster',
    'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion',
    'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse',
    'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear',
    'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine',
    'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea',
    'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider',
    'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank',
    'telephone', 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip',
    'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman', 'worm'
]

# Superclass (coarse label) names; entry i names the group of fine labels in
# CIFAR100_CLASSES_LABELS_LIST[i]. save_cifar() indexes this list with the
# numeric coarse label in 'cifar100superclass' mode.
CIFAR100_SUPERCLASS_LABELS_LIST = [
    'aquatic_mammals',
    'fish',
    'flowers',
    'food_containers',
    'fruit_and_vegetables',
    'household_electrical_devices',
    'household_furniture',
    'insects',
    'large_carnivores',
    'large_man-made_outdoor_things',
    'large_natural_outdoor_scenes',
    'large_omnivores_and_herbivores',
    'medium_mammals',
    'non-insect_invertebrates',
    'people',
    'reptiles',
    'small_mammals',
    'trees',
    'vehicles_1',
    'vehicles_2'
]

# Fine labels grouped by superclass, five per group.
CIFAR100_CLASSES_LABELS_LIST = [
    ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
    ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
    ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
    ['bottle', 'bowl', 'can', 'cup', 'plate'],
    ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
    ['clock', 'keyboard', 'lamp', 'telephone', 'television'],
    ['bed', 'chair', 'couch', 'table', 'wardrobe'],
    ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
    ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
    ['bridge', 'castle', 'house', 'road', 'skyscraper'],
    ['cloud', 'forest', 'mountain', 'plain', 'sea'],
    ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
    ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
    ['crab', 'lobster', 'snail', 'spider', 'worm'],
    ['baby', 'boy', 'girl', 'man', 'woman'],
    ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
    ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
    ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
    ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
    ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor']
]


def unpickle(dump):
    """Deserialize one pickled object from an open binary file-like object.

    Args:
        dump: object whose ``read()`` returns the pickled bytes
            (``parse_cifar`` passes the result of ``TarFile.extractfile``).

    Returns:
        The unpickled object.

    Raises:
        ValueError: if ``dump`` is None, which ``TarFile.extractfile``
            returns for members that are not regular files.
    """
    if dump is None:
        raise ValueError("nothing to unpickle: archive member is not a regular file")
    raw = dump.read()
    # NOTE(security): unpickling can execute arbitrary code; only feed this
    # function archives whose integrity has been verified.
    if sys.version_info[0] < 3:
        return pickle.loads(raw)
    # `encoding` tells Python 3 how to decode 8-bit str instances that were
    # pickled by Python 2.
    return pickle.loads(raw, encoding='latin1')


def check_output_path(output):
    """Abort the program when the requested output path already exists.

    Args:
        output: path of the directory the images are going to be written to.

    Returns None when nothing exists at ``output``; otherwise logs an error
    and exits the process with status 1.
    """
    if not Path(output).exists():
        return
    message = "output dir `{}` already exists. Please specify a different output path"
    logger.error(message.format(output))
    sys.exit(1)


# Reference: https://stackoverflow.com/questions/37573483/progress-bar-while-download-file-over-http-with-requests/37573701
def download_with_progress(url, filename):
    """Stream ``url`` into ``filename`` while showing a byte-level progress bar.

    Args:
        url: address fetched with an HTTP GET.
        filename: local path the response body is written to (overwritten).

    Exits the process with status 1 when the server answers with an error
    status, or when the number of bytes written differs from the announced
    Content-Length.
    """
    logger.warning("Downloading {}".format(filename))
    r = requests.get(url, stream=True)
    try:
        if not r.ok:
            # Without this check an HTML error page would be saved as the archive.
            logger.error("ERROR, download failed with HTTP status {}".format(r.status_code))
            sys.exit(1)
        total_size = int(r.headers.get('content-length', 0))
        block_size = 1024
        wrote = 0
        with open(filename, 'wb') as f:
            # Count bytes directly: a total expressed in 1 KiB blocks with
            # unit='KB' plus unit_scale rendered as "kKB" and was rounded down.
            # total=None makes tqdm show a plain counter when the size is unknown.
            progress = tqdm(total=total_size or None, unit='B', unit_scale=True)
            try:
                for data in r.iter_content(block_size):
                    wrote += len(data)
                    f.write(data)
                    progress.update(len(data))
            finally:
                progress.close()
    finally:
        # Release the streamed connection even when the transfer fails.
        r.close()
    if total_size != 0 and wrote != total_size:
        logger.error("ERROR, something went wrong")
        sys.exit(1)


def download_cifar(dataset):
    """Download the archive that belongs to ``dataset``.

    Args:
        dataset: 'cifar10', 'cifar100' or 'cifar100superclass'. Any other
            value is ignored and nothing is downloaded.
    """
    if dataset == 'cifar10':
        source, target = CIFAR10_URL, CIFAR10_TAR_FILENAME
    elif dataset in ('cifar100', 'cifar100superclass'):
        # Both CIFAR-100 variants are stored in the same archive.
        source, target = CIFAR100_URL, CIFAR100_TAR_FILENAME
    else:
        return
    download_with_progress(source, target)


def check_cifar(dataset):
    """Make sure the archive for ``dataset`` is present and intact.

    The archive is downloaded when it is missing, then its MD5 digest is
    compared with the expected checksum.

    Args:
        dataset: 'cifar10', 'cifar100' or 'cifar100superclass'.

    Returns:
        True when the checksum matches. On a mismatch an error is logged and
        the process exits with status 1.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names.
    """
    if dataset == 'cifar10':
        cifar = Path(CIFAR10_TAR_FILENAME)
        md5sum = CIFAR10_TAR_MD5
    elif dataset in ['cifar100', 'cifar100superclass']:
        cifar = Path(CIFAR100_TAR_FILENAME)
        md5sum = CIFAR100_TAR_MD5
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError("unknown dataset: {!r}".format(dataset))

    if not cifar.is_file():
        logger.warning("{} does not exists.".format(cifar))
        download_cifar(dataset)

    # Hash in 1 MiB chunks: the file is closed deterministically and the whole
    # archive never has to sit in memory at once.
    digest = hashlib.md5()
    with cifar.open('rb') as archive:
        for chunk in iter(lambda: archive.read(1 << 20), b''):
            digest.update(chunk)
    cifar_md5sum = digest.hexdigest()
    if md5sum != cifar_md5sum:
        logger.error("File `{0}` may be corrupted (wrong md5 checksum). Please delete `{0}` and retry".format(cifar))
        sys.exit(1)

    return True


def get_data_params(dataset):
    """Return the archive name and pickle dictionary keys for ``dataset``.

    Args:
        dataset: 'cifar10', 'cifar100' or 'cifar100superclass'.

    Returns:
        Tuple ``(tar filename, pixel-data key, label key, coarse-label key)``.
        The coarse-label key is None unless ``dataset`` is
        'cifar100superclass'.
    """
    pixels_key = 'data'
    if dataset == 'cifar10':
        params = (CIFAR10_TAR_FILENAME, pixels_key, 'labels', None)
    elif dataset == 'cifar100':
        params = (CIFAR100_TAR_FILENAME, pixels_key, 'fine_labels', None)
    elif dataset == 'cifar100superclass':
        params = (CIFAR100_TAR_FILENAME, pixels_key, 'fine_labels', 'coarse_labels')
    # For any other dataset name `params` is never bound, so the return
    # below raises UnboundLocalError.
    return params


def get_datanames(dataset, mode):
    """Return the archive member names holding the requested split.

    Args:
        dataset: 'cifar10', 'cifar100' or 'cifar100superclass'.
        mode: 'train' or 'test'.

    Returns:
        The list of member names, or None when ``dataset`` or ``mode`` is
        not one of the recognised values.
    """
    if mode not in ('train', 'test'):
        return None
    training = mode == 'train'
    if dataset == 'cifar10':
        return CIFAR10_TRAIN_DATA_NAMES if training else CIFAR10_TEST_DATA_NAMES
    if dataset in ('cifar100', 'cifar100superclass'):
        return CIFAR100_TRAIN_DATA_NAMES if training else CIFAR100_TEST_DATA_NAMES
    return None


def parse_cifar(dataset, mode):
    """Load one split of ``dataset`` from its tar archive into memory.

    Args:
        dataset: 'cifar10', 'cifar100' or 'cifar100superclass'.
        mode: 'train' or 'test'.

    Returns:
        Tuple ``(features, labels, coarse_labels, batch_names)``:
        features is a uint8 array reshaped to (N, 32, 32, 3); labels is the
        concatenation of the per-batch label lists; coarse_labels is the
        concatenated coarse labels for 'cifar100superclass' and an empty list
        otherwise; batch_names holds, for every image, the name of the
        archive member it came from.

    A KeyboardInterrupt while loading exits the process with status 1.
    """
    features = []
    labels = []
    coarse_labels = []
    batch_names = []

    TARFILE, label_data, label_labels, label_coarse = get_data_params(dataset)
    datanames = get_datanames(dataset, mode)

    spinner = Spinner(prefix="Loading {} data...".format(mode))
    spinner.start()
    try:
        # The context manager closes the archive even when a member is
        # missing or fails to unpickle.
        with tarfile.open(TARFILE) as tf:
            for dataname in datanames:
                ti = tf.getmember(dataname)
                data = unpickle(tf.extractfile(ti))
                features.append(data[label_data])
                labels.append(data[label_labels])
                batch_names.extend([dataname.split('/')[1]] * len(data[label_data]))
                if dataset == 'cifar100superclass':
                    coarse_labels.append(data[label_coarse])
        features = np.concatenate(features)
        # Rows are stored channel-first; convert to height x width x channel.
        features = features.reshape(features.shape[0], 3, 32, 32)
        features = features.transpose(0, 2, 3, 1).astype('uint8')
        labels = np.concatenate(labels)
        if dataset == 'cifar100superclass':
            coarse_labels = np.concatenate(coarse_labels)
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        # Always stop the spinner: on any other exception its thread used to
        # keep animating forever and kept the process alive.
        spinner.stop()

    return features, labels, coarse_labels, batch_names


def save_cifar(dataset="cifar100", output="./train_data", name_with_batch_index=True):
    """Write every image of ``dataset`` as a PNG file sorted into label folders.

    Files go to ``<output>/<mode>/<label>/`` for mode in ('train', 'test');
    in 'cifar100superclass' mode the layout is
    ``<output>/<mode>/<superclass>/<label>/``.

    Args:
        dataset: 'cifar10', 'cifar100' or 'cifar100superclass'.
        output: root directory for the images; created when missing.
        name_with_batch_index: when True (default) files are named
            ``<batch>_index_<n>.png`` with n counted per batch; when False
            they are named ``<n>.png`` with n counted per label.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names.
    """
    if dataset == 'cifar10':
        LABELS = CIFAR10_LABELS_LIST
        LABELS_LIST = CIFAR10_LABELS_LIST
    elif dataset == 'cifar100':
        LABELS = CIFAR100_LABELS_LIST
        LABELS_LIST = CIFAR100_LABELS_LIST
    elif dataset == 'cifar100superclass':
        # One "<superclass>/<label>" directory per fine label.
        LABELS = [
            '/'.join((superclass, member))
            for superclass, members in zip(CIFAR100_SUPERCLASS_LABELS_LIST, CIFAR100_CLASSES_LABELS_LIST)
            for member in members
        ]
        LABELS_LIST = CIFAR100_LABELS_LIST
        COARSE_LABELS_LIST = CIFAR100_SUPERCLASS_LABELS_LIST
    else:
        raise ValueError("unknown dataset: {!r}".format(dataset))

    # Same index widths as before: four digits for cifar10, five otherwise.
    if dataset == 'cifar10':
        batch_name_format = '%s_index_%04d.png'
    else:
        batch_name_format = '%s_index_%05d.png'

    for mode in ['train', 'test']:
        for label in LABELS:
            dirpath = os.path.join(output, mode, label)
            # os.makedirs replaces the `mkdir -p` shell call: no shell
            # quoting problems and no dependency on a POSIX shell.
            if not os.path.isdir(dirpath):
                os.makedirs(dirpath)

        features, labels , coarse_labels, batch_names = parse_cifar(dataset, mode)

        label_count = defaultdict(int)
        batch_count = defaultdict(int)
        # coarse_labels is empty unless superclasses are used; zip_longest
        # then pads coarse_label with None.
        for feature, label, coarse_label, batch_name in tqdm(zip_longest(features, labels, coarse_labels, batch_names), total=len(labels), desc="Saving {} images".format(mode)):
            label_count[label] += 1
            if name_with_batch_index:
                filename = batch_name_format % (batch_name, batch_count[batch_name])
            else:
                filename = '%04d.png' % label_count[label]
            batch_count[batch_name] += 1

            if dataset == 'cifar100superclass':
                filepath = os.path.join(output, mode, COARSE_LABELS_LIST[coarse_label], LABELS_LIST[label], filename)
            else:
                filepath = os.path.join(output, mode, LABELS_LIST[label], filename)
            image = Image.fromarray(feature)
            image = image.convert('RGB')
            image.save(filepath)

In [0]:
# Fetch the CIFAR-100 archive; it is saved under CIFAR100_TAR_FILENAME in the working directory.
download_cifar('cifar100')

Downloading cifar-100-python.tar.gz
165kKB [00:03, 44.1kKB/s]                          


In [0]:
# Convert the archive into PNG files below ./train_data/<train|test>/<label>/.
save_cifar()



Saving train images: 100%|██████████| 50000/50000 [00:16<00:00, 3003.87it/s]




Saving test images: 100%|██████████| 10000/10000 [00:03<00:00, 3117.56it/s]


In [0]:
import os
import tarfile

# Unpack the downloaded CIFAR-100 archive into ./data.
fname = 'cifar-100-python.tar.gz'
# Plain-Python replacement for the `!mkdir` shell magic: it also works outside
# IPython and does not complain when the cell is run a second time.
if not os.path.isdir('data'):
    os.makedirs('data')
if fname.endswith("tar.gz"):
    # NOTE(security): extractall trusts the member paths stored in the
    # archive; only extract archives whose origin has been verified.
    with tarfile.open(fname, "r:gz") as tar:
        tar.extractall('./data')

In [0]:
import pickle
import numpy as np
import cv2
from keras import applications
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

#Residual Network with ILSVRC weights
base_model = applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
#Debug
base_model.summary()
#Extract from the average pooling layer
layers_to_extract = ["avg_pool"]

#Select the features from average pooling layer
# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# `input=` / `output=` spellings are legacy Keras 1 names.
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layers_to_extract[0]).output)

Using TensorFlow backend.















Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (Non



In [0]:
import numpy as np
import pandas as pd
import pickle
from scipy import misc
from tqdm import tqdm

def unpickle(file):
    """Load one pickled object from the file at path ``file``.

    The pickle is read with ``encoding='bytes'``, so 8-bit strings written
    by Python 2 come back as ``bytes`` (dictionary keys included).
    """
    with open(file, 'rb') as stream:
        return pickle.load(stream, encoding='bytes')

# Dataset metadata; the b'fine_label_names' entry is read from it in the next cell.
meta = unpickle('data/cifar-100-python/meta')


In [0]:
# Class names are stored as byte strings; decode them to text.
fine_label_names = [raw_name.decode('utf8') for raw_name in meta[b'fine_label_names']]

# Training split: raw pixel rows, fine labels and per-image file names.
train = unpickle('data/cifar-100-python/train')
train_data = train[b'data']
fine_labels = train[b'fine_labels']
filenames = [raw_name.decode('utf8') for raw_name in train[b'filenames']]

In [0]:
#To extract the features from the selected layer of ResNet50 Net
layer_num = 0
batch_size = 100  # images sent to the network per predict call
feats = []
# Iterate over whatever the split contains instead of a hard-coded 50000.
for start in range(0, len(train_data), batch_size):

    #Pre-processing
    # Each row holds 1024 red, then 1024 green, then 1024 blue values of one
    # 32x32 image: reshape to channel-first, then move channels last.
    rows = np.asarray(train_data[start:start + batch_size])
    images = np.ascontiguousarray(rows.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1), dtype=np.uint8)
    x_in = np.stack([cv2.resize(img, (224, 224)) for img in images]).astype('float32')
    x_in = preprocess_input(x_in)

    #Feature Extraction
    # One predict call per batch instead of one per image; the unused
    # per-image string conversion of the features was dropped.
    batch_features = model.predict(x_in)
    feats.append(batch_features.reshape(len(rows), -1))

feature_list = np.concatenate(feats)
np.save("train_data"+layers_to_extract[layer_num]+"resnet_data.npy",feature_list)

In [0]:
layer_num = 0
batch_size = 100  # images sent to the network per predict call
feats = []

test = unpickle('data/cifar-100-python/test')

# Test-specific names: the classifier cell further down builds its training
# targets from the global `fine_labels`, so the training values stored in
# `filenames` / `fine_labels` must not be overwritten here.
test_filenames = [t.decode('utf8') for t in test[b'filenames']]
test_fine_labels = test[b'fine_labels']
test_data = test[b'data']

# Iterate over whatever the split contains instead of a hard-coded 10000.
for start in range(0, len(test_data), batch_size):

    #Pre-processing
    # Each row holds 1024 red, then 1024 green, then 1024 blue values of one
    # 32x32 image: reshape to channel-first, then move channels last.
    rows = np.asarray(test_data[start:start + batch_size])
    images = np.ascontiguousarray(rows.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1), dtype=np.uint8)
    x_in = np.stack([cv2.resize(img, (224, 224)) for img in images]).astype('float32')
    x_in = preprocess_input(x_in)

    #Feature Extraction
    # One predict call per batch instead of one per image; the unused
    # per-image string conversion of the features was dropped.
    batch_features = model.predict(x_in)
    feats.append(batch_features.reshape(len(rows), -1))

feature_list = np.concatenate(feats)
np.save("test_data"+layers_to_extract[layer_num]+"resnet_data.npy",feature_list)

In [0]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras import applications
from keras.preprocessing import image
from keras.models import Model
from keras import models
from keras import layers
from keras import optimizers
from keras.layers import Input,Dense,Flatten,Dropout,Activation,BatchNormalization,Convolution2D,MaxPooling2D,Conv2D
from keras.models import Sequential
from keras import regularizers
from keras.layers import BatchNormalization
from keras.layers import AveragePooling2D

#Loading the Features
X_train = np.load("train_dataavg_poolresnet_data.npy")
X_test = np.load("test_dataavg_poolresnet_data.npy")

#Convert the labels to one-hot encoding
# Take the labels straight from the training split so the result does not
# depend on what the global `fine_labels` holds when this cell runs
# (notebook cells may be executed in any order).
trainImageLabels = keras.utils.to_categorical(train[b'fine_labels'], num_classes=100)

#Train Test Split 80%-20%
x_tr,x_ts,y_tr,y_ts = train_test_split(X_train, trainImageLabels, test_size=0.2,random_state=1)

#Creating a Deep Model for classifying the features of CIFAR-100 extracted from ResNet-50 trained on ILSVRC Dataset
# NOTE: this rebinds `model`, which until now referred to the ResNet-50
# feature extractor; re-run the extractor cell before extracting features again.
model = Sequential()
model.add(Dense(2048, input_dim=2048, kernel_initializer="uniform")) #Fully connected layer (as output feature size of ResNet is 2048)
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))  
model.add(Dense(512, kernel_initializer="uniform"))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))  
model.add(Dense(100)) # one output per CIFAR-100 fine class
model.add(Activation("softmax"))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#### Reason for dense layers only
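The inputs to this model are the pre-extracted ResNet-50 feature vectors (2048 values per image; the saved training array has shape (50000, 2048)), not the raw (32, 32, 3) images.
Hence, just to test the concept of transfer learning, I proceed with dense layers only.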

I will update this once I fully understand how to reshape the inputs to match the model's expected input shape.

In [25]:
# Print the layer-by-layer structure and parameter counts of the classifier.
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 2048)              4196352   
_________________________________________________________________
batch_normalization_15 (Batc (None, 2048)              8192      
_________________________________________________________________
activation_66 (Activation)   (None, 2048)              0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               1049088   
_________________________________________________________________
batch_normalization_16 (Batc (None, 512)               2048      
_________________________________________________________________
activation_67 (Activation)   (None, 512)              

In [26]:
# Train on the 80% split and validate on the held-out 20% after every epoch.
history = model.fit(x_tr,y_tr,batch_size=128,epochs=30,validation_data=(x_ts, y_ts),verbose=1)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 40000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [27]:
#Validation Accuracy
# evaluate returns [loss, accuracy] because the model was compiled with metrics=['accuracy']
score = model.evaluate(x_ts, y_ts,verbose=1)
print(score)

#Save the trained Model
model.save('trainedModel.h5')

#Classify the test data (Submission files have been trained with 100% of training data after validating on 80-20)
predictions_ts = (model.predict(X_test))
class_result=np.argmax(predictions_ts,axis=-1)

# fmt='%d' writes the class indices as plain integers; savetxt's default
# format would emit them in scientific float notation.
np.savetxt("submission_labels.csv", class_result, delimiter=",", fmt='%d')

[1.16016103181839, 0.7414]
