In [None]:
from keras.preprocessing.image import ImageDataGenerator 
import os 
import numpy as np 
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Width/height (in pixels) used as the generators' target size further down.
img_width = 224
img_height = 224

# Training schedule.
epochs = 4
batch_size = 16

# Sample counts that used to be hard-coded; kept for reference, not used.
#nb_train_samples =400
#nb_validation_samples = 100

# Dataset layout: <cwd>/BlobStorage/{train_data,validation_data}
data_dir = os.path.join(os.getcwd(), 'BlobStorage')
train_data_dir = os.path.join(data_dir, 'train_data')
validation_data_dir = os.path.join(data_dir, 'validation_data')

# Training images: rescaled by 1/255 and randomly sheared, zoomed and
# horizontally flipped.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation/test images: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Read the run definition from the first line of the cluster-centers file.
# The line is whitespace-separated: the first token becomes the model name and
# the remaining tokens become the list of class names.
# The context manager guarantees the handle is closed even if parsing fails.
with open("fasttext/clusterCenters.txt", 'r') as f:
    lines = f.readlines()

#print(lines)

# Fail with an explicit message instead of a bare IndexError when the file is
# empty or its first line is blank.
line = lines[0].split() if lines else []
if not line:
    raise ValueError(
        "fasttext/clusterCenters.txt: the first line must contain a model "
        "name followed by the class names")
print(line)

modelName = line[0]
classesNow = line[1:]
print(modelName)
print(classesNow)



In [None]:
#from keras.applications.inception_resnet_v2 import InceptionResNetV2
#classesNow = ['car','coffee']

In [None]:
# Ask the Keras TensorFlow backend for its available GPUs; the result is shown
# as the cell output.
# NOTE(review): `_get_available_gpus` is a private helper (leading underscore),
# so it may not exist in every Keras version — confirm against the installed one.
import keras.backend as K

K.tensorflow_backend._get_available_gpus()



In [None]:

# Both directory iterators must use identical settings so that the class ->
# label-index mapping and the image shape agree between training and
# validation; keep the shared arguments in one place.
# Passing `classes` explicitly restricts loading to those sub-folders and
# fixes the label indices to the order of `classesNow`.
flow_kwargs = dict(
    classes=classesNow,
    target_size=(img_width, img_height),
    color_mode='rgb',
    batch_size=batch_size,
    class_mode='categorical')

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_kwargs)

validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_kwargs)

# Size the softmax from the generator's class mapping rather than from the
# labels that actually occur: the one-hot targets always have one column per
# requested class, even if a class folder happens to contain no images, so
# counting np.unique(train_generator.classes) could produce an output layer
# narrower than the targets.
n_classes = len(train_generator.class_indices)
channel = 3



# Convolutional backbone with ImageNet weights and without its classifier head.
# Alternatives that were tried:
#base_model=ResNet50(weights='imagenet',include_top=False)
#base_model=InceptionResNetV2(weights='imagenet',include_top=False)
base_model = VGG19(weights='imagenet', include_top=False)
print(type(base_model))

# New classification head: global average pooling, then three fully connected
# ReLU layers (1024 -> 1024 -> 512) so the model can learn more complex
# functions on top of the backbone features.
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)

# Final layer: softmax over n_classes.
preds = Dense(n_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=preds)

# Train only the newly added (randomly initialised) top layers: freeze every
# layer that belongs to the backbone.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Compile *after* the trainable flags have been set.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy', 'accuracy'])

model.summary()

In [None]:
#Fitting the model
# Floor division drops the trailing partial batch; clamp to at least one step
# so a dataset smaller than `batch_size` does not result in zero steps.
train_steps = max(1, train_generator.n // batch_size)
validation_steps = max(1, validation_generator.n // batch_size)

model.fit_generator(
        train_generator,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_steps)

# Persist the trained model under the name read from the cluster-centers file.
# Create the target folder first so that a missing directory cannot make the
# save fail after the whole training run has completed.
modelNameStr = "trained_models/"  + modelName +".h5"
os.makedirs(os.path.dirname(modelNameStr), exist_ok=True)
model.save(modelNameStr)

In [None]:
#Testing
from keras.models import load_model
import os
from keras.preprocessing.image import ImageDataGenerator 
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# ---------------------------------------------------------------------------
# Test-set input pipeline
# ---------------------------------------------------------------------------
# Test images live in BlobStorage/test_data_20; the categories need to be in
# folders (one sub-folder per category).
data_dir = os.path.join(os.getcwd(), 'BlobStorage')
test_data_dir = os.path.join(data_dir, 'test_data_20')
print(test_data_dir)

img_width = 224
img_height = 224
batch_size = 16

# Only rescaling is applied to the test images.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# class_mode=None -> the iterator yields image batches without labels;
# shuffle=False keeps the batch order aligned with `test_generator.filenames`.
test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    color_mode='rgb',
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)
test_generator.reset()

# Relative file paths of every test image, in iteration order.
filenames = test_generator.filenames
nb_samples = len(filenames)

print('nb_samples {}'.format(nb_samples))
print(filenames[1])

In [None]:
import pandas as pd
import pickle

# Re-read the model name and class list from the first line of the
# cluster-centers file (first token = model name, remaining tokens = classes).
# The context manager closes the handle even if parsing fails.
with open("fasttext/clusterCenters.txt", 'r') as f:
    lines = f.readlines()

line = lines[0].split() if lines else []
if not line:
    raise ValueError(
        "fasttext/clusterCenters.txt: the first line must contain a model "
        "name followed by the class names")
print(line)
modelName = line[0]
classesNow = line[1:]

# IMPORTANT: do NOT sort `classesNow` here. Training passed this list
# explicitly via `classes=classesNow`, and Keras then assigns label indices in
# the order of that list (alphabetical order is only used when the classes are
# inferred from the directory). The softmax outputs are therefore in file
# order, and sorting would attach the wrong class name to each column.

model = load_model('trained_models/'+modelName+'.h5')

# Start from the first batch so predictions line up with `filenames` even when
# this cell is re-run.
test_generator.reset()

# Whole number of batches that covers every sample (ceiling division);
# `steps` must be an integer.
prediction_steps = (nb_samples + batch_size - 1) // batch_size
predictions = model.predict_generator(test_generator, steps=prediction_steps, verbose = 1)

# One row per test file, one probability column per class.
pred_df = pd.DataFrame(data=predictions, index=filenames, columns=classesNow)
#pred_df = pd.DataFrame(data=predictions, index=filenames, columns=['car','coffee'])
print(pred_df.head())
print(pred_df.shape)
    

In [None]:
    
#Finding highest probability category
# Take the arg-max over the class-probability columns only. Using the whole
# frame would include the 'max_prob' label column itself when this cell is
# re-run, which breaks (or corrupts) the result.
pred_df['max_prob'] = pred_df[classesNow].idxmax(axis=1)

# Write "<test file>,<predicted class>" rows; make sure the output folder
# exists so the export cannot fail on a fresh checkout.
os.makedirs('predictions', exist_ok=True)
pred_df['max_prob'].to_csv('predictions/'+modelName+'.txt', header=True, index=True, sep=',',mode = 'w+')
#pred_df.to_pickle('predictions/'+modelName+'.pkl')
print(pred_df.head())
print(pred_df.shape)



In [None]:
# Show the (rows, columns) shape of the predictions frame as the cell output.
pred_df.shape

In [None]:
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 

# Reload the exported predictions: one row per test file with its predicted
# class. NOTE: this rebinds `pred_df` to the two-column frame read from disk.
pred_df = pd.read_csv('predictions/'+modelName+'.txt', header=0, sep=',')

pred_df.columns = ['test_file','max_prob']

# The true label is the top-level folder of the relative file path. Normalise
# the separator first so the folder is also extracted correctly when the
# paths were generated on Windows ('\\' instead of '/').
pred_df['actual_label'] = pred_df.test_file.apply(
    lambda x: x.replace('\\', '/').split('/')[0])

y_true = pred_df['actual_label']
y_pred = pred_df['max_prob']

results = confusion_matrix(y_true, y_pred)
print(results)

print('Accuracy Score :',accuracy_score(y_true, y_pred))

print('Report : ')
print(classification_report(y_true, y_pred) )