In [1]:
'''
- Reproducibility setup: the model described in the Report.pdf file was created using the seeds
  below. To reproduce the same model, keep the same seeds.
'''
import numpy as np
import tensorflow as tf
import random as rn

import os
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so assigning it from inside
# an already-running kernel presumably does not change hashing in this process - confirm, and
# export the variable before launching Jupyter if hash determinism is required.
os.environ['PYTHONHASHSEED'] = '0'

# Seed NumPy's global random generator.
np.random.seed(5)

# Seed Python's built-in `random` module.
rn.seed(6)

# Restrict TensorFlow to a single thread for both intra-op and inter-op parallelism
# (presumably to avoid nondeterminism from multi-threaded execution - TODO confirm).
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

from keras import backend as K

# Seed TensorFlow's random operations.
tf.set_random_seed(7)

# Create a session on the default graph with the single-threaded config and hand it to Keras.
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

Using TensorFlow backend.


In [2]:
# Sanity check: display the working directory with its last 6 characters removed; the
# data-loading cell builds its 'Data/' path from this same expression.
os.getcwd()[:-6]

'/home/arthur/Desktop/Projeto_final/'

In [2]:
'''
- Importing libraries
'''
from PIL import Image
import os
import pandas as pd
try:
    # Current location of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy module, removed in scikit-learn 0.20; kept as a fallback for old installations.
    from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from plot_confusion_matrix import plot_confusion_matrix
import matplotlib.pyplot as plt



In [3]:
'''
- Importing images
    * Only images whose original size is 640x640 are kept; each one is downscaled to 32x32.
'''

from os import listdir
from os.path import isfile, join


BIG_CITIES = "big.cities/"
BEACHES = "beaches_n_resorts/"
FOREST = "forest/"

SOURCE_SIZE = (640, 640)  # only images with exactly this size are used
TARGET_SIZE = (32, 32)    # size fed to the network

# The data lives in '<parent of the working directory>/Data/'. This is the same location the
# previous os.getcwd()[:-6] + 'Data/' produced, without depending on the notebook folder name
# being exactly 6 characters long.
data_path = join(os.path.dirname(os.getcwd()), 'Data', '')

big_cities_path = data_path + BIG_CITIES
forest_path = data_path + FOREST
beaches_path = data_path + BEACHES


def _load_resized_images(folder, source_size=SOURCE_SIZE, target_size=TARGET_SIZE):
    '''
    Load every image in `folder` whose size equals `source_size`, resized to `target_size`.

    Each file is opened once and its handle is closed as soon as the resized copy exists.
    Entries that are not regular files (e.g. sub-directories) are skipped.

    folder      -- directory path ending with a path separator
    source_size -- (width, height) an image must have to be kept
    target_size -- (width, height) of the returned images

    Returns a list of PIL images, in the order given by os.listdir.
    '''
    resized = []
    for file_name in listdir(folder):
        file_path = folder + file_name
        if not isfile(file_path):
            continue
        with Image.open(file_path) as img:
            if img.size == source_size:
                # resize() returns a new image, so it stays valid after the file is closed
                resized.append(img.resize(target_size, Image.ANTIALIAS))
    return resized


big_cities_images = _load_resized_images(big_cities_path)

forest_images = _load_resized_images(forest_path)

beaches_images = _load_resized_images(beaches_path)

In [4]:
'''
- Scale all pixel values by 1/255 (maps 8-bit channel values into the range 0..1)
'''


def _scale_pixels(images):
    '''Return each image converted to a NumPy array and divided by 255.'''
    return [np.asarray(picture)/255. for picture in images]


big_cities_images = _scale_pixels(big_cities_images)
forest_images = _scale_pixels(forest_images)
beaches_images = _scale_pixels(beaches_images)

In [5]:
'''
- Creating labels (one integer per image, same length as the matching image list)
    * 0 means Urban
    * 1 means Forests
    * 2 means Beaches
'''
big_cities_label = [0] * len(big_cities_images)
forest_label = [1] * len(forest_images)
beaches_label = [2] * len(beaches_images)

In [6]:
'''
- Creating our pandas dataframe
    * "Features" holds the image arrays, "Label" the matching class id; the three classes
      are concatenated in the order urban, forest, beaches.
'''
all_images = big_cities_images + forest_images + beaches_images
all_labels = big_cities_label + forest_label + beaches_label

data = pd.DataFrame({"Features": all_images, "Label": all_labels})

# Pull both columns back out as plain Python lists for the train/test split.
X_all, y_all = (list(data[column]) for column in ("Features", "Label"))

In [7]:
'''
- Splitting into train (80%) and test (20%) sets with a fixed random_state
'''

split_parts = train_test_split(X_all, y_all, test_size=0.2, random_state=11)
X_train, X_test, y_train, y_test = split_parts

In [8]:
'''
- Building our model
    * Three Dense(relu) + Dropout stages (500, 300, 100 units), then Flatten and a 3-way
      softmax output layer.
'''

from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout

INPUT_SHAPE = (32, 32, 3)
HIDDEN_UNITS = (500, 300, 100)
DROPOUT_RATE = 0.2
NUM_CLASSES = 3

model = Sequential()
for layer_index, units in enumerate(HIDDEN_UNITS):
    # Only the first layer of a Sequential model needs the input shape. The later Dense layers
    # receive the previous layer's output, so declaring (32, 32, 3) on them was ignored and
    # misleading.
    layer_kwargs = {"input_shape": INPUT_SHAPE} if layer_index == 0 else {}
    model.add(Dense(units=units, activation="relu", **layer_kwargs))
    model.add(Dropout(DROPOUT_RATE))
# Flatten comes after the Dense stack, so the Dense layers above operate on the last axis of
# the image tensor (the summary shows dense_1 with output (None, 32, 32, 500)).
model.add(Flatten())
model.add(Dense(units=NUM_CLASSES, activation="softmax"))


In [9]:
'''
- Train model
    * SGD with default settings, categorical cross-entropy, 24 epochs, batch size 12,
      no shuffling between epochs.
    * NOTE: the test split is also passed as validation data during training.
'''

from keras.utils import to_categorical
from keras import optimizers

sgd = optimizers.SGD()

model.compile(loss="categorical_crossentropy",  optimizer=sgd, metrics=['acc'])

model.summary()

# Take the one-hot width from the model's output layer. Letting to_categorical infer it from
# the data would give train and test targets different widths whenever one split happens to
# miss the highest class id.
num_classes = model.output_shape[-1]

train_inputs = np.asarray(X_train)
test_inputs = np.asarray(X_test)
train_targets = to_categorical(y_train, num_classes=num_classes)
test_targets = to_categorical(y_test, num_classes=num_classes)

history = model.fit(train_inputs,
                    train_targets,
                    epochs=24,
                    verbose=2,
                    batch_size=12,
                    shuffle=False,
                    validation_data=(test_inputs, test_targets))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32, 32, 500)       2000      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 32, 500)       0         
_________________________________________________________________
dense_2 (Dense)              (None, 32, 32, 300)       150300    
_________________________________________________________________
dropout_2 (Dropout)          (None, 32, 32, 300)       0         
_________________________________________________________________
dense_3 (Dense)              (None, 32, 32, 100)       30100     
_________________________________________________________________
dropout_3 (Dropout)          (None, 32, 32, 100)       0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 102400)            0         
__________

In [10]:
'''
- Plotting the training and validation loss recorded in the fit history
'''
import matplotlib.pyplot as plt

loss_curves = (('loss', "train loss"), ('val_loss', "val loss"))
for history_key, legend_label in loss_curves:
    plt.plot(history.history[history_key], label=legend_label)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [11]:
'''
- Applying model to the train dataset
    * Builds the confusion matrix and shows it with the accuracy (truncated to a whole
      percent) in the title.
'''
class_names = ["big_cities", "forest", "beaches"]

pred = model.predict_classes(np.asarray(X_train))
cm = confusion_matrix(y_train, pred)

# Correct predictions sit on the diagonal of the confusion matrix.
correct_count = cm.trace()
acc = int(correct_count/float(len(X_train))*100)

plot_title = 'Confusion matrix, Accuracy: {}%'.format(acc)
plot_confusion_matrix(cm, classes=class_names, title=plot_title)
plt.show()

Confusion matrix, without normalization
[[135  49  26]
 [  0 230   1]
 [  0   9 306]]


In [12]:
# Single-argument print call: emits the same text on Python 2 and Python 3
# (the old print statement is a SyntaxError on Python 3).
print("Accuracy:  " + str(sum(cm.diagonal())/float(len(X_train))))

Accuracy:  0.8875661375661376


In [19]:
'''
- Applying model to the test dataset
    * Builds the confusion matrix and shows it with the accuracy (truncated to a whole
      percent) in the title.
'''

class_names = ["big_cities", "forest", "beaches"]

pred = model.predict_classes(np.asarray(X_test))
cm = confusion_matrix(y_test, pred)

# Correct predictions sit on the diagonal of the confusion matrix.
correct_count = cm.trace()
test_acc = int(correct_count/float(len(X_test))*100)

plot_title = 'Confusion matrix, Accuracy: {}%'.format(test_acc)
plot_confusion_matrix(cm, classes=class_names, title=plot_title)
plt.show()

Confusion matrix, without normalization
[[20 13 16]
 [ 0 57  4]
 [ 3  7 69]]


In [14]:
# Single-argument print call: emits the same text on Python 2 and Python 3
# (the old print statement is a SyntaxError on Python 3).
print("Accuracy:  " + str(sum(cm.diagonal())/float(len(X_test))))  # before: 73%

Accuracy:  0.7724867724867724


In [20]:
'''
- Plotting recall by label
    * recall[i] = cm[i][i] / (sum of row i of the confusion matrix)
'''
# Diagonal entry over its row total. range() instead of xrange() keeps this cell runnable on
# both Python 2 and Python 3.
recall_array = [float(cm[i][i])/sum(cm[i]) for i in range(len(cm))]
fig, ax = plt.subplots()
barlist = plt.bar(range(len(recall_array)), recall_array)

axes = plt.gca()
axes.set_ylim([0.,1])

for rect, bar_color in zip(barlist, ('grey', 'green', 'blue')):
    rect.set_color(bar_color)

# Place ticks at the measured centre of each bar. Hard-coded positions only line up for one
# particular bar alignment, which differs between matplotlib versions.
bar_centers = [rect.get_x() + rect.get_width()/2.0 for rect in barlist]
plt.xticks(bar_centers, ["Urbano", "Floresta", "Praias"])
plt.title("Recall by label")

# Write the rounded value on top of each bar.
for rect, center, recall in zip(barlist, bar_centers, recall_array):
    plt.text(center, rect.get_height(), round(recall, 2), ha='center', va='bottom')

plt.show()

In [21]:
'''
- Plotting precision by label
    * precision[i] = cm[i][i] / (sum of column i of the confusion matrix)
'''
# The built-in sum() adds the rows of cm together, giving one total per column. It is computed
# once here instead of once per label.
column_totals = sum(cm)
# range() instead of xrange() keeps this cell runnable on both Python 2 and Python 3.
precision_array = [float(cm[i][i])/column_totals[i] for i in range(len(cm))]
barlist = plt.bar(range(len(precision_array)), precision_array)

for rect, bar_color in zip(barlist, ('grey', 'green', 'blue')):
    rect.set_color(bar_color)

# Place ticks at the measured centre of each bar. Hard-coded positions only line up for one
# particular bar alignment, which differs between matplotlib versions.
bar_centers = [rect.get_x() + rect.get_width()/2.0 for rect in barlist]
plt.xticks(bar_centers, ["Urbano", "Floresta", "Praias"])
axes = plt.gca()
axes.set_ylim([0.,1.])
plt.title("Precision by label")

# Write the rounded value on top of each bar.
for rect, center, precision in zip(barlist, bar_centers, precision_array):
    plt.text(center, rect.get_height(), round(precision, 2), ha='center', va='bottom')
plt.show()

In [22]:
'''
- Saving model
    * Creates the target directory first so the save does not fail on a fresh checkout.
'''
MODEL_PATH = "models/dl_first_architecture.h5"

model_dir = os.path.dirname(MODEL_PATH)
# isdir check + makedirs (rather than exist_ok=True) keeps this working on Python 2.
if model_dir and not os.path.isdir(model_dir):
    os.makedirs(model_dir)
model.save(MODEL_PATH)