## CNN Final Model

## Import Data

In [1]:
import pandas as pd
import numpy as np
import os
import zipfile
import matplotlib.pyplot as plt
from random import shuffle


In [2]:
### Load the cleaned (non-corrupted) CSV, then discard its leading column
df_clean = pd.read_csv('non_corrupted.csv')
df_clean = df_clean.iloc[:, 1:]
df_clean.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12092,12093,12094,12095,12096,12097,12098,12099,Volcano?,Corrupted?
0,95,101,99,103,95,86,96,89,70,104,...,92,89,103,99,117,116,118,96,1,0
1,91,92,91,89,92,93,96,101,107,104,...,93,95,98,105,104,100,90,81,0,0
2,87,70,72,74,84,78,93,104,106,106,...,95,102,94,80,91,80,84,90,0,0
3,114,118,124,119,95,118,105,116,123,112,...,102,93,109,104,106,117,111,115,0,0
4,79,95,90,82,73,74,77,75,82,87,...,79,78,65,71,62,97,89,73,0,0


The share of images containing volcanoes has increased to almost 23% after dropping duplicates and removing corrupted images in the previous notebook.

## Split Data for Model Insertion

In [3]:
### Shuffle Dataframe, remove 'Corrupted?' column
# Drop the flag column by name instead of by position: a positional
# `.iloc[:, :-1]` removes one more column each time this cell is re-run,
# which would silently delete the 'Volcano?' labels on a second execution.
# errors='ignore' makes a re-run a no-op for the drop step.
# A fixed random_state makes the row order reproducible across kernel restarts.
SHUFFLE_SEED = 42
df_clean = (
    df_clean
    .drop(columns=['Corrupted?'], errors='ignore')
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)
df_clean.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12091,12092,12093,12094,12095,12096,12097,12098,12099,Volcano?
0,75,97,97,95,87,83,87,96,89,76,...,94,92,96,75,75,76,82,92,84,0
1,88,80,86,79,66,77,69,88,106,95,...,89,88,90,86,86,81,75,82,94,0
2,74,74,68,57,77,75,97,104,88,86,...,83,82,88,57,86,79,71,81,78,0
3,76,80,84,74,63,86,91,88,73,73,...,104,98,97,106,105,108,99,108,95,0
4,105,93,96,108,95,100,119,112,101,92,...,89,64,86,72,84,88,98,77,70,1


### Split data into 4 cross validation sets

In [4]:
# Partition the row positions into four contiguous chunks ONCE (the previous
# version re-split the whole frame eight times). Splitting positions and then
# using .iloc yields the same row groups as np.array_split(df_clean, 4) while
# avoiding NumPy's DataFrame code path, which relies on the deprecated
# DataFrame.swapaxes in recent pandas releases.
cv_row_chunks = np.array_split(np.arange(len(df_clean)), 4)
cv_folds = [df_clean.iloc[rows] for rows in cv_row_chunks]

# For each fold: every column but the last is pixel data, the last column is the label.
cv_image_set1, cv_label_set1 = cv_folds[0].iloc[:, :-1], cv_folds[0].iloc[:, -1]

cv_image_set2, cv_label_set2 = cv_folds[1].iloc[:, :-1], cv_folds[1].iloc[:, -1]

cv_image_set3, cv_label_set3 = cv_folds[2].iloc[:, :-1], cv_folds[2].iloc[:, -1]

cv_image_set4, cv_label_set4 = cv_folds[3].iloc[:, :-1], cv_folds[3].iloc[:, -1]

In [5]:
### Sanity-check each fold's image-table shape before initializing the models
for fold_images in (cv_image_set1, cv_image_set2, cv_image_set3, cv_image_set4):
    print(fold_images.shape)

(1552, 12100)
(1551, 12100)
(1551, 12100)
(1551, 12100)


In [6]:
### Sanity-check each fold's label-vector shape before initializing the models
for fold_labels in (cv_label_set1, cv_label_set2, cv_label_set3, cv_label_set4):
    print(fold_labels.shape)

(1552,)
(1551,)
(1551,)
(1551,)


## Test Train Split

In [7]:
### Reshape image data into 110x110 single-channel matrices
# Each row carries 12100 = 110 * 110 pixel values. reshape with -1 on the
# sample axis keeps whatever row count the fold actually has instead of
# hard-coding 1552/1551, and raises if the element count does not fit.
# np.resize, by contrast, silently repeats or truncates data to reach the
# requested shape, which would hide a fold-size mismatch.
IMAGE_SHAPE = (110, 110, 1)
cv_image_set1 = np.asarray(cv_image_set1).reshape((-1,) + IMAGE_SHAPE)
cv_image_set2 = np.asarray(cv_image_set2).reshape((-1,) + IMAGE_SHAPE)
cv_image_set3 = np.asarray(cv_image_set3).reshape((-1,) + IMAGE_SHAPE)
cv_image_set4 = np.asarray(cv_image_set4).reshape((-1,) + IMAGE_SHAPE)

In [8]:
## Rescale every fold's pixel values by 1/255
## (presumably 8-bit intensities mapped to [0, 1] -- confirm the input range)
# NOTE(review): this rebinds the same names, so running the cell twice divides twice.
cv_image_set1, cv_image_set2, cv_image_set3, cv_image_set4 = [
    fold_pixels / 255.0
    for fold_pixels in (cv_image_set1, cv_image_set2, cv_image_set3, cv_image_set4)
]

# Train Adam model

In [9]:
# Import from the public keras.utils namespace: the private
# keras.utils.np_utils module path is not available in newer Keras releases.
from keras.utils import to_categorical # convert to one-hot-encoding
## Create binary categories for Labels
# Each label vector becomes a two-column one-hot matrix (num_classes = 2).
# NOTE(review): the names are rebound in place, so this cell should be run
# exactly once per kernel session; re-running would encode already-encoded labels.
cv_label_set1 = to_categorical(cv_label_set1, num_classes = 2)
cv_label_set2 = to_categorical(cv_label_set2, num_classes = 2)
cv_label_set3 = to_categorical(cv_label_set3, num_classes = 2)
cv_label_set4 = to_categorical(cv_label_set4, num_classes = 2)

Using TensorFlow backend.


#### Initialize Model

In [10]:
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout
from keras.optimizers import Adam

# Architecture preview: two conv + max-pool stages on a 110x110x1 input,
# flattened into a 2-unit output layer.
modelA = Sequential([
    Conv2D(filters=8, kernel_size=(5, 5), activation='relu', input_shape=(110, 110, 1)),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(2, activation="sigmoid"),
])

In [11]:
# Print the per-layer output shapes and parameter counts of the model defined above.
modelA.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 106, 106, 8)       208       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 53, 53, 8)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 51, 51, 16)        1168      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 25, 25, 16)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 10000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 20002     
Total params: 21,378
Trainable params: 21,378
Non-trainable params: 0
_________________________________________________________________


## Train 1, 2, 3

In [14]:
### Initialize Model
# A fresh Sequential is built here so this fold starts from untrained weights.
modelA = Sequential()
modelA.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelA.add(MaxPool2D(pool_size=(2,2)))
modelA.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelA.add(MaxPool2D(pool_size=(2,2)))
modelA.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelA.add(Dense(2, activation = "sigmoid"))

modelA.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 1, 2, 3 (set 4 is held out for evaluation)
train_123 = np.concatenate([cv_image_set1, cv_image_set2, cv_image_set3], axis=0)
label_123 = np.concatenate([cv_label_set1, cv_label_set2, cv_label_set3], axis=0)
print('Sets 1, 2, 3 combined\n\n\n')

print('The input images have dimension' + str(train_123.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_123.shape))

### Train
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelA_trained = modelA.fit(train_123, label_123, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')
## Test on set 4
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreA = modelA.evaluate(cv_image_set4, cv_label_set4)
print('Test Loss on set 4 = ', scoreA[0])
print('Test Accuracy on set 4 = ', scoreA[1])

Sets 1, 2, 3 combined



The input images have dimension(4654, 110, 110, 1)
The test labels have dimension(4654, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 4 =  0.5354993117001655
Test Accuracy on set 4 =  0.7666021923262561


## Train 1, 2, 4

In [15]:
### Initialize Model
# A fresh Sequential is built here so this fold starts from untrained weights.
modelB = Sequential()
modelB.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelB.add(MaxPool2D(pool_size=(2,2)))
modelB.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelB.add(MaxPool2D(pool_size=(2,2)))
modelB.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelB.add(Dense(2, activation = "sigmoid"))

modelB.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 1, 2, 4 (set 3 is held out for evaluation)
train_124 = np.concatenate([cv_image_set1, cv_image_set2, cv_image_set4], axis=0)
label_124 = np.concatenate([cv_label_set1, cv_label_set2, cv_label_set4], axis=0)
# Message now names the sets that are actually concatenated above.
print('Sets 1, 2, 4 combined\n\n\n')

print('The input images have dimension' + str(train_124.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_124.shape))

### Train on sets 1, 2, 4
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelB_trained = modelB.fit(train_124, label_124, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')

## Test on set 3
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreB = modelB.evaluate(cv_image_set3, cv_label_set3)
print('Test Loss on set 3 = ', scoreB[0])
print('Test Accuracy on set 3 = ', scoreB[1])

Sets 2, 3, 4 combined



The input images have dimension(4654, 110, 110, 1)
The test labels have dimension(4654, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 3 =  0.5293892025947571
Test Accuracy on set 3 =  0.7678916828621595


## Train 1, 3, 4

In [16]:
### Initialize Model
# A fresh Sequential is built here so this fold starts from untrained weights.
modelC = Sequential()
modelC.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelC.add(MaxPool2D(pool_size=(2,2)))
modelC.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelC.add(MaxPool2D(pool_size=(2,2)))
modelC.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelC.add(Dense(2, activation = "sigmoid"))

modelC.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 1, 3, 4 (set 2 is held out for evaluation)
train_134 = np.concatenate([cv_image_set1, cv_image_set3, cv_image_set4], axis=0)
label_134 = np.concatenate([cv_label_set1, cv_label_set3, cv_label_set4], axis=0)
print('Sets 1, 3, 4 combined\n\n\n')

print('The input images have dimension' + str(train_134.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_134.shape))

### Train on sets 1, 3, 4
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelC_trained = modelC.fit(train_134, label_134, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')

## Test on set 2
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreC = modelC.evaluate(cv_image_set2, cv_label_set2)
print('Test Loss on set 2 = ', scoreC[0])
print('Test Accuracy on set 2 = ', scoreC[1])

Sets 1, 3, 4 combined



The input images have dimension(4654, 110, 110, 1)
The test labels have dimension(4654, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 2 =  0.504737905884158
Test Accuracy on set 2 =  0.7904577692196032


## Train 2, 3, 4

In [18]:
### Initialize Model
# A fresh Sequential is built here so this fold starts from untrained weights.
modelD = Sequential()
modelD.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelD.add(MaxPool2D(pool_size=(2,2)))
modelD.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelD.add(MaxPool2D(pool_size=(2,2)))
modelD.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelD.add(Dense(2, activation = "sigmoid"))

modelD.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 2, 3, 4 (set 1 is held out for evaluation)
train_234 = np.concatenate([cv_image_set2, cv_image_set3, cv_image_set4], axis=0)
label_234 = np.concatenate([cv_label_set2, cv_label_set3, cv_label_set4], axis=0)
print('Sets 2, 3, 4 combined\n\n\n')

print('The input images have dimension' + str(train_234.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_234.shape))

### Train on sets 2, 3, 4
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelD_trained = modelD.fit(train_234, label_234, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')

## Test on set 1
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreD = modelD.evaluate(cv_image_set1, cv_label_set1)
print('Test Loss on set 1 = ', scoreD[0])
print('Test Accuracy on set 1 = ', scoreD[1])

Sets 2, 3, 4 combined



The input images have dimension(4653, 110, 110, 1)
The test labels have dimension(4653, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 1 =  0.5433497988071638
Test Accuracy on set 1 =  0.7609536082474226


## Train Stochastic Gradient Descent

## Train 1, 2, 3

In [19]:
### Initialize Model
# NOTE(review): this rebinds modelA / scoreA / train_123 / label_123 from the
# Adam section above, so the Adam objects for this fold are no longer
# reachable after this cell runs.
modelA = Sequential()
modelA.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelA.add(MaxPool2D(pool_size=(2,2)))
modelA.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelA.add(MaxPool2D(pool_size=(2,2)))
modelA.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelA.add(Dense(2, activation = "sigmoid"))

# Same network as the Adam run; only the optimizer differs.
modelA.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 1, 2, 3 (set 4 is held out for evaluation)
train_123 = np.concatenate([cv_image_set1, cv_image_set2, cv_image_set3], axis=0)
label_123 = np.concatenate([cv_label_set1, cv_label_set2, cv_label_set3], axis=0)
print('Sets 1, 2, 3 combined\n\n\n')

print('The input images have dimension' + str(train_123.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_123.shape))

### Train
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelA_trained = modelA.fit(train_123, label_123, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')
## Test on set 4
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreA = modelA.evaluate(cv_image_set4, cv_label_set4)
print('Test Loss on set 4 = ', scoreA[0])
print('Test Accuracy on set 4 = ', scoreA[1])

Sets 1, 2, 3 combined



The input images have dimension(4654, 110, 110, 1)
The test labels have dimension(4654, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 4 =  0.5413515110849028
Test Accuracy on set 4 =  0.7666021923262561


## Train 1, 2, 4

In [20]:
### Initialize Model
# NOTE(review): this rebinds modelB / scoreB / train_124 / label_124 from the
# Adam section above, so the Adam objects for this fold are no longer
# reachable after this cell runs.
modelB = Sequential()
modelB.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelB.add(MaxPool2D(pool_size=(2,2)))
modelB.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelB.add(MaxPool2D(pool_size=(2,2)))
modelB.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelB.add(Dense(2, activation = "sigmoid"))

# Same network as the Adam run; only the optimizer differs.
modelB.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 1, 2, 4 (set 3 is held out for evaluation)
train_124 = np.concatenate([cv_image_set1, cv_image_set2, cv_image_set4], axis=0)
label_124 = np.concatenate([cv_label_set1, cv_label_set2, cv_label_set4], axis=0)
# Message now names the sets that are actually concatenated above.
print('Sets 1, 2, 4 combined\n\n\n')

print('The input images have dimension' + str(train_124.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_124.shape))

### Train on sets 1, 2, 4
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelB_trained = modelB.fit(train_124, label_124, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')

## Test on set 3
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreB = modelB.evaluate(cv_image_set3, cv_label_set3)
print('Test Loss on set 3 = ', scoreB[0])
print('Test Accuracy on set 3 = ', scoreB[1])

Sets 2, 3, 4 combined



The input images have dimension(4654, 110, 110, 1)
The test labels have dimension(4654, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 3 =  0.5400045802714669
Test Accuracy on set 3 =  0.7678916828621595


## Train 1, 3, 4

In [21]:
### Initialize Model
# NOTE(review): this rebinds modelC / scoreC / train_134 / label_134 from the
# Adam section above, so the Adam objects for this fold are no longer
# reachable after this cell runs.
modelC = Sequential()
modelC.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelC.add(MaxPool2D(pool_size=(2,2)))
modelC.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelC.add(MaxPool2D(pool_size=(2,2)))
modelC.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelC.add(Dense(2, activation = "sigmoid"))

# Same network as the Adam run; only the optimizer differs.
modelC.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 1, 3, 4 (set 2 is held out for evaluation)
train_134 = np.concatenate([cv_image_set1, cv_image_set3, cv_image_set4], axis=0)
label_134 = np.concatenate([cv_label_set1, cv_label_set3, cv_label_set4], axis=0)
print('Sets 1, 3, 4 combined\n\n\n')

print('The input images have dimension' + str(train_134.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_134.shape))

### Train on sets 1, 3, 4
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelC_trained = modelC.fit(train_134, label_134, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')

## Test on set 2
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreC = modelC.evaluate(cv_image_set2, cv_label_set2)
print('Test Loss on set 2 = ', scoreC[0])
print('Test Accuracy on set 2 = ', scoreC[1])

Sets 1, 3, 4 combined



The input images have dimension(4654, 110, 110, 1)
The test labels have dimension(4654, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 2 =  0.5126757949379319
Test Accuracy on set 2 =  0.7904577692196032


## Train 2, 3, 4

In [22]:
### Initialize Model
# NOTE(review): this rebinds modelD / scoreD / train_234 / label_234 from the
# Adam section above, so the Adam objects for this fold are no longer
# reachable after this cell runs.
modelD = Sequential()
modelD.add(Conv2D(filters = 8, kernel_size = (5,5), activation ='relu', input_shape = (110,110,1)))
modelD.add(MaxPool2D(pool_size=(2,2)))
modelD.add(Conv2D(filters = 16, kernel_size = (3,3), activation ='relu'))
modelD.add(MaxPool2D(pool_size=(2,2)))
modelD.add(Flatten())

# NOTE(review): sigmoid on a 2-unit one-hot output paired with
# categorical_crossentropy is unusual (softmax is the conventional pairing).
# Left as-is so every cross-validation fold keeps the same architecture.
modelD.add(Dense(2, activation = "sigmoid"))

# Same network as the Adam run; only the optimizer differs.
modelD.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

## Combine sets 2, 3, 4 (set 1 is held out for evaluation)
train_234 = np.concatenate([cv_image_set2, cv_image_set3, cv_image_set4], axis=0)
label_234 = np.concatenate([cv_label_set2, cv_label_set3, cv_label_set4], axis=0)
print('Sets 2, 3, 4 combined\n\n\n')

print('The input images have dimension' + str(train_234.shape))
# These are the labels used for fitting, so they are reported as training labels.
print('The training labels have dimension' + str(label_234.shape))

### Train on sets 2, 3, 4
# The (misspelled) history variable name is kept so any later reference still resolves.
moldelD_trained = modelD.fit(train_234, label_234, batch_size=800, epochs = 10)
print('Training complete\n\n\n\n')

## Test on set 1
# evaluate() returns [loss, accuracy] given metrics=['accuracy'] above.
scoreD = modelD.evaluate(cv_image_set1, cv_label_set1)
print('Test Loss on set 1 = ', scoreD[0])
print('Test Accuracy on set 1 = ', scoreD[1])

Sets 2, 3, 4 combined



The input images have dimension(4653, 110, 110, 1)
The test labels have dimension(4653, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete




Test Loss on set 1 =  0.546758536825475
Test Accuracy on set 1 =  0.7609536082474226
