In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib.image import imread
from os import listdir
from keras.preprocessing.image import load_img, img_to_array
from numpy import asarray, savez_compressed, load
from collections import Counter
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight, shuffle
from imblearn.over_sampling import SMOTE
from keras.utils import to_categorical
from keras.applications import resnet50  
from keras.utils import np_utils
from keras.optimizers import SGD, Adam
from keras.models import Model,load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D,GlobalAveragePooling2D
from keras.callbacks import TensorBoard,ReduceLROnPlateau,ModelCheckpoint
from sklearn.mixture import BayesianGaussianMixture
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#Function to load saved satellite imagery data
def load_saved_dataset(file, test_size=0.20, random_state=None):
    """Load a saved ``.npz`` dataset and split it into train and test sets.

    Parameters
    ----------
    file : str or file-like
        Archive readable by ``numpy.load``. Its first two unnamed arrays
        (``arr_0`` and ``arr_1``) are taken as the features and the labels.
    test_size : float, optional
        Forwarded to ``train_test_split``. Defaults to 0.20.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. ``None`` (the default) draws a
        different random split on every call.

    Returns
    -------
    tuple
        ``(trainX, trainY, testX, testY)`` -- both training arrays first,
        then both test arrays. The four shapes are also printed.
    """
    # An .npz archive is read lazily and keeps its file handle open; the
    # context manager releases it as soon as both arrays are in memory.
    with load(file) as data:
        X, y = data['arr_0'], data['arr_1']
    # train_test_split returns (X_train, X_test, y_train, y_test); the values
    # are re-ordered below for the return.
    trainX, testX, trainY, testY = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    print(trainX.shape, trainY.shape, testX.shape, testY.shape)
    return trainX, trainY, testX, testY
  
#function to evaluate model
def eval(testY, predY):
    """Print classification metrics for predicted labels against true labels.

    Prints, in order: accuracy, macro-averaged precision, macro-averaged
    recall, macro-averaged F1 score and the confusion matrix. Nothing is
    returned.

    Note: at notebook level this name shadows Python's built-in ``eval``.

    Parameters
    ----------
    testY : array-like
        True class labels.
    predY : array-like
        Predicted class labels.
    """
    # 'macro' averages the per-class scores with equal weight per class
    macro = {'average': 'macro'}
    # (output template, scoring function, extra keyword arguments)
    reports = (
        ('Accuracy: %f', accuracy_score, {}),
        ('Precision: %f', precision_score, macro),
        ('Recall: %f', recall_score, macro),
        ('F1 score: %f', f1_score, macro),
    )
    for template, scorer, options in reports:
        print(template % scorer(testY, predY, **options))
    print('Confusion Matrix:\n', confusion_matrix(testY, predY))

In [3]:
#Pedestrian Accidents - First Split

In [3]:
#load data
trainX, trainY, testX, testY = load_saved_dataset('ped_acc.npz')
print(Counter(trainY), Counter(testY))
#find average number of instances
# target size per class: a quarter of the training rows (4 classes, labelled 0-3)
m = round(sum(Counter(trainY).values())/4)
print('Mean: ', m)
#implement SMOTE on minority classes
# flatten every image to a single feature row before resampling
trainX = trainX.reshape(trainX.shape[0], -1)
# classes 1-3 are grown to m samples each; class 0 is not listed in the dict
sm = SMOTE({1:m, 2:m, 3:m})
X_sm, Y_sm = sm.fit_sample(trainX, trainY)
print('After SMOTE: ', X_sm.shape, Y_sm.shape, Counter(Y_sm))
#split out majority set from dataset
# Boolean masks select the rows in one vectorised step (same rows, same order)
# instead of copying them one at a time through Python lists.
minority_mask = np.isin(Y_sm, [1, 2, 3])
X_tmp = X_sm[minority_mask]
Y_tmp = Y_sm[minority_mask]
ds_maj = X_sm[Y_sm == 0]
# NOTE: despite the label, ds_maj still holds every class-0 row at this point;
# it is cut down to m rows by the next statement.
print('After undersampling Majority: ', X_tmp.shape, Y_tmp.shape, ds_maj.shape)
#undersample majority set to m number of instances
# unseeded: a different random subset of class-0 rows is drawn on every run
ds_maj = shuffle(ds_maj, n_samples=m)
#generate target set of 0's for undersampled majority set
ds_maj_y = np.repeat(0, len(ds_maj))
#combine datasets
X1_tmp = np.concatenate([X_tmp, ds_maj])
Y1_tmp = np.concatenate([Y_tmp, ds_maj_y])
#shuffle combined dataset
X_sm, Y_sm = shuffle(X1_tmp, Y1_tmp, random_state=1)
print('Final Class Counter: ', Counter(Y_sm))
#prepare data for training
# restore the flattened rows to 128x128 RGB images
X_sm = X_sm.reshape(X_sm.shape[0], 128, 128, 3)
# Fix the one-hot width at 4. Without num_classes the width is inferred from
# the largest label present, so a test split that happened to contain no
# class-3 sample would yield 3 columns and no longer match the 4-unit output.
Y_sm = to_categorical(Y_sm, num_classes=4)
testY = to_categorical(testY, num_classes=4)
print('Final training dataset: ', X_sm.shape, Y_sm.shape)
print('Final validation dataset: ', testX.shape, testY.shape)

(46283, 128, 128, 3) (46283,) (11571, 128, 128, 3) (11571,)
Counter({0: 45069, 1: 863, 2: 300, 3: 51}) Counter({0: 11244, 1: 226, 2: 91, 3: 10})
Mean:  11571
After SMOTE:  (79782, 49152) (79782,) Counter({0: 45069, 1: 11571, 2: 11571, 3: 11571})
After undersampling Majority:  (34713, 49152) (34713,) (45069, 49152)
Final Class Counter:  Counter({0: 11571, 1: 11571, 2: 11571, 3: 11571})
Final training dataset:  (46284, 128, 128, 3) (46284, 4)
Final validation dataset:  (11571, 128, 128, 3) (11571, 4)


In [4]:
#train first split
# Backbone: ResNet50 initialised with ImageNet weights, classifier top removed
base_model = resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),
)
# Head applied on the backbone output, in order:
# global average pooling -> 256-unit ReLU layer -> dropout (rate 0.5)
x = base_model.output
for head_layer in (GlobalAveragePooling2D(), Dense(256, activation='relu'), Dropout(0.5)):
    x = head_layer(x)
# one softmax output per class (4 classes)
predictions = Dense(4, activation='softmax')(x)
model_dssm1 = Model(inputs=base_model.input, outputs=predictions)

model_dssm1.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.0001),
    metrics=['acc'],
)

# the held-out test arrays are passed as the validation data for each epoch
history1 = model_dssm1.fit(
    X_sm, Y_sm,
    batch_size=256,
    epochs=25,
    validation_data=(testX, testY),
)

W0804 14:49:37.451262 139900605351680 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0804 14:49:37.478320 139900605351680 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0804 14:49:37.490186 139900605351680 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0804 14:49:37.517039 139900605351680 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0804 14:49:37.517740

Train on 46284 samples, validate on 11571 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [5]:
#model predictions
# one row of 4 softmax scores per test image; argmax picks the predicted class
predictions = model_dssm1.predict(testX)
predY = np.argmax(predictions, axis=1)
# testY was one-hot encoded for training/validation; turn it back into class
# indices. The ndim guard keeps this cell safe to re-run: a second argmax over
# axis=1 on the already 1-D label vector would raise an error.
if testY.ndim > 1:
    testY = np.argmax(testY, axis=1)
#Results #1
eval(testY, predY)

Accuracy: 0.959036
Precision: 0.379923
Recall: 0.311289
F1 score: 0.326160
Confusion Matrix:
 [[11061   169    13     1]
 [  188    34     4     0]
 [   73    16     1     1]
 [    8     1     0     1]]


In [None]:
#Pedestrian Accidents - Second Split

In [5]:
#load data
trainX, trainY, testX, testY = load_saved_dataset('ped_acc.npz')
print(Counter(trainY), Counter(testY))
#find average number of instances
# target size per class: a quarter of the training rows (4 classes, labelled 0-3)
m = round(sum(Counter(trainY).values())/4)
print('Mean: ', m)
#implement SMOTE on minority classes
# flatten every image to a single feature row before resampling
trainX = trainX.reshape(trainX.shape[0], -1)
# classes 1-3 are grown to m samples each; class 0 is not listed in the dict
sm = SMOTE({1:m, 2:m, 3:m})
X_sm, Y_sm = sm.fit_sample(trainX, trainY)
print('After SMOTE: ', X_sm.shape, Y_sm.shape, Counter(Y_sm))
#split out majority set from dataset
# Boolean masks select the rows in one vectorised step (same rows, same order)
# instead of copying them one at a time through Python lists.
minority_mask = np.isin(Y_sm, [1, 2, 3])
X_tmp = X_sm[minority_mask]
Y_tmp = Y_sm[minority_mask]
ds_maj = X_sm[Y_sm == 0]
# NOTE: despite the label, ds_maj still holds every class-0 row at this point;
# it is cut down to m rows by the next statement.
print('After undersampling Majority: ', X_tmp.shape, Y_tmp.shape, ds_maj.shape)
#undersample majority set to m number of instances
# unseeded: a different random subset of class-0 rows is drawn on every run
ds_maj = shuffle(ds_maj, n_samples=m)
#generate target set of 0's for undersampled majority set
ds_maj_y = np.repeat(0, len(ds_maj))
#combine datasets
X1_tmp = np.concatenate([X_tmp, ds_maj])
Y1_tmp = np.concatenate([Y_tmp, ds_maj_y])
#shuffle combined dataset
X_sm, Y_sm = shuffle(X1_tmp, Y1_tmp, random_state=1)
print('Final Class Counter: ', Counter(Y_sm))
#prepare data for training
# restore the flattened rows to 128x128 RGB images
X_sm = X_sm.reshape(X_sm.shape[0], 128, 128, 3)
# Fix the one-hot width at 4. Without num_classes the width is inferred from
# the largest label present, so a test split that happened to contain no
# class-3 sample would yield 3 columns and no longer match the 4-unit output.
Y_sm = to_categorical(Y_sm, num_classes=4)
testY = to_categorical(testY, num_classes=4)
print('Final training dataset: ', X_sm.shape, Y_sm.shape)
print('Final validation dataset: ', testX.shape, testY.shape)

(46283, 128, 128, 3) (46283,) (11571, 128, 128, 3) (11571,)
Counter({0: 45060, 1: 857, 2: 316, 3: 50}) Counter({0: 11253, 1: 232, 2: 75, 3: 11})
Mean:  11571
After SMOTE:  (79773, 49152) (79773,) Counter({0: 45060, 1: 11571, 2: 11571, 3: 11571})
After undersampling Majority:  (34713, 49152) (34713,) (45060, 49152)
Final Class Counter:  Counter({0: 11571, 1: 11571, 2: 11571, 3: 11571})
Final training dataset:  (46284, 128, 128, 3) (46284, 4)
Final validation dataset:  (11571, 128, 128, 3) (11571, 4)


In [6]:
#train second split
base_model = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape= (128,128,3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(4, activation='softmax')(x)
model_dssm2 = Model(inputs=base_model.input, outputs=predictions)

model_dssm2.compile(optimizer=Adam(lr=0.0001), loss = 'categorical_crossentropy', metrics=['acc'])

history2 = model_dssm2.fit(X_sm, Y_sm, validation_data=(testX, testY), epochs= 25, batch_size = 256)

W0804 16:28:25.952041 140251022423808 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0804 16:28:25.967650 140251022423808 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0804 16:28:25.972793 140251022423808 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0804 16:28:25.994935 140251022423808 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0804 16:28:25.995633

Train on 46284 samples, validate on 11571 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [7]:
#model predictions
# one row of 4 softmax scores per test image; argmax picks the predicted class
predictions = model_dssm2.predict(testX)
predY = np.argmax(predictions, axis=1)
# testY was one-hot encoded for training/validation; turn it back into class
# indices. The ndim guard keeps this cell safe to re-run: a second argmax over
# axis=1 on the already 1-D label vector would raise an error.
if testY.ndim > 1:
    testY = np.argmax(testY, axis=1)
#Results #2
eval(testY, predY)

Accuracy: 0.938035
Precision: 0.297224
Recall: 0.316647
F1 score: 0.298686
Confusion Matrix:
 [[10791   435    27     0]
 [  162    59    11     0]
 [   52    19     4     0]
 [    4     7     0     0]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [None]:
#Pedestrian Accidents - Third Split

In [3]:
#load data
trainX, trainY, testX, testY = load_saved_dataset('ped_acc.npz')
print(Counter(trainY), Counter(testY))
#find average number of instances
# target size per class: a quarter of the training rows (4 classes, labelled 0-3)
m = round(sum(Counter(trainY).values())/4)
print('Mean: ', m)
#implement SMOTE on minority classes
# flatten every image to a single feature row before resampling
trainX = trainX.reshape(trainX.shape[0], -1)
# classes 1-3 are grown to m samples each; class 0 is not listed in the dict
sm = SMOTE({1:m, 2:m, 3:m})
X_sm, Y_sm = sm.fit_sample(trainX, trainY)
print('After SMOTE: ', X_sm.shape, Y_sm.shape, Counter(Y_sm))
#split out majority set from dataset
# Boolean masks select the rows in one vectorised step (same rows, same order)
# instead of copying them one at a time through Python lists.
minority_mask = np.isin(Y_sm, [1, 2, 3])
X_tmp = X_sm[minority_mask]
Y_tmp = Y_sm[minority_mask]
ds_maj = X_sm[Y_sm == 0]
# NOTE: despite the label, ds_maj still holds every class-0 row at this point;
# it is cut down to m rows by the next statement.
print('After undersampling Majority: ', X_tmp.shape, Y_tmp.shape, ds_maj.shape)
#undersample majority set to m number of instances
# unseeded: a different random subset of class-0 rows is drawn on every run
ds_maj = shuffle(ds_maj, n_samples=m)
#generate target set of 0's for undersampled majority set
ds_maj_y = np.repeat(0, len(ds_maj))
#combine datasets
X1_tmp = np.concatenate([X_tmp, ds_maj])
Y1_tmp = np.concatenate([Y_tmp, ds_maj_y])
#shuffle combined dataset
X_sm, Y_sm = shuffle(X1_tmp, Y1_tmp, random_state=1)
print('Final Class Counter: ', Counter(Y_sm))
#prepare data for training
# restore the flattened rows to 128x128 RGB images
X_sm = X_sm.reshape(X_sm.shape[0], 128, 128, 3)
# Fix the one-hot width at 4. Without num_classes the width is inferred from
# the largest label present, so a test split that happened to contain no
# class-3 sample would yield 3 columns and no longer match the 4-unit output.
Y_sm = to_categorical(Y_sm, num_classes=4)
testY = to_categorical(testY, num_classes=4)
print('Final training dataset: ', X_sm.shape, Y_sm.shape)
print('Final validation dataset: ', testX.shape, testY.shape)

(46283, 128, 128, 3) (46283,) (11571, 128, 128, 3) (11571,)
Counter({0: 45030, 1: 896, 2: 305, 3: 52}) Counter({0: 11283, 1: 193, 2: 86, 3: 9})
Mean:  11571
After SMOTE:  (79743, 49152) (79743,) Counter({0: 45030, 1: 11571, 2: 11571, 3: 11571})
After undersampling Majority:  (34713, 49152) (34713,) (45030, 49152)
Final Class Counter:  Counter({0: 11571, 1: 11571, 2: 11571, 3: 11571})
Final training dataset:  (46284, 128, 128, 3) (46284, 4)
Final validation dataset:  (11571, 128, 128, 3) (11571, 4)


In [4]:
#train third split
base_model = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape= (128,128,3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(4, activation='softmax')(x)
model_dssm3 = Model(inputs=base_model.input, outputs=predictions)

model_dssm3.compile(optimizer=Adam(lr=0.0001), loss = 'categorical_crossentropy', metrics=['acc'])

history3 = model_dssm3.fit(X_sm, Y_sm, validation_data=(testX, testY), epochs= 25, batch_size = 256)

W0804 19:00:17.020333 139644722501376 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0804 19:00:17.035673 139644722501376 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0804 19:00:17.040537 139644722501376 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0804 19:00:17.061901 139644722501376 deprecation_wrapper.py:119] From /usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0804 19:00:17.062586

Train on 46284 samples, validate on 11571 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
#model predictions
# one row of 4 softmax scores per test image; argmax picks the predicted class
predictions = model_dssm3.predict(testX)
predY = np.argmax(predictions, axis=1)
# testY was one-hot encoded for training/validation; turn it back into class
# indices. The ndim guard keeps this cell safe to re-run: a second argmax over
# axis=1 on the already 1-D label vector would raise an error.
if testY.ndim > 1:
    testY = np.argmax(testY, axis=1)
#Results #3
eval(testY, predY)

Accuracy: 0.953159
Precision: 0.303816
Recall: 0.313624
F1 score: 0.306615
Confusion Matrix:
 [[10986   246    51     0]
 [  145    34    13     1]
 [   53    24     9     0]
 [    4     5     0     0]]
