In [None]:
# hide
# Development convenience: autoreload mode 2 re-imports edited modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
# default_exp feature_extractor

# VGG16 Feature Extractor and Logistic Regression Classifier

> This notebook shows how to extract image features with a pretrained VGG16 network and how to train a logistic regression classifier on those features to predict whether an image shows flooding.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
# example of using the vgg16 model as a feature extraction model
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Flatten
from pickle import dump
import os

In [None]:
# Convolutional base only: include_top=False leaves out the dense classifier
# head, so predict() returns feature maps rather than ImageNet class scores.
vgg16 = VGG16(include_top=False, weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
import numpy as np
# Sanity check on a single image before processing whole directories.
img_path = 'image_1.jpg'
image = load_img(img_path, target_size=(224, 224))  # resized on load to 224x224

# pixel array -> add a leading batch axis -> VGG16-specific preprocessing
x = preprocess_input(img_to_array(image)[np.newaxis, ...])

features = vgg16.predict(x)
print(features.shape)

(1, 7, 7, 512)


In [None]:
# Collapse the (1, 7, 7, 512) feature map into one 1-D vector (7*7*512 = 25088 values).
features.flatten()

array([ 0.     ,  0.     ,  0.     , ...,  0.     , 46.54387,  0.     ],
      dtype=float32)

In [None]:
#export
def get_features(path, model=None):
    '''
    Extract one flattened feature vector per JPEG image found in a directory.

    Every file in `path` whose name ends in `.jpg` (case-insensitive) is loaded
    at 224x224, converted to an array, passed through the VGG16
    `preprocess_input` step and then through `model.predict`. The resulting
    feature map is flattened to 1-D (7*7*512 = 25088 values with the
    module-level `vgg16` base). Files are visited in sorted filename order, so
    row i of the result corresponds to the i-th `.jpg` name in that order.

    Parameters
    ----------
    path : str
        Directory that holds the images. A trailing path separator is optional.
    model : object, optional
        Anything exposing `predict(batch)`. Defaults to the module-level
        `vgg16` model, which must already exist when the default is used.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_images, n_features). When the directory contains no
        `.jpg` files the result is an empty array of shape (0,).
    '''
    if model is None:
        model = vgg16

    all_features = []
    for fx in sorted(os.listdir(path)):
        # Case-insensitive so that e.g. 'IMG_1.JPG' is not silently skipped.
        if not fx.lower().endswith('.jpg'):
            continue

        # os.path.join works whether or not `path` ends with a separator;
        # plain string concatenation would build 'dirimage.jpg' without one.
        image = load_img(os.path.join(path, fx), target_size=(224, 224))

        # convert the image pixels to a numpy array
        image = img_to_array(image)

        # add the leading batch axis the model expects: (1, height, width, channels)
        image = np.expand_dims(image, axis=0)

        # prepare the image for the VGG model
        image = preprocess_input(image)

        # get extracted features and store them as a single 1-D row
        features = model.predict(image)
        all_features.append(features.flatten())

        print('Done with {}'.format(fx[:-4]))
    return np.asarray(all_features)


In [None]:
# Flood images: one flattened VGG16 feature row per .jpg in this directory.
path = '/content/flood/'
ft = get_features(path)

Done with image_1
Done with image_10
Done with image_100
Done with image_101
Done with image_102
Done with image_103
Done with image_104
Done with image_105
Done with image_106
Done with image_107
Done with image_108
Done with image_109
Done with image_11
Done with image_110
Done with image_111
Done with image_112
Done with image_113
Done with image_114
Done with image_115
Done with image_116
Done with image_117
Done with image_118
Done with image_119
Done with image_12
Done with image_120
Done with image_121
Done with image_122
Done with image_123
Done with image_124
Done with image_125
Done with image_126
Done with image_127
Done with image_128
Done with image_129
Done with image_13
Done with image_130
Done with image_131
Done with image_132
Done with image_133
Done with image_134
Done with image_135
Done with image_136
Done with image_137
Done with image_138
Done with image_139
Done with image_14
Done with image_140
Done with image_141
Done with image_142
Done with image_143
Done wi

In [None]:
# (number of flood images, flattened feature length)
ft.shape

(253, 25088)

In [None]:
# No-flood images: same extraction, run on the second directory.
path = '/content/no_flood/'
nf = get_features(path)

Done with 005vc16644
Done with 005vc16645
Done with 1_Seneca_EW
Done with 1_Union_NS
Done with 2_Pike_EW
Done with 2_Pike_NS
Done with 2_University_NS
Done with 3_Union_EW
Done with 4_Olive_NS
Done with 4_Pine_NS
Done with 5_Pine_EW
Done with Alaskan_Madison_NS
Done with Alaskan_Pike_NS
Done with Alaskan_University_NS
Done with Broad-Third
Done with Broadway_E_Roy_NS
Done with CCTV021a
Done with CCTV022
Done with CCTV035a
Done with CCTV037a
Done with CCTV042a
Done with CCTV043a
Done with CCTV044a
Done with CCTV045a
Done with CCTV047a
Done with CCTV091a-L
Done with CCTV1070
Done with CMH091
Done with CivicCtr-Rich
Done with Day1069
Done with Dublin-315
Done with Eastlake_E_Stewart_NS
Done with Elliott_W_Mercer_NS
Done with Fourth-Mound
Done with Fourth-Town
Done with Front-I70
Done with Front-Main
Done with Front-Mound
Done with Front-Nationwide
Done with Front-Spring
Done with HighMain
Done with I-670_at_SR-315
Done with I-675_at_Colonel_Glenn_Highway
Done with I-675_at_Indian_Ripple_R

In [None]:
# (number of no-flood images, flattened feature length)
nf.shape

(76, 25088)

In [None]:
import pandas as pd

In [None]:
# No-flood features as a table: one row per image, one integer-named column per feature.
df1 = pd.DataFrame(nf)

In [None]:
# Negative class: the scalar 0 is broadcast to every no-flood row.
df1['label'] = 0

In [None]:
# Flood features as a table: one row per image, one integer-named column per feature.
df2 = pd.DataFrame(ft)

In [None]:
# Positive class: the scalar 1 is broadcast to every flood row.
df2['label'] = 1

In [None]:
# Stack no-flood rows on top of flood rows. ignore_index=True renumbers the
# rows 0..n-1; without it both source frames contribute their own 0-based
# labels and the combined index contains duplicates.
df3 = pd.concat([df1, df2], ignore_index=True)

In [None]:
# Display the combined table built in the previous cell instead of
# concatenating the two frames a second time.
df3

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,25048,25049,25050,25051,25052,25053,25054,25055,25056,25057,25058,25059,25060,25061,25062,25063,25064,25065,25066,25067,25068,25069,25070,25071,25072,25073,25074,25075,25076,25077,25078,25079,25080,25081,25082,25083,25084,25085,25086,25087
0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,4.782909,0.0,0.000000,0.000000,0.0,40.074249,0.0,0.0,0.0,0.000000,13.051147,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,31.390358,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000
1,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,12.322658,0.0,0.000000,18.123991,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,5.307984,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,53.128769,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.425227,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000
2,0.0,0.0,0.0,0.0,64.251503,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.935779,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.000000,0.000000,5.921244,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,6.013273
3,0.0,0.0,0.0,0.0,21.469673,19.998602,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,3.659942,0.0,0.0,0.000000,0.0,0.0,0.111449,0.0,0.0,0.000000,0.0,0.573259,0.0000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,14.646975,0.000000
4,0.0,0.0,0.0,0.0,31.258314,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,19.977528,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,63.313198,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,29.017616,0.0,0.0,0.000000,0.0,0.0,33.997482,0.0,0.0,2.313589,0.0,0.000000,0.0000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,31.409794,0.0,16.259485,10.092151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248,0.0,0.0,0.0,0.0,6.728823,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,5.280284,0.0,0.000000,0.000000,0.0,3.448245,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,25.217863,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,12.519950,0.0,0.0,0.000000,0.0,0.000000,8.3123,0.0,0.969183,0.000000,14.424568,0.000000,2.852381,0.000000,0.377016,2.500970,0.000000,0.0,0.0,0.000000,0.0,1.539717,0.000000
249,0.0,0.0,0.0,0.0,42.643631,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,18.370747,0.000000,0.0,0.000000,0.0,0.0,0.0,7.903407,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,33.604862,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0000,0.0,0.000000,3.590216,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,14.266870,0.000000
250,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,155.178604,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,22.991663,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,3.500954,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.918267,0.000000,0.0,0.0,0.000000,0.0,10.646352,0.000000
251,0.0,0.0,0.0,0.0,11.763714,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,54.160988,0.000000,0.0,0.0,0.0,0.0,0.0,38.743473,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,3.931798,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,1.313841,0.000000


In [None]:
# Class balance: number of flood (1) and no-flood (0) rows.
df3['label'].value_counts()

1    253
0     76
Name: label, dtype: int64

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with scikit-learn's default hyperparameters
# (the repr printed after fitting lists them).
log_model = LogisticRegression()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the images for evaluation.
# - The 'label' column is dropped from the feature matrix: leaving it in lets
#   the classifier read the answer directly and inflates every metric.
# - stratify keeps the flood / no-flood ratio the same in both splits.
# - random_state makes the split (and the reported scores) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df3.drop(columns='label'),
    df3['label'],
    test_size=0.2,
    stratify=df3['label'],
    random_state=42,
)

In [None]:
# Inspect the training portion of the split.
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,25048,25049,25050,25051,25052,25053,25054,25055,25056,25057,25058,25059,25060,25061,25062,25063,25064,25065,25066,25067,25068,25069,25070,25071,25072,25073,25074,25075,25076,25077,25078,25079,25080,25081,25082,25083,25084,25085,25086,25087
241,0.0,0.0,0.0,0.000000,7.433420,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,21.088278,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,17.617620,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,17.273216,0.0,0.000000,0.0,12.545872,0.000000,0.000000,0.00000,0.000000,0.0,0.000000,0.0
237,0.0,0.0,0.0,0.000000,11.201962,0.0,0.0,7.230029,0.0,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,6.113144,0.000000,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,3.120533,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.768721,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,5.525537,0.0,1.043730,0.0,3.583110,0.000000,0.000000,0.00000,0.000000,0.0,0.000000,0.0
111,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.658015,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,2.861928,0.0,0.0,0.000000,0.0,0.0,2.010965,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.030239,12.749418,8.176852,0.516059,0.0,0.000000,0.0,15.330009,0.000000,0.000000,0.00000,0.000000,0.0,4.188579,0.0
89,0.0,0.0,0.0,0.000000,35.478561,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.00000,0.0,6.093552,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.043684,0.00000,0.000000,0.0,0.000000,0.0
197,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,37.012638,27.519007,0.00000,0.000000,0.0,0.0,0.0,0.0,0.000000,45.878113,101.24678,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,18.125708,0.0,0.000000,28.087433,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,1.407147,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,34.377892,0.0,3.239211,0.0,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,0.0,0.0,0.0,0.000000,11.763714,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.00000,0.0,54.160988,0.0,0.0,0.0,0.0,0.0,0.0,38.743473,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,3.931798,0.0,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,1.313841,0.0
231,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,49.797634,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,3.909983,5.693439,0.0,0.000000,0.0,3.469330,0.000000,0.000000,0.00000,0.000000,0.0,0.000000,0.0
9,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,3.03844,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,26.310814,0.0,18.823215,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,42.392452,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,58.257053,0.000000,0.000000,0.00000,0.000000,0.0,0.000000,0.0
252,0.0,0.0,0.0,0.000000,40.630161,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.00000,3.624081,0.0,0.0,0.0,0.0,6.738894,0.000000,0.00000,0.0,36.454014,0.0,0.0,0.0,0.0,0.0,0.0,47.027584,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0,2.605985,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,29.724831,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.448785,0.0,0.000000,1.953861,0.000000,0.00000,0.000000,0.0,0.000000,0.0


In [None]:
from sklearn.decomposition import PCA

In [None]:
# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share (here 99%) of the variance.
pca = PCA(n_components=0.99)

In [None]:
# Exploratory check: how many components are needed for 99% of the variance.
# The 'label' column is removed first so the target is not treated as a feature.
# The projection is only inspected here; it is not fed to the classifier.
pca.fit_transform(df3.drop(columns='label')).shape

(329, 309)

In [None]:
# Fit the classifier on the training split only.
log_model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
# Predicted class (0 or 1) for each held-out image.
y_pred = log_model.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision / recall / F1 on the held-out split (0 = no flood, 1 = flood).
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.94      0.97        16
           1       0.98      1.00      0.99        50

    accuracy                           0.98        66
   macro avg       0.99      0.97      0.98        66
weighted avg       0.99      0.98      0.98        66

