In [68]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt; 
 
# Importing sklearn libraries
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
import hypopt
from hypopt import GridSearch
 
from keras.utils import np_utils
from keras.models import Sequential
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.layers import Dense, Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten, GlobalAveragePooling2D
 
import warnings
warnings.filterwarnings('ignore')

In [70]:
train = [os.path.join("D:\cleared",img) for img in os.listdir("D:\cleared")]

In [71]:
len(train)

437

In [72]:
train[0:5]

['D:\\cleared\\204.png',
 'D:\\cleared\\205.png',
 'D:\\cleared\\206.png',
 'D:\\cleared\\207.png',
 'D:\\cleared\\208.png']

In [73]:
#train_y = [int(img.split("\\")[-1].split("_")[0]) for img in train]

In [74]:
# load the VGG16 network
print("[INFO] loading network...")
 
# chop the top dense layers, include_top=False
model = VGG16(weights="imagenet", include_top=False)
model.summary()

[INFO] loading network...
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None,

In [75]:
def create_features(dataset, pre_model):
 
    x_scratch = []

    for imagePath in dataset:
 
        image = load_img(imagePath, target_size=(224, 224))
        image = img_to_array(image)

        image = np.expand_dims(image, axis=0)
        image = imagenet_utils.preprocess_input(image)
 
        x_scratch.append(image)
 
    x = np.vstack(x_scratch)
    features = pre_model.predict(x, batch_size=32)
    features_flatten = features.reshape((features.shape[0], 7 * 7 * 512))
    return x, features, features_flatten

In [76]:
train_x, train_features, train_features_flatten = create_features(train, model)

In [77]:
print(train_x.shape, train_features.shape, train_features_flatten.shape)

(437, 224, 224, 3) (437, 7, 7, 512) (437, 25088)


In [78]:
z = pd.DataFrame(train_features_flatten)

In [79]:
z.shape

(437, 25088)

In [80]:
z.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25078,25079,25080,25081,25082,25083,25084,25085,25086,25087
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.3327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.897519,0.0
1,0.0,0.0,0.0,0.0,0.203241,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.51048
2,0.0,0.0,0.0,0.0,0.478585,0.0,0.0,0.0,0.0,0.0,...,4.385857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.342268,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.716055,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.382866,0.0
4,0.0,0.0,0.0,0.0,0.227291,0.0,0.0,0.0,0.0,0.0,...,3.947851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(z)
scaled_data = scaler.transform(z)

In [82]:
#scaled_data = scaled_data.flatten()
scaled_data.shape

(437, 25088)

In [83]:
from sklearn.decomposition import PCA

In [84]:
pca = PCA(n_components = 300)

In [85]:
pca.fit(scaled_data)

PCA(copy=True, iterated_power='auto', n_components=300, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [86]:
x_pca = pca.transform(scaled_data)

In [87]:
sum(pca.explained_variance_ratio_)

0.9207363059394993

In [88]:
x_pca.shape

(437, 300)

In [89]:
x_pca

array([[-1.0444730e+01,  4.6611047e+00,  9.8495493e+00, ...,
         1.1189542e+00, -4.3036225e-01, -5.3842467e-01],
       [-3.5536179e+01,  7.7843609e+00,  4.2844013e+01, ...,
        -9.7006187e-02, -4.0296584e-02, -4.5649782e-01],
       [-4.6288466e+00,  1.1535113e+00, -1.8068393e+00, ...,
        -1.5006965e+00, -2.1910863e+00,  7.4389362e+00],
       ...,
       [-7.1319375e+00, -1.7024778e+00, -3.6759336e+00, ...,
        -4.7672758e+00,  3.1064281e+00, -1.3014789e-01],
       [-7.9709377e+00, -2.5413103e+01, -1.4869870e+00, ...,
        -2.5454619e+00,  2.8093722e-01, -7.9439054e+00],
       [-1.8498642e+01, -1.3908784e+01, -2.2208200e+00, ...,
         2.4384363e+00, -5.1710737e-01,  1.0533013e+00]], dtype=float32)

In [90]:
k = pd.DataFrame(x_pca)

In [91]:
k.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
0,-10.44473,4.661105,9.849549,-0.903321,-10.143111,-13.12748,11.195497,3.299569,46.441799,2.735783,...,1.536737,-0.060611,0.323464,0.413137,-0.268665,-0.126839,-1.189388,1.118954,-0.430362,-0.538425
1,-35.536179,7.784361,42.844013,2.155625,24.841558,18.057373,-1.330271,-5.816511,-20.640882,27.364361,...,0.023582,0.638392,0.640741,0.594092,-0.511576,-0.260805,-0.646197,-0.097006,-0.040297,-0.456498
2,-4.628847,1.153511,-1.806839,-3.937223,-20.005037,-0.066961,-2.847182,-3.588678,5.46637,-3.76083,...,2.320066,3.523527,-0.553331,-6.991115,-2.121491,-10.109548,-0.605339,-1.500697,-2.191086,7.438936
3,-18.351189,-0.658571,-15.08188,3.658185,-10.691589,-1.246146,-4.018961,0.788919,8.57992,-3.446223,...,-8.369588,-1.05106,-1.864871,-2.08953,-2.654392,1.654212,7.841827,1.138777,-3.541554,1.616553
4,-8.859509,-8.935403,-3.350512,2.252593,5.826995,10.123581,18.911684,-2.141064,-15.519496,-8.299529,...,0.647525,-0.061152,0.680764,-0.177417,0.623649,-0.307552,-0.176116,0.158529,0.265895,0.310474


In [92]:
# k.to_csv(r'C:\Users\Ankit\Desktop\cleaned_features.csv',index=False)

In [93]:
k=k.round()


In [94]:
k.apply(pd.to_numeric)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
0,-10.0,5.0,10.0,-1.0,-10.0,-13.0,11.0,3.0,46.0,3.0,...,2.0,-0.0,0.0,0.0,-0.0,-0.0,-1.0,1.0,-0.0,-1.0
1,-36.0,8.0,43.0,2.0,25.0,18.0,-1.0,-6.0,-21.0,27.0,...,0.0,1.0,1.0,1.0,-1.0,-0.0,-1.0,-0.0,-0.0,-0.0
2,-5.0,1.0,-2.0,-4.0,-20.0,-0.0,-3.0,-4.0,5.0,-4.0,...,2.0,4.0,-1.0,-7.0,-2.0,-10.0,-1.0,-2.0,-2.0,7.0
3,-18.0,-1.0,-15.0,4.0,-11.0,-1.0,-4.0,1.0,9.0,-3.0,...,-8.0,-1.0,-2.0,-2.0,-3.0,2.0,8.0,1.0,-4.0,2.0
4,-9.0,-9.0,-3.0,2.0,6.0,10.0,19.0,-2.0,-16.0,-8.0,...,1.0,-0.0,1.0,-0.0,1.0,-0.0,-0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,-2.0,-3.0,7.0,-17.0,-2.0,-2.0,-0.0,-0.0,4.0,-0.0,...,10.0,19.0,-9.0,-9.0,1.0,7.0,-4.0,8.0,-1.0,-9.0
433,9.0,-5.0,5.0,-16.0,6.0,-4.0,-2.0,7.0,4.0,-1.0,...,0.0,-0.0,-0.0,-1.0,1.0,-1.0,-3.0,2.0,0.0,0.0
434,-7.0,-2.0,-4.0,-0.0,13.0,-8.0,-15.0,6.0,0.0,-3.0,...,2.0,-4.0,1.0,-4.0,-1.0,1.0,2.0,-5.0,3.0,-0.0
435,-8.0,-25.0,-1.0,8.0,0.0,5.0,18.0,-6.0,-9.0,-8.0,...,-2.0,-1.0,3.0,0.0,-6.0,0.0,6.0,-3.0,0.0,-8.0
