In [34]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import MaxPooling2D, GlobalAveragePooling2D
from PIL import Image
%matplotlib inline

In [35]:
model = VGG16(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
)

In [36]:
def load_img(path_to_img, size=224):
  img = image.load_img(path_to_img, target_size=(size,size))
  data = image.img_to_array(img)
  return data

def show_img(img):
  plt.imshow(img.astype('uint8'))

def predict(model, img):
  return model.predict(img[tf.newaxis, :])

In [11]:
# img = load_img('./sweater.jpg')
# show_img(img)
# print(predict(model, img))

In [12]:
# img = load_img('./hen-vs-rooster.jpg')
# show_img(img)
# print(predict(model, img))

In [16]:
# model.summary()

In [37]:
layers = [
    'block1_pool', 
    'block2_pool',
    'block3_pool',
    'block4_pool',
    'block5_pool'
    ]
# models = dict()
# for n in names:
#     models[n] = Model(inputs=model.input, outputs=model.get_layer(n).output)

In [18]:
# img = load_img('./hen-vs-rooster.jpg')

# for k in models:
#     preds = predict(models[k], img)
#     print(k)
#     print(preds.shape)
#     avg_pool = np.mean(preds, axis=(1,2))
#     max_pool = np.max(preds, axis=(1,2))
#     print(k, 'avg', avg_pool.shape)
#     print(k, 'max', max_pool.shape)

In [19]:
# import glob

# files = glob.glob('./data/*.jpg')

In [20]:
# len(files)

In [38]:
import pandas as pd

In [22]:
df = pd.DataFrame()

In [24]:
# df['files'] = files

In [21]:
df.head()

Unnamed: 0,files
0,./data/dog.8011.jpg
1,./data/cat.5077.jpg
2,./data/dog.7322.jpg
3,./data/cat.2718.jpg
4,./data/cat.10151.jpg


In [22]:
df['y'] = df['files'].str.contains('dog', regex=False)

In [23]:
df.head()

Unnamed: 0,files,y
0,./data/dog.8011.jpg,True
1,./data/cat.5077.jpg,False
2,./data/dog.7322.jpg,True
3,./data/cat.2718.jpg,False
4,./data/cat.10151.jpg,False


In [24]:
df.to_csv('data.csv', index=False)

In [42]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

In [43]:
LAYER = 'block5_pool'
POOL = 'avg'

In [44]:
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,files,y
0,./data/dog.8011.jpg,True
1,./data/cat.5077.jpg,False
2,./data/dog.7322.jpg,True
3,./data/cat.2718.jpg,False
4,./data/cat.10151.jpg,False


In [95]:
def process_data(feature_extractor, path):
    img = load_img(path)
    preds = feature_extractor.predict(img[tf.newaxis, :])
    
    avgs = [np.mean(p, axis=(1,2))[0] for p in preds]
    avgs_keys = [l + '_avg' for l in layers]
    
    maxs = [np.max(p, axis=(1,2))[0] for p in preds]
    maxs_keys = [l + '_max' for l in layers]
    return dict(zip(avgs_keys + maxs_keys, avgs + maxs))    

In [96]:
features = Model(inputs=model.input, outputs=[model.get_layer(l).output for l in layers])

In [97]:
a = process_data(features, './data/dog.8011.jpg')

In [98]:
for k in a:
    print(k, a[k].shape)

block1_pool_avg (64,)
block2_pool_avg (128,)
block3_pool_avg (256,)
block4_pool_avg (512,)
block5_pool_avg (512,)
block1_pool_max (64,)
block2_pool_max (128,)
block3_pool_max (256,)
block4_pool_max (512,)
block5_pool_max (512,)


In [99]:
from sklearn.model_selection import train_test_split

In [100]:
X_train, X_test, y_train, y_test = train_test_split(df['files'], df['y'], train_size=1000)

In [101]:
X_train.shape
y_train.shape

(1000,)

In [102]:
y_train.value_counts()

False    505
True     495
Name: y, dtype: int64

In [103]:
feature_vectors = []
for x in X_train:
    values = process_data(features, x)
    feature_vectors.append(values)
    

In [104]:
len(feature_vectors)

1000

In [105]:
vectors_df = pd.DataFrame(feature_vectors)

In [106]:
vectors_df.head()

Unnamed: 0,block1_pool_avg,block2_pool_avg,block3_pool_avg,block4_pool_avg,block5_pool_avg,block1_pool_max,block2_pool_max,block3_pool_max,block4_pool_max,block5_pool_max
0,"[232.43803, 111.86441, 114.63116, 69.70049, 10...","[550.8669, 302.81857, 240.82935, 572.07416, 16...","[163.54953, 826.91364, 318.92474, 218.27974, 1...","[7.237293, 33.711315, 91.29004, 46.802017, 70....","[0.054669693, 0.34990084, 5.220468, 0.0, 3.832...","[1532.8881, 623.8135, 383.6462, 366.66348, 183...","[3857.636, 4501.8994, 4419.2554, 5512.4546, 20...","[4617.9614, 4674.4004, 6450.3413, 4565.4775, 2...","[256.21313, 828.3305, 665.2178, 1577.9825, 103...","[2.678815, 8.587236, 36.505814, 0.0, 49.224876..."
1,"[294.631, 134.24577, 2.829853, 123.9705, 1309....","[200.2058, 462.6417, 335.24124, 819.8253, 149....","[317.4424, 603.2505, 481.95248, 458.0383, 239....","[7.0778255, 33.94589, 94.65074, 24.571453, 20....","[4.868306, 12.291414, 15.248455, 0.0, 12.21828...","[2098.925, 975.886, 336.7371, 551.4925, 2027.6...","[2037.4856, 7046.905, 6817.8193, 9259.778, 298...","[3463.657, 4775.4287, 7660.5615, 6787.7236, 31...","[500.42957, 679.61835, 845.0152, 639.233, 592....","[65.51491, 160.98589, 82.702286, 0.0, 119.8859..."
2,"[209.50548, 126.316536, 221.24706, 76.084045, ...","[214.75456, 284.5005, 210.76225, 595.37885, 75...","[169.68027, 529.8136, 208.36688, 92.062935, 45...","[13.054252, 31.958807, 66.686386, 18.833864, 7...","[1.850535, 4.7587934, 6.931901, 0.0, 4.100008,...","[1749.6357, 542.82874, 809.092, 473.13736, 189...","[1764.8684, 4686.0415, 4429.3525, 4371.765, 13...","[3662.009, 2686.1162, 3969.7754, 2326.9268, 23...","[620.13275, 1119.0118, 659.8493, 629.28015, 24...","[47.7252, 100.31574, 118.86117, 0.0, 53.181255..."
3,"[275.726, 120.23607, 57.588947, 123.55803, 971...","[232.26418, 404.92398, 322.72388, 669.58966, 1...","[368.4057, 935.235, 358.93607, 652.0451, 141.0...","[5.744563, 123.19941, 71.736206, 14.512931, 11...","[4.83798, 0.0, 2.330783, 1.620902, 5.6983395, ...","[2022.8408, 910.29285, 423.12793, 556.22217, 1...","[1889.0115, 5470.5747, 5224.774, 6354.448, 227...","[2642.9392, 6637.952, 11831.7, 7009.264, 3025....","[390.8073, 1165.059, 839.5464, 321.54626, 621....","[31.06264, 0.0, 23.12342, 34.777344, 43.826744..."
4,"[200.40477, 132.74019, 122.81338, 497.40533, 1...","[207.37444, 384.6086, 311.6742, 740.42004, 316...","[205.61017, 487.30756, 404.11234, 396.60376, 1...","[11.496488, 20.044012, 36.549664, 27.062574, 8...","[1.4933167, 1.3658535, 6.551396, 0.0, 6.182544...","[1645.4377, 541.7472, 1451.4487, 1108.017, 201...","[2346.3557, 5544.132, 3423.0166, 8201.541, 228...","[2967.54, 2882.5479, 6661.954, 4149.636, 2376....","[541.8059, 338.3957, 576.7407, 899.3213, 735.6...","[34.188293, 29.793888, 46.546776, 0.0, 59.5633..."


In [136]:
X_block5_pool_avg = np.vstack(vectors_df['block5_pool_avg'].to_list())
log_reg = LogisticRegression()
log_reg.fit(X_block5_pool_avg, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [138]:
y_pred = log_reg.predict(X_block5_pool_avg)

In [142]:
from sklearn.metrics import accuracy_score, classification_report

In [143]:
accuracy_score(y_train, y_pred)

1.0

In [150]:
r = classification_report(y_train, y_pred)
print(r)

precision    recall  f1-score   support

       False       1.00      1.00      1.00       505
        True       1.00      1.00      1.00       495

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000

