## Classify Extracted Features of CatsAndDogs using ResNet50 

**Objective:** Classify the extracted features of CatsAndDogs dataset using Logistic Regression.

## Load the extracted features of Training dataset

In [1]:
import h5py

In [2]:
# to verify script - sample cases
db = h5py.File('./output/dry_run/cats_and_dogs_features.hdf5', 'r')

# full dataset
#db = h5py.File('./output/cats_and_dogs_features.hdf5', 'r')

In [3]:
db

<HDF5 file "cats_and_dogs_features.hdf5" (mode r)>

In [4]:
db['features'].shape

(100, 2048)

In [5]:
db['labels'].shape

(100,)

In [6]:
## train : validation split -> 75:25
train_idx = int(db['labels'].shape[0] * 0.75)

In [7]:
train_idx

75

## Build LogisticRegression Model

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [9]:
##
print('Tuning parameters...')
params = { 'C': [0.0001, 0.001, 0.01, 0.1, 1]}

classifier = GridSearchCV(LogisticRegression(), params, cv=3)

Tuning parameters...


In [10]:
classifier.fit(db['features'][:train_idx], db['labels'][:train_idx])



GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.0001, 0.001, 0.01, 0.1, 1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [11]:
print('Best hyperparameter : {}'.format(classifier.best_params_) )

Best hyperparameter : {'C': 0.001}


## Evaluate the model

In [12]:
#do the prediction for the validation set
pred = classifier.predict(db['features'][train_idx:])

In [13]:
pred_proba = classifier.predict_proba(db['features'][train_idx:])

In [14]:
#pred

In [15]:
classifier.classes_

array([0, 1])

In [24]:
#pred_proba

In [17]:
#pred_proba[:][:][0]

In [18]:
from sklearn.metrics import classification_report

In [19]:
print(classification_report(db['labels'][train_idx:], pred, target_names=['cat', 'dog']))

              precision    recall  f1-score   support

         cat       1.00      0.91      0.95        11
         dog       0.93      1.00      0.97        14

   micro avg       0.96      0.96      0.96        25
   macro avg       0.97      0.95      0.96        25
weighted avg       0.96      0.96      0.96        25



## Save Model

In [20]:
import pickle

In [25]:
classifier.best_estimator_

LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [21]:
model_file = './model/cats_and_dogs.pickle'
f = open(model_file, 'wb')
f.write(pickle.dumps(classifier.best_estimator_))
f.close()

## Close Database

In [22]:
# close the hdf5 database
db.close()