In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pickle
import h5py

In [10]:
# Path to the HDF5 file that holds the 'features', 'labels' and 'label_names' datasets
HDF5_DB = '../datasets/animals/hdf5/features.hdf5'
# Output path the pickled best estimator is written to
MODEL = 'animals.cpickle'
# Passed as n_jobs to GridSearchCV; -1 asks scikit-learn to use all available processors
JOBS = -1

In [11]:
# Open the feature database read-only and compute the row index that separates the
# training rows from the testing rows (a 75% / 25% cut). A plain positional cut is
# only valid on the assumption that the data was shuffled before it was written to disk.
db = h5py.File(HDF5_DB, mode="r")
num_samples = db["labels"].shape[0]
i = int(num_samples * 0.75)

In [12]:
# Sanity-check that the feature matrix is present (fail loudly instead of silently
# discarding the membership result), then display every top-level dataset name.
assert "features" in db, "HDF5 file has no 'features' dataset"
list(db.keys())

['features', 'label_names', 'labels']

In [13]:
# Grid-search the regularization strength C of a logistic regression classifier,
# scoring every candidate value with 3-fold cross-validation on the training rows
# (everything before index i).
print("[INFO] Tuning Hyperparameters")
params = {"C": [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]}

# Materialize the training slices once so the fit call reads cleanly
train_features = db["features"][:i]
train_labels = db["labels"][:i]

model = GridSearchCV(estimator=LogisticRegression(), param_grid=params, cv=3, n_jobs=JOBS)
model.fit(train_features, train_labels)

best_params = model.best_params_
print("[INFO] Best Hyperparamerters: {}".format(best_params))

[INFO] Tuning Hyperparameters
[INFO] Best Hyperparamerters: {'C': 10.0}


In [14]:
# Evaluate the tuned model on the held-out rows (everything from index i onward)
print("[INFO] Evaluating...")
preds = model.predict(db["features"][i:])

# h5py can hand stored strings back as bytes (h5py >= 3 does so for variable-length
# strings), which would not render as plain class names in the report. Read the names
# into memory and normalize each one to str before passing them on.
label_names = [
    name.decode("utf-8") if isinstance(name, bytes) else str(name)
    for name in db["label_names"][:]
]
print(classification_report(db["labels"][i:], preds, target_names=label_names))

[INFO] Evaluating...
             precision    recall  f1-score   support

       cats       1.00      1.00      1.00       255
       dogs       0.99      1.00      0.99       235
      panda       1.00      1.00      1.00       260

avg / total       1.00      1.00      1.00       750



In [15]:
# Serialize the best estimator found by the grid search to MODEL. The context manager
# guarantees the file handle is closed even if pickling or writing raises.
print("[INFO] Saving Model")
with open(MODEL, "wb") as f:
    pickle.dump(model.best_estimator_, f)

[INFO] Saving Model


In [8]:
# Release the HDF5 file handle that was opened for reading earlier in the notebook
db.close()