In [12]:
from IPython.display import HTML, display
import tabulate
import ast

Run our algorithm locally
---

In [13]:
import classifier

In [14]:
local_results = classifier.results()

Run our algorithm in docker container after building
---

In [15]:
%%bash
docker build -t edwardchalstrey/classifier:latest .

Sending build context to Docker daemon  14.85kB
Step 1/8 : FROM python:3
 ---> ac069ebfe1e1
Step 2/8 : RUN apt-get update
 ---> Using cache
 ---> 5a84d23aa7b5
Step 3/8 : RUN pip3 install numpy
 ---> Using cache
 ---> 4383ac463a3b
Step 4/8 : RUN pip3 install scipy
 ---> Using cache
 ---> 6fa2c9da864b
Step 5/8 : RUN pip3 install scikit-learn
 ---> Using cache
 ---> 0b888dbaed11
Step 6/8 : COPY classifier.py /classifier.py
 ---> 70514e4b536f
Step 7/8 : COPY display_classifier_results.py /display_classifier_results.py
 ---> 456011aa7d49
Step 8/8 : CMD python3 display_classifier_results.py
 ---> Running in 16a32b10d092
Removing intermediate container 16a32b10d092
 ---> 187dcdf1b701
Successfully built 187dcdf1b701
Successfully tagged edwardchalstrey/classifier:latest


In [16]:
%%bash --out docker_results
docker run edwardchalstrey/classifier:latest

In [17]:
docker_results = ast.literal_eval(docker_results)

How do they compare?
---

In [18]:
headers = ["Version"]
c_results = ["Basic"]
d_results = ["Container"]
for k, v in local_results.items():
    headers.append(k)
    c_results.append(v)
for k, v in docker_results.items():
    d_results.append(v)
display(HTML(tabulate.tabulate([headers, c_results, d_results], tablefmt='html')))

0,1,2,3
Version,Training time (s),Prediction time (s),Performance (micro avg f1 score)
Basic,0.10903620719909668,0.036577701568603516,0.9566184649610678
Container,0.13712811470031738,0.05720043182373047,0.9566184649610678


Same classification algorithm, different dataset
---

In [82]:
from sklearn import datasets, svm, metrics
import time
import numpy as np

In [83]:
#digits = datasets.load_digits()
iris = datasets.load_iris()

In [84]:
n_samples = len(iris.data)

In [85]:
targets_to_fit_from = iris.target[(n_samples // 6)*1:(n_samples // 6)*2]
targets_to_fit_from = np.append(targets_to_fit_from, iris.target[(n_samples // 6)*3:(n_samples // 6)*4])
targets_to_fit_from = np.append(targets_to_fit_from, iris.target[(n_samples // 6)*5:(n_samples // 6)*6])

data_to_fit_from = iris.data[(n_samples // 6)*1:(n_samples // 6)*2]
data_to_fit_from = np.concatenate((data_to_fit_from, iris.data[(n_samples // 6)*3:(n_samples // 6)*4]))
data_to_fit_from = np.concatenate((data_to_fit_from, iris.data[(n_samples // 6)*5:(n_samples // 6)*6]))

data_to_predict_from = iris.data[:n_samples // 6]
data_to_predict_from = np.concatenate((data_to_predict_from, iris.data[(n_samples // 6)*2:(n_samples // 6)*3]))
data_to_predict_from = np.concatenate((data_to_predict_from, iris.data[(n_samples // 6)*4:(n_samples // 6)*5]))

expected_targets = iris.target[:n_samples // 6]
expected_targets = np.concatenate((expected_targets, iris.target[(n_samples // 6)*2:(n_samples // 6)*3]))
expected_targets = np.concatenate((expected_targets, iris.target[(n_samples // 6)*4:(n_samples // 6)*5]))
expected_targets

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2])

In [86]:

#data = iris.data

expected = expected_targets

classifier = svm.SVC(gamma='scale')

start = time.time()
classifier.fit(data_to_fit_from, targets_to_fit_from)
end = time.time()
training_time = end - start

start = time.time()
predicted = classifier.predict(data_to_predict_from)
end = time.time()
classifier_time = end - start

report = metrics.classification_report(expected, predicted, output_dict=True)

performance = report['micro avg']['f1-score']
print({"Training time (s)": training_time, "Prediction time (s)": classifier_time,
    "Performance (micro avg f1 score)": report['micro avg']['f1-score']})
print(metrics.classification_report(expected, predicted))

{'Training time (s)': 0.0007910728454589844, 'Prediction time (s)': 0.0004901885986328125, 'Performance (micro avg f1 score)': 0.9733333333333334}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       0.96      0.96      0.96        25
           2       0.96      0.96      0.96        25

   micro avg       0.97      0.97      0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75

