In [21]:
import warnings
warnings.filterwarnings('ignore') # uncomment this line to suppress warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

model_lbls = [
             'dt', 
             'nb', 
             'lp', 
             'svc',
             'knn'
            ]

# Set the parameters by cross-validation
tuned_param_dt = [{'max_depth': range(1,20)}]
tuned_param_nb = [{'var_smoothing': [10, 1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-07, 1e-8, 1e-9, 1e-10]}]
tuned_param_lp = [{'early_stopping': [True]}]
tuned_param_svc = [{'kernel': ['rbf'], 
                    'gamma': [1e-3, 1e-4],
                    'C': [1, 10, 100, 1000],
                    },
                    {'kernel': ['linear'],
                     'C': [1, 10, 100, 1000],                     
                    },
                   ]
tuned_param_knn =[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]

models = {
    'dt': {'name': 'Decision Tree       ',
           'estimator': DecisionTreeClassifier(), 
           'param': tuned_param_dt,
          },
    'nb': {'name': 'Gaussian Naive Bayes',
           'estimator': GaussianNB(),
           'param': tuned_param_nb
          },
#     'lp': {'name': 'Linear Perceptron   ',
#            'estimator': Perceptron(),
#            'param': tuned_param_lp,
#           },
    'svc':{'name': 'Support Vector      ',
           'estimator': SVC(), 
           'param': tuned_param_svc
          },
    'knn':{'name': 'K Nearest Neighbor ',
           'estimator': KNeighborsClassifier(),
           'param': tuned_param_knn
        
    }
}

scores = ['precision', 'recall']

In [22]:
def print_results(model):
    means = model.cv_results_['mean_test_score']
    stds = model.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, model.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print("Detailed classification report:\n")
    print("The model is trained on the full development set.\n")
    print("The scores are computed on the full evaluation set.\n")
    y_true, y_pred = y_test, model.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

In [23]:
import common

import cv2
import numpy as np
from matplotlib import pyplot as plt

import random
import getopt
import sys

import extract
import generator
import knn
import svm
import ocr
import sift

def __unpackage(train_set):
    data, labels = list(), list()
    for (l, d) in train_set:
        data.append(d)
        labels.append(l)
    return np.array(data), np.array(labels)

train = 10
dump = "data_set"
load = None
verbose = True

t = ocr.OCR.get_train_set(train, verbose=verbose)
data, labels = __unpackage(t)

ts = 0.3 #0.3
random_state = 0 #42

x_image = []
y_labels = []

for d, l in zip(data, labels):
    x_image.append(d.flatten())
    y_labels.append(l)
#     plt.imshow(d, cmap='gray', vmin=0, vmax=255)
#     plt.show()
#     print(l)

x_image = np.array(x_image)
y_labels = np.array(y_labels)

#Split train and test data
X_train, X_test, y_train, y_test = train_test_split(
    x_image, y_labels, test_size=ts, random_state=random_state)
print("Training on %d examples" % len(X_train))
# print(len(y_train))

d = {}
for score in scores:
    print('========================================')
    print('# Tuning hyper-parameters for', score)
    print('\n----------------------------------------')
    for i in models:
        print('Trying model', models[i]['name'])
        
        model = GridSearchCV(models[i]['estimator'], models[i]['param'], cv=5,
                       scoring='%s_macro' % score) #cv=5
        model.fit(X_train, y_train)
        
        print_results(model)
        d[models[i]['name']] = model.best_score_
    print("Best scores for", score, "are:\n", d)

Appended generated glyph: '1'
Appended generated glyph: '2'
Appended generated glyph: '3'
Appended generated glyph: '4'
Appended generated glyph: '5'
Appended generated glyph: '6'
Appended generated glyph: '7'
Appended generated glyph: '8'
Appended generated glyph: '9'
Appended generated glyph: '0'
1/10
Appended generated glyph: '1'
Appended generated glyph: '2'
Appended generated glyph: '3'
Appended generated glyph: '4'
Appended generated glyph: '5'
Appended generated glyph: '6'
Appended generated glyph: '7'
Appended generated glyph: '8'
Appended generated glyph: '9'
Appended generated glyph: '0'
2/10
Appended generated glyph: '1'
Appended generated glyph: '2'
Appended generated glyph: '3'
Appended generated glyph: '4'
Appended generated glyph: '5'
Appended generated glyph: '6'
Appended generated glyph: '7'
Appended generated glyph: '8'
Appended generated glyph: '9'
Appended generated glyph: '0'
3/10
Appended generated glyph: '1'
Appended generated glyph: '2'
Appended generated glyph:

0.100 (+/-0.000) for {'max_depth': 1}
0.140 (+/-0.098) for {'max_depth': 2}
0.170 (+/-0.120) for {'max_depth': 3}
0.200 (+/-0.141) for {'max_depth': 4}
0.200 (+/-0.141) for {'max_depth': 5}
0.210 (+/-0.223) for {'max_depth': 6}
0.160 (+/-0.194) for {'max_depth': 7}
0.200 (+/-0.210) for {'max_depth': 8}
0.200 (+/-0.141) for {'max_depth': 9}
0.170 (+/-0.233) for {'max_depth': 10}
0.190 (+/-0.194) for {'max_depth': 11}
0.190 (+/-0.075) for {'max_depth': 12}
0.220 (+/-0.150) for {'max_depth': 13}
0.190 (+/-0.147) for {'max_depth': 14}
0.260 (+/-0.223) for {'max_depth': 15}
0.260 (+/-0.117) for {'max_depth': 16}
0.210 (+/-0.172) for {'max_depth': 17}
0.210 (+/-0.172) for {'max_depth': 18}
0.230 (+/-0.250) for {'max_depth': 19}
Detailed classification report:

The model is trained on the full development set.

The scores are computed on the full evaluation set.

              precision    recall  f1-score   support

           0       0.25      0.50      0.33         2
           1       1.0