In [31]:
import argparse

import numpy as np
from pystruct.datasets import load_letters
from pystruct.learners import FrankWolfeSSVM
from pystruct.models import ChainCRF

In [32]:
def build_arg_parser():
    """Build the command-line parser for the CRF training script.

    Returns
    -------
    argparse.ArgumentParser
        Parser with a single optional ``--C`` float option (default 1.0)
        whose parsed value is stored under the attribute ``c_val``.
    """
    parser = argparse.ArgumentParser(
        description='Train a Conditional Random Field classifier')
    parser.add_argument("--C", dest="c_val", required=False, type=float,
                        default=1.0, help='C value to be used for training')
    return parser

class CRFModel(object):
    """Small wrapper around a ``FrankWolfeSSVM`` learner with a ``ChainCRF`` model.

    Parameters
    ----------
    c_val : float, default 1.0
        Forwarded to ``FrankWolfeSSVM`` as ``C``.
    max_iter : int, default 50
        Forwarded to ``FrankWolfeSSVM`` as ``max_iter``.
    random_state : int, RandomState or None, default None
        Forwarded to ``FrankWolfeSSVM`` as ``random_state``; pass an int to
        make repeated training runs reproducible.

    Attributes
    ----------
    clf : FrankWolfeSSVM
        The underlying learner used by ``train``, ``evaluate`` and ``classify``.
    """

    def __init__(self, c_val=1.0, max_iter=50, random_state=None):
        self.clf = FrankWolfeSSVM(model=ChainCRF(), C=c_val,
                                  max_iter=max_iter,
                                  random_state=random_state)

    @staticmethod
    def _as_object_array(sequences):
        """Pack variable-length sequences into a 1-D object array, one per slot."""
        # Filling element by element never asks NumPy to infer a common shape,
        # so ragged input (which np.array() rejects on NumPy >= 1.24) is safe.
        packed = np.empty(len(sequences), dtype=object)
        for position, sequence in enumerate(sequences):
            packed[position] = sequence
        return packed

    def load_data(self):
        """Load the letters dataset via ``load_letters``.

        Returns
        -------
        X : numpy.ndarray
            1-D object array holding one entry of ``'data'`` per sample.
        y : numpy.ndarray
            1-D object array holding one entry of ``'labels'`` per sample.
        folds
            The ``'folds'`` entry of the dataset, returned unchanged.
        """
        alphabets = load_letters()
        X = self._as_object_array(alphabets['data'])
        y = self._as_object_array(alphabets['labels'])
        folds = alphabets['folds']
        return X, y, folds

    def train(self, X_train, y_train):
        """Fit the wrapped learner on ``X_train`` / ``y_train``."""
        self.clf.fit(X_train, y_train)

    def evaluate(self, X_test, y_test):
        """Return the wrapped learner's ``score`` on ``X_test`` / ``y_test``."""
        return self.clf.score(X_test, y_test)

    def classify(self, input_data):
        """Predict on ``input_data`` and return only the first prediction.

        ``input_data`` is handed to ``predict`` as is, so a single sample
        must be wrapped in a one-element list by the caller.
        """
        return self.clf.predict(input_data)[0]

In [33]:
# Load the letters dataset once so the next cell can inspect its contents.
letters = load_letters()

In [34]:
# Peek at the first three entries of each per-sample field, then show the
# available keys and the total number of samples.
for title, field in (('Data', 'data'), ('Labels', 'labels'), ('Folds', 'folds')):
    print(title, letters[field][:3])
print(letters.keys())
print('Len', len(letters['data']))

Data [array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 1, ..., 1, 0, 0]], dtype=uint8), array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]], dtype=uint8), array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 1, 1]], dtype=uint8)]
Labels [array([14, 12, 12,  0, 13,  3,  8, 13,  6]), array([14, 12, 12,  0, 13,  3,  8, 13,  6]), array([14, 12, 12,  0, 13,  3,  8, 13,  6])]
Folds [0 7 8]
dict_keys(['folds', 'labels', 'data', 'word', 'images'])
Len 6877


In [38]:
import string
def convert_to_letters(indices):
    """Translate integer indices into a lowercase string (0 -> 'a', 25 -> 'z').

    Parameters
    ----------
    indices : sequence or array of int
        Positions into the lowercase ASCII alphabet.

    Returns
    -------
    str
        The letters at the given positions, concatenated in order.
    """
    lookup = np.array([letter for letter in string.ascii_lowercase])
    return ''.join(np.take(lookup, indices))

In [41]:
# C value handed to the CRF wrapper.
c_val = 1.0
crf = CRFModel(c_val)

# Samples whose fold id is 1 form the training set; all others are held out.
X, y, folds = crf.load_data()
train_mask = folds == 1
X_train, y_train = X[train_mask], y[train_mask]
X_test, y_test = X[~train_mask], y[~train_mask]

crf.train(X_train, y_train)
score = crf.evaluate(X_test, y_test)
print('\nAccuracy score=', '{}%'.format(round(score * 100, 2)))


Accuracy score= 78.12%


In [42]:
# Spot-check every 200th held-out sample, starting at position 3000.
indices = range(3000, len(y_test), 200)
for index in indices:
    true_labels = y_test[index]
    print(true_labels)
    print("Original =", convert_to_letters(true_labels))
    # classify() returns the first prediction of the batch it is given,
    # so the single sample is wrapped in a one-element list.
    predicted = crf.classify([X_test[index]])
    print("Predicted =", convert_to_letters(predicted))

[17 14  9  4  2 19  8 14 13 18]
Original = rojections
Predicted = rojectiong
[20  5  5]
Original = uff
Predicted = ufr
[10  8  8 13  6]
Original = kiing
Predicted = kiing
[ 4  2 14 12 15 17  4 18 18]
Original = ecompress
Predicted = ecomertig
[20 25 25]
Original = uzz
Predicted = vex
[15 14  8 11  8 13  6]
Original = poiling
Predicted = aniting
[20  8 25 25  8  2  0 11 11 24]
Original = uizzically
Predicted = uzzzically
[14 12 15  0 17  0 19  8 21  4 11 24]
Original = omparatively
Predicted = omparatively
[ 0  1 20 11 14 20 18 11 24]
Original = abulously
Predicted = abuloualy
[14 17 12  0 11  8 25  0 19  8 14 13]
Original = ormalization
Predicted = ormalisation
[ 0 10  4]
Original = ake
Predicted = aka
[ 0  5  4 19  4 17  8  0]
Original = afeteria
Predicted = ateteria
[14  1  1 11  4]
Original = obble
Predicted = obble
[ 7  0  3 14 22]
Original = hadow
Predicted = habow
[13  3 20 18 19 17  8  0 11  8 25  4  3]
Original = ndustrialized
Predicted = ndusqrialyled
[24 12 15  0 19  7  4 19 