In [1]:
import os
import argparse
import string
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [12]:
from pystruct.datasets import load_letters
from pystruct.learners import FrankWolfeSSVM
from pystruct.models import ChainCRF

In [5]:
!pip install pystruct

Collecting pystruct
  Using cached pystruct-0.2.4.tar.gz
Collecting ad3 (from pystruct)
  Downloading ad3-2.1-cp35-cp35m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (519kB)
    100% |████████████████████████████████| 522kB 681kB/s eta 0:00:01
Building wheels for collected packages: pystruct
  Running setup.py bdist_wheel for pystruct ... done
  Stored in directory: /Users/hodong/Library/Caches/pip/wheels/8b/87/bc/6fb10e64e8fd0b722e9e9e2236a939a1e9957d792b7b77486b
Successfully built pystruct
Installing collected packages: ad3, pystruct
Successfully installed ad3-2.1 pystruct-0.2.4


In [22]:
class CRFmodel(object):
    """Small convenience wrapper around a pystruct chain CRF.

    The underlying learner is a ``FrankWolfeSSVM`` fitted on a ``ChainCRF``
    model; it is exposed as ``self.clf``.
    """

    # Lookup table for convert_to_letters: position i holds the i-th
    # lowercase ASCII letter ('a' at 0 ... 'z' at 25). Built once instead
    # of on every call.
    _ALPHABET = np.array(list(string.ascii_lowercase))

    def __init__(self, c_val=1.0, max_iter=50):
        """Create the learner.

        Parameters
        ----------
        c_val : float
            Passed to ``FrankWolfeSSVM`` as ``C``.
        max_iter : int
            Passed to ``FrankWolfeSSVM`` as ``max_iter`` (defaults to the
            previously hard-coded value of 50).
        """
        self.clf = FrankWolfeSSVM(model=ChainCRF(), C=c_val, max_iter=max_iter)

    @staticmethod
    def _as_object_array(sequences):
        """Pack ``sequences`` into a 1-D object array, one sequence per slot.

        ``np.array(sequences)`` is not used here because the sequences may
        have different lengths: recent NumPy versions refuse to build such
        ragged arrays implicitly, and equally-sized sequences would be
        silently stacked into a higher-dimensional array instead.
        """
        packed = np.empty(len(sequences), dtype=object)
        for position, sequence in enumerate(sequences):
            packed[position] = sequence
        return packed

    def load_data(self):
        """Load the dataset returned by pystruct's ``load_letters``.

        Returns
        -------
        X, Y, folds
            ``X`` and ``Y`` are 1-D object arrays built from the ``'data'``
            and ``'labels'`` entries (one element per sequence), so they can
            be indexed with a boolean mask. ``folds`` is the ``'folds'``
            entry, returned unchanged.
        """
        alphabets = load_letters()
        X = self._as_object_array(alphabets['data'])
        Y = self._as_object_array(alphabets['labels'])
        folds = alphabets['folds']

        return X, Y, folds

    def train(self, X_train, Y_train):
        """Fit the wrapped learner on the given sequences and labels."""
        self.clf.fit(X_train, Y_train)

    def evaluate(self, X_test, Y_test):
        """Return the wrapped learner's ``score`` on the given data."""
        return self.clf.score(X_test, Y_test)

    def classify(self, input_data):
        """Predict labels and return the prediction for the first sequence.

        ``input_data`` is handed to ``predict`` as-is, so a single sequence
        must be wrapped in a list; only element 0 of the result is returned.
        """
        return self.clf.predict(input_data)[0]

    def convert_to_letters(self, indices):
        """Turn a sequence of label indices into a lowercase string.

        Each index selects a letter from ``'a'..'z'`` (0 -> 'a', 25 -> 'z').
        An empty sequence yields ``''``.
        """
        index_array = np.asarray(indices)
        # np.take cannot handle an empty Python list (it becomes a float
        # array, which is not a valid index type), so short-circuit here.
        if index_array.size == 0:
            return ''

        # np.take picks the elements of the lookup table at the given indices.
        return ''.join(np.take(self._ALPHABET, index_array))

In [25]:
# Build the model, load the letters dataset, and split it by fold label:
# sequences in fold 1 form the training set, all other folds the test set.
c_val = 0.5
crf = CRFmodel(c_val)
X, Y, folds = crf.load_data()

train_mask = folds == 1
test_mask = folds != 1
X_train, Y_train = X[train_mask], Y[train_mask]
X_test, Y_test = X[test_mask], Y[test_mask]

In [26]:
# Fit the CRF on the training split (X_train / Y_train).
print("\nTraining the CRF model...")
crf.train(X_train, Y_train)


Training the CRF model...


In [27]:
# Score the trained model on the test split and report it as a percentage
# rounded to two decimal places.
score = crf.evaluate(X_test, Y_test)
accuracy_pct = round(score * 100, 2)
print("\nAccuracy score =", str(accuracy_pct) + "%")


Accuracy score = 78.55%


In [28]:
# Spot-check the model: take every 200th test sequence starting at
# position 3000 and print the true letters next to the predicted ones.
indices = range(3000, len(Y_test), 200)
for index in indices:
    original_word = crf.convert_to_letters(Y_test[index])
    print("\nOriginal =", original_word)

    # classify expects a list of sequences, so wrap the single sample.
    predicted = crf.classify([X_test[index]])
    predicted_word = crf.convert_to_letters(predicted)
    print("Predicted =", predicted_word)


Original = rojections
Predicted = rojectiong

Original = uff
Predicted = ufr

Original = kiing
Predicted = kiing

Original = ecompress
Predicted = ecomeregg

Original = uzz
Predicted = vax

Original = poiling
Predicted = aniting

Original = uizzically
Predicted = uzzzically

Original = omparatively
Predicted = omparatively

Original = abulously
Predicted = abuloualy

Original = ormalization
Predicted = ormalisation

Original = ake
Predicted = aka

Original = afeteria
Predicted = ateteria

Original = obble
Predicted = obble

Original = hadow
Predicted = habow

Original = ndustrialized
Predicted = ndusqrinlyled

Original = ympathetically
Predicted = ympnshetically
