In [1]:
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_mldata
%matplotlib inline
from sklearn.utils import shuffle
from tga import TGA

# Load every MNIST digit class; the experiments below narrow this down to digit pairs.

SPLIT = 16000  # NOTE(review): not referenced by any cell visible in this notebook
END = 24000  # NOTE(review): not referenced by any cell visible in this notebook
SEED = 0  # single seed shared by the shuffle and by NumPy's global generator

# NOTE(review): fetch_mldata was removed in scikit-learn 0.22; on newer versions
# fetch_openml("mnist_784") is the replacement loader.
mnist = fetch_mldata("MNIST original")
# Divide by 255 to rescale the pixel values (presumably 0-255 -> 0-1).
x_dat, y_dat = mnist.data[:] / 255., mnist.target[:]
# Seed the global generator as well: the noise helpers defined further down draw
# from np.random, so without this every run produces different corrupted data.
np.random.seed(SEED)
x_dat, y_dat = shuffle(x_dat, y_dat, random_state=SEED)


In [2]:
def showDigitImage(array):
    """Display one digit sample as an image.

    ``array`` is reshaped to 28 x 28 (so it must hold exactly 784 values)
    and drawn with matplotlib's ``imshow``; the figure is shown immediately.
    """
    image = array.reshape(28, 28)
    plt.imshow(image)
    plt.show()

In [3]:
def filt_num(xs,ys,ns):
    """Select the samples whose label is one of ``ns``.

    Parameters
    ----------
    xs : numpy.ndarray
        Samples, indexed along the first axis in step with ``ys``.
    ys : array-like
        One label per sample (1-D).
    ns : iterable
        Label values to keep. An empty iterable selects nothing.

    Returns
    -------
    tuple
        ``(xs_kept, ys_kept)`` in their original order. Both are new
        arrays, so the inputs are never modified or aliased.
    """
    ys = np.asarray(ys)
    # One vectorised membership test replaces the per-label Python loop that
    # OR-ed element-wise lists together. list() lets sets/generators through.
    keep = np.isin(ys, list(ns))
    # Boolean-mask indexing always copies, so no explicit .copy() is needed.
    return xs[keep], ys[keep]

# Keep only the samples labelled 1 or 5; these feed the first set of experiments.
new_xs,new_ys = filt_num(x_dat,y_dat,[1,5])


In [4]:
# Project the filtered samples onto 20 PCA components.
# NOTE(review): the projection is fitted on the full set and the projected data
# is cross-validated afterwards, so held-out folds influence the components.
pca = PCA(n_components=20)
pca.fit(new_xs)
pca_dat = pca.transform(new_xs)

# Same 20-component projection with TGA (imported from `tga`).
tga = TGA(n_components=20)
tga.fit(new_xs)
tga_dat = tga.transform(new_xs)


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

# {1,5} vs {1,5}

In [5]:
# Original feature
# Baseline for {1,5}: classify the unprojected samples in new_xs.

from sklearn.linear_model import LogisticRegression
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# sklearn.model_selection.cross_val_predict is the replacement.
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# Logistic regression with default settings; cv=10 yields one out-of-fold prediction per sample.
predicted = cross_validation.cross_val_predict(LogisticRegression(),new_xs, new_ys, cv=10)
print("Logistic Regression",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,new_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))





Logistic Regression 0.995912614517
             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00      7877
        5.0       1.00      1.00      1.00      6313

avg / total       1.00      1.00      1.00     14190

SVM 0.995137420719
             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00      7877
        5.0       0.99      0.99      0.99      6313

avg / total       1.00      1.00      1.00     14190



In [6]:
# PCA feature
# {1,5}, clean data: classify the 20-component PCA projection in pca_dat.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))


0.989992952784
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190

SVM 0.990486257928
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190



In [7]:
# TGA feature
# {1,5}, clean data: classify the 20-component TGA projection in tga_dat.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))


0.988653981677
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190

SVM 0.988724453841
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190



# {1,5} vs {1,5}: White noise

In [8]:
def add_noise_to_array(pure, scale=0.3):
    """Return ``pure`` with zero-mean Gaussian noise added to every element.

    Parameters
    ----------
    pure : array-like
        Clean signal. It is not modified.
    scale : float, optional
        Standard deviation of the noise. Defaults to 0.3, the value this
        notebook's experiments use.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``pure``.

    Notes
    -----
    Noise is drawn from NumPy's global generator, so call ``np.random.seed``
    beforehand if the result has to be repeatable.
    """
    # np.shape also handles plain nested lists, which have no .shape attribute.
    noise = np.random.normal(0, scale, np.shape(pure))
    # The addition allocates a new array, so no defensive copy is required.
    return pure + noise

def add_noise_to_train_set(x_train):
    """Apply ``add_noise_to_array`` to every sample of ``x_train``.

    Samples are processed in iteration order. The result is a Python list
    holding one noisy sample per input sample.
    """
    return [add_noise_to_array(sample) for sample in x_train]

# White-noise variant of the current digit subset (a list with one noisy sample per row).
white_xs = add_noise_to_train_set(new_xs)


In [10]:
# Refit the 20-component PCA on the white-noise samples; this rebinds pca / pca_dat.
pca = PCA(n_components=20)
pca.fit(white_xs)
pca_dat = pca.transform(white_xs)

# Refit TGA on the same noisy samples; this rebinds tga / tga_dat.
tga = TGA(n_components=20)
tga.fit(white_xs)
tga_dat = tga.transform(white_xs)


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

In [11]:
# Original feature
# {1,5}, white noise: classify the unprojected noisy samples in white_xs.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),white_xs, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,white_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.986821705426
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.98      0.99      6313

avg / total       0.99      0.99      0.99     14190

SVM 0.983932346723
             precision    recall  f1-score   support

        1.0       0.98      0.99      0.99      7877
        5.0       0.98      0.98      0.98      6313

avg / total       0.98      0.98      0.98     14190



In [12]:
# PCA feature
# {1,5}, white noise: classify the PCA projection of the noisy samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.987174066244
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190

SVM 0.987455954898
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.98      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190



In [13]:
# TGA feature
# {1,5}, white noise: classify the TGA projection of the noisy samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))


0.987244538407
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.99      0.98      0.99      6313

avg / total       0.99      0.99      0.99     14190

SVM 0.986821705426
             precision    recall  f1-score   support

        1.0       0.99      0.99      0.99      7877
        5.0       0.98      0.99      0.99      6313

avg / total       0.99      0.99      0.99     14190



In [14]:
def rotate180(features):
    """Return a reversed copy of ``features`` (reversal along the first axis).

    For a flattened, row-major image vector, reversing the element order is
    a 180-degree rotation of the image. The input itself is left untouched.
    """
    duplicate = features.copy()
    return duplicate[::-1]
  
def vertical_flip(features):
    """Mirror a flattened 28 x 28 image about its vertical axis.

    The vector is viewed as a 28 x 28 grid, the column order of every row is
    reversed (left becomes right), and the grid is flattened again. A new
    784-element array is returned; ``features`` is not modified.
    """
    grid = features.copy().reshape(28, 28)
    mirrored = grid[:, ::-1]
    return mirrored.ravel()
        
def add_true_noise_to_train_set(x_train,p):
    """Corrupt a random subset of samples with a flip or a 180-degree rotation.

    Each sample is selected independently with probability ``p``. A selected
    sample goes through ``vertical_flip`` or ``rotate180`` with equal chance;
    every other sample is copied unchanged.

    Returns a Python list with one array per input sample, in input order.
    Randomness comes from NumPy's global generator.
    """
    # Draw all selection decisions up front, one uniform number per sample.
    corrupt_mask = np.random.rand(len(x_train)) < p
    noisy = []
    for row, corrupt in zip(x_train, corrupt_mask):
        sample = row.copy()
        if not corrupt:
            noisy.append(sample)
        elif np.random.rand() < 0.5:
            # A second draw, made only for selected samples, picks the transform.
            noisy.append(vertical_flip(sample))
        else:
            noisy.append(rotate180(sample))
    return noisy


# {1,5} vs {1,5}: Flip

In [15]:
# Corrupt each {1,5} sample with probability 0.2 (flip or 180-degree rotation).
true_noise_xs = add_true_noise_to_train_set(new_xs,0.2)

# Refit the 20-component PCA on the corrupted samples; this rebinds pca / pca_dat.
pca = PCA(n_components=20)
pca.fit(true_noise_xs)
pca_dat = pca.transform(true_noise_xs)

# Refit TGA on the same corrupted samples; this rebinds tga / tga_dat.
tga = TGA(n_components=20)
tga.fit(true_noise_xs)
tga_dat = tga.transform(true_noise_xs)


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

In [16]:
# Original feature
# {1,5}, flip noise (p=0.2): classify the unprojected corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),true_noise_xs, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,true_noise_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.985623678647
             precision    recall  f1-score   support

        1.0       0.98      0.99      0.99      7877
        5.0       0.99      0.98      0.98      6313

avg / total       0.99      0.99      0.99     14190

SVM 0.984918957012
             precision    recall  f1-score   support

        1.0       0.98      0.99      0.99      7877
        5.0       0.99      0.98      0.98      6313

avg / total       0.98      0.98      0.98     14190



In [17]:
# PCA feature
# {1,5}, flip noise (p=0.2): classify the PCA projection of the corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.971317829457
             precision    recall  f1-score   support

        1.0       0.97      0.98      0.97      7877
        5.0       0.97      0.96      0.97      6313

avg / total       0.97      0.97      0.97     14190

SVM 0.972163495419
             precision    recall  f1-score   support

        1.0       0.97      0.98      0.97      7877
        5.0       0.97      0.97      0.97      6313

avg / total       0.97      0.97      0.97     14190



In [18]:
# TGA feature
# {1,5}, flip noise (p=0.2): classify the TGA projection of the corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.968921775899
             precision    recall  f1-score   support

        1.0       0.97      0.98      0.97      7877
        5.0       0.97      0.96      0.96      6313

avg / total       0.97      0.97      0.97     14190

SVM 0.970119802678
             precision    recall  f1-score   support

        1.0       0.97      0.98      0.97      7877
        5.0       0.97      0.96      0.97      6313

avg / total       0.97      0.97      0.97     14190



# Repeated with {6,9}

In [37]:
# Switch to the {6,9} digit pair. This rebinds new_xs / new_ys, so every cell
# below works on 6-vs-9 data.
# NOTE(review): this cell's execution count (37) is out of sequence with the
# cells that follow it (20-26), i.e. it was re-run after them.
new_xs,new_ys = filt_num(x_dat,y_dat,[6,9])
# 20-component PCA fitted on the clean {6,9} samples.
pca = PCA(n_components=20)
pca.fit(new_xs)
pca_dat = pca.transform(new_xs)

# 20-component TGA fitted on the same samples.
tga = TGA(n_components=20)
tga.fit(new_xs)
tga_dat = tga.transform(new_xs)



In [20]:
# Original feature
# {6,9}, clean data: classify the unprojected samples in new_xs.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),new_xs, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,new_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.998265143848
             precision    recall  f1-score   support

        6.0       1.00      1.00      1.00      6876
        9.0       1.00      1.00      1.00      6958

avg / total       1.00      1.00      1.00     13834

SVM 0.998337429521
             precision    recall  f1-score   support

        6.0       1.00      1.00      1.00      6876
        9.0       1.00      1.00      1.00      6958

avg / total       1.00      1.00      1.00     13834



In [21]:
# PCA feature
# {6,9}, clean data: classify the 20-component PCA projection in pca_dat.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.995735145294
             precision    recall  f1-score   support

        6.0       1.00      1.00      1.00      6876
        9.0       1.00      1.00      1.00      6958

avg / total       1.00      1.00      1.00     13834

SVM 0.996024287986
             precision    recall  f1-score   support

        6.0       1.00      1.00      1.00      6876
        9.0       1.00      1.00      1.00      6958

avg / total       1.00      1.00      1.00     13834



In [22]:
# TGA feature
# {6,9}, clean data: classify the 20-component TGA projection in tga_dat.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))


0.9946508602
             precision    recall  f1-score   support

        6.0       1.00      0.99      0.99      6876
        9.0       0.99      1.00      0.99      6958

avg / total       0.99      0.99      0.99     13834

SVM 0.9946508602
             precision    recall  f1-score   support

        6.0       1.00      0.99      0.99      6876
        9.0       0.99      1.00      0.99      6958

avg / total       0.99      0.99      0.99     13834



In [23]:
# White-noise variant of the {6,9} samples; this rebinds white_xs.
white_xs = add_noise_to_train_set(new_xs)
# Refit the 20-component PCA on the noisy samples.
pca = PCA(n_components=20)
pca.fit(white_xs)
pca_dat = pca.transform(white_xs)

# Refit TGA on the same noisy samples.
tga = TGA(n_components=20)
tga.fit(white_xs)
tga_dat = tga.transform(white_xs)



0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

In [24]:
# Original feature
# {6,9}, white noise: classify the unprojected noisy samples in white_xs.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),white_xs, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,white_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.995446002602
             precision    recall  f1-score   support

        6.0       1.00      1.00      1.00      6876
        9.0       1.00      1.00      1.00      6958

avg / total       1.00      1.00      1.00     13834

SVM 0.993277432413
             precision    recall  f1-score   support

        6.0       0.99      0.99      0.99      6876
        9.0       0.99      0.99      0.99      6958

avg / total       0.99      0.99      0.99     13834



In [25]:
# PCA feature
# {6,9}, white noise: classify the PCA projection of the noisy samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.994289431835
             precision    recall  f1-score   support

        6.0       0.99      0.99      0.99      6876
        9.0       0.99      0.99      0.99      6958

avg / total       0.99      0.99      0.99     13834

SVM 0.993711146451
             precision    recall  f1-score   support

        6.0       0.99      0.99      0.99      6876
        9.0       0.99      0.99      0.99      6958

avg / total       0.99      0.99      0.99     13834



In [26]:
# TGA feature
# {6,9}, white noise: classify the TGA projection of the noisy samples (tga_dat).

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.992554575683
             precision    recall  f1-score   support

        6.0       0.99      0.99      0.99      6876
        9.0       0.99      0.99      0.99      6958

avg / total       0.99      0.99      0.99     13834

SVM 0.992554575683
             precision    recall  f1-score   support

        6.0       0.99      0.99      0.99      6876
        9.0       0.99      0.99      0.99      6958

avg / total       0.99      0.99      0.99     13834



In [38]:
# Corrupt each {6,9} sample with probability 0.1 (flip or 180-degree rotation).
true_noise_xs = add_true_noise_to_train_set(new_xs,0.1)

# Refit the 20-component PCA on the corrupted samples; this rebinds pca / pca_dat.
pca = PCA(n_components=20)
pca.fit(true_noise_xs)
pca_dat = pca.transform(true_noise_xs)

# Refit TGA on the same corrupted samples; this rebinds tga / tga_dat.
tga = TGA(n_components=20)
tga.fit(true_noise_xs)
tga_dat = tga.transform(true_noise_xs)


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

In [39]:
# Original feature
# {6,9}, flip noise (p=0.1): classify the unprojected corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),true_noise_xs, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,true_noise_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.962556021397
             precision    recall  f1-score   support

        6.0       0.96      0.96      0.96      6876
        9.0       0.96      0.96      0.96      6958

avg / total       0.96      0.96      0.96     13834

SVM 0.964941448605
             precision    recall  f1-score   support

        6.0       0.96      0.96      0.96      6876
        9.0       0.97      0.97      0.97      6958

avg / total       0.96      0.96      0.96     13834



In [40]:
# PCA feature
# {6,9}, flip noise (p=0.1): classify the PCA projection of the corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.9454243169
             precision    recall  f1-score   support

        6.0       0.95      0.94      0.94      6876
        9.0       0.94      0.95      0.95      6958

avg / total       0.95      0.95      0.95     13834

SVM 0.949255457568
             precision    recall  f1-score   support

        6.0       0.95      0.95      0.95      6876
        9.0       0.95      0.95      0.95      6958

avg / total       0.95      0.95      0.95     13834



In [41]:
# TGA feature
# {6,9}, flip noise (p=0.1): classify the TGA projection of the corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.942677461327
             precision    recall  f1-score   support

        6.0       0.95      0.94      0.94      6876
        9.0       0.94      0.95      0.94      6958

avg / total       0.94      0.94      0.94     13834

SVM 0.947159173052
             precision    recall  f1-score   support

        6.0       0.95      0.94      0.95      6876
        9.0       0.94      0.95      0.95      6958

avg / total       0.95      0.95      0.95     13834



# Repeated with {6,9}: Flip, p = 0.4

In [42]:
# Corrupt each {6,9} sample with probability 0.4 (flip or 180-degree rotation).
true_noise_xs = add_true_noise_to_train_set(new_xs,0.4)

# Refit the 20-component PCA on the corrupted samples; this rebinds pca / pca_dat.
pca = PCA(n_components=20)
pca.fit(true_noise_xs)
pca_dat = pca.transform(true_noise_xs)

# Refit TGA on the same corrupted samples; this rebinds tga / tga_dat.
tga = TGA(n_components=20)
tga.fit(true_noise_xs)
tga_dat = tga.transform(true_noise_xs)


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

In [43]:
# Original feature
# {6,9}, flip noise (p=0.4): classify the unprojected corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),true_noise_xs, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,true_noise_xs, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.930027468556
             precision    recall  f1-score   support

        6.0       0.93      0.93      0.93      6876
        9.0       0.93      0.93      0.93      6958

avg / total       0.93      0.93      0.93     13834

SVM 0.931473182015
             precision    recall  f1-score   support

        6.0       0.93      0.93      0.93      6876
        9.0       0.94      0.93      0.93      6958

avg / total       0.93      0.93      0.93     13834



In [44]:
# PCA feature
# {6,9}, flip noise (p=0.4): classify the PCA projection of the corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),pca_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# `svm` is not imported in this cell; it relies on the import made by an earlier cell.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,pca_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.872126644499
             precision    recall  f1-score   support

        6.0       0.88      0.87      0.87      6876
        9.0       0.87      0.88      0.87      6958

avg / total       0.87      0.87      0.87     13834

SVM 0.874150643342
             precision    recall  f1-score   support

        6.0       0.88      0.86      0.87      6876
        9.0       0.87      0.88      0.88      6958

avg / total       0.87      0.87      0.87     13834



In [45]:
# TGA feature
# {6,9}, flip noise (p=0.4): classify the TGA projection of the corrupted samples.

from sklearn.linear_model import LogisticRegression
from sklearn import metrics, cross_validation, svm
from sklearn import datasets  # NOTE(review): not used in this cell
# First accuracy printed (unlabelled) is logistic regression.
predicted = cross_validation.cross_val_predict(LogisticRegression(),tga_dat, new_ys, cv=10)
print(metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

# Linear-kernel SVM scored the same way.
clf = svm.SVC(kernel='linear', C=1)
predicted = cross_validation.cross_val_predict(clf,tga_dat, new_ys, cv=10)
print("SVM",metrics.accuracy_score(new_ys, predicted))
print(metrics.classification_report(new_ys, predicted))

0.862874078358
             precision    recall  f1-score   support

        6.0       0.87      0.85      0.86      6876
        9.0       0.86      0.87      0.87      6958

avg / total       0.86      0.86      0.86     13834

SVM 0.864681220182
             precision    recall  f1-score   support

        6.0       0.87      0.85      0.86      6876
        9.0       0.86      0.88      0.87      6958

avg / total       0.86      0.86      0.86     13834

