In [54]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from csv import reader
from pandas import read_csv
import sklearn as skl
from sklearn.neural_network import MLPClassifier
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

def read_data(file):
    csv_reader = read_csv(file,header=None,delim_whitespace=True)
    data=np.array(csv_reader)
    return data

def svm_class(train_x,train_y,test_x,test_y):
    for kernel in {'linear','poly','rbf'}:
        clf=svm.SVC(verbose=True,kernel=kernel)
    # print(train_x.shape,train_y.shape)
        clf.fit(train_x,np.ravel(train_y))
        Y=clf.predict(test_x)
        print("SVM ",kernel,":",accuracy_score(Y,test_y))
        print(classification_report(Y,test_y))
    
def KNN(train_x,train_y,test_x,test_y):
    neigh=KNeighborsClassifier(n_neighbors=50)
    neigh.fit(train_x,np.ravel(train_y))
    knn_Y=neigh.predict(test_x)
#     print("KNN: ",mean_squared_error(knn_Y,test_y))
    print("KNN: ",accuracy_score(knn_Y,test_y))
    print(classification_report(knn_Y,test_y))

def rdforest(train_x,train_y,test_x,test_y):
    rdforest= RandomForestClassifier(n_estimators=10,verbose=True)
    rdforest.fit(train_x,np.ravel(train_y))
    rdforest_y=rdforest.predict(test_x)
#     print("Random Forest: ",mean_squared_error(rdforest_y,test_y))
    print("Random Forest: ",accuracy_score(rdforest_y,test_y))
    print(classification_report(rdforest_y,test_y))

def perceptron_nn(train_x,train_y,test_x,test_y):
    clf=MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(80,),random_state=1,activation='relu')
    clf.fit(train_x,np.ravel(train_y))
    y=clf.predict(test_x)
#     print("perceptron_nn:",mean_squared_error(y,test_y))
    print("perceptron_nn:",accuracy_score(y,test_y))
    print(classification_report(rdforest_y,test_y))
    
def feature_importance(train_x,train_y,test_x,test_y):
    forest = ExtraTreesClassifier(n_estimators=250,
                              random_state=0)

    forest.fit(train_x,train_y)
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
             axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    weight=np.ones(train_x.shape[1])
    for f in range(train_x.shape[1]):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
        

    # Plot the feature importances of the forest
    plt.figure()
    plt.title("Feature importances")
    plt.bar(range(train_x.shape[1]), importances[indices],
           color="r", yerr=std[indices], align="center")
    plt.xticks(range(train_x.shape[1]), indices)
    plt.xlim([-1, train_x.shape[1]])
    plt.show()


    
train_x=read_data("data/train/X_train.txt")
train_y=read_data("data/train/y_train.txt")
test_x=read_data("data/test/X_test.txt")
test_y=read_data("data/test/y_test.txt")
scaler=StandardScaler()
scaler.fit(train_x)
train_x=scaler.transform(train_x)
test_x=scaler.transform(test_x)

# feature_importance(train_x,train_y,test_x,test_y)
KNN(train_x,train_y,test_x,test_y)
rdforest(train_x,train_y,test_x,test_y)
svm_class(train_x,train_y,test_x,test_y)
perceptron_nn(train_x,train_y,test_x,test_y)


# for 95 accuracy 80 neurons
    


KNN:  0.874448591788
             precision    recall  f1-score   support

          1       0.99      0.80      0.88       612
          2       0.90      0.87      0.88       486
          3       0.68      0.97      0.80       292
          4       0.73      0.94      0.82       384
          5       0.97      0.77      0.86       669
          6       0.94      1.00      0.97       504

avg / total       0.90      0.87      0.88      2947



[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.7s finished
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


Random Forest:  0.91007804547
             precision    recall  f1-score   support

          1       0.98      0.85      0.91       572
          2       0.86      0.88      0.87       458
          3       0.81      0.96      0.88       357
          4       0.88      0.89      0.88       481
          5       0.90      0.89      0.90       542
          6       1.00      1.00      1.00       537

avg / total       0.91      0.91      0.91      2947

[LibSVM]SVM  linear : 0.960977265015
             precision    recall  f1-score   support

          1       1.00      0.96      0.98       517
          2       0.96      0.96      0.96       470
          3       0.95      0.99      0.97       402
          4       0.88      0.96      0.92       452
          5       0.97      0.90      0.93       569
          6       1.00      1.00      1.00       537

avg / total       0.96      0.96      0.96      2947

[LibSVM]SVM  poly : 0.923990498812
             precision    recall  f1-score  