### HW2 SVM/NN algorithms

### Libraries

In [84]:
import pandas as pd
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import preprocessing
from sklearn.decomposition import RandomizedPCA
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

### Preprocessing

In [85]:
data = pd.read_csv('acuteLymphoblasticLeukemia.data', sep=',')

In [86]:
X = data.drop('Decision', axis=1)
y = data['Decision']

In [87]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.25)

In [88]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

### SVM

In [89]:
params = {'C': [1, 5, 10, 50], 'kernel': ['linear', 'rbf', 'poly'], 'gamma': [0.001, 0.0001, 0.0005, 0.005]}
model = GridSearchCV(estimator=SVC(), param_grid=params, n_jobs=-1)

model.fit(X_train, y_train)
print(model.best_score_)
print(model.best_params_)

prediction = model.predict(X_test)

print(metrics.accuracy_score(prediction, y_test))
print(metrics.confusion_matrix(prediction, y_test))

0.908450704225
{'C': 1, 'gamma': 0.001, 'kernel': 'linear'}
0.979166666667
[[ 9  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0 11  0  0]
 [ 1  0  0 15  0]
 [ 0  0  0  0  9]]


### Simple NN algorithm

In [90]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [91]:
mlp = MLPClassifier(hidden_layer_sizes=(30,30,30))
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [92]:
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))

[[ 9  0  0  1  0]
 [ 0  2  0  1  0]
 [ 0  0 10  1  0]
 [ 3  0  1 11  0]
 [ 0  0  0  1  8]]


In [93]:
print(classification_report(y_test,predictions))

                                          precision    recall  f1-score   support

 Precursor-B  ALL, subtype: Hyperdiploid       0.75      0.90      0.82        10
Precursor-B ALL, subtype: E2A-rearranged       1.00      0.67      0.80         3
      Precursor-B ALL, subtype: TEL-AML1       0.91      0.91      0.91        11
         Precursor-B ALL, subtype: other       0.73      0.73      0.73        15
                                   T-ALL       1.00      0.89      0.94         9

                             avg / total       0.84      0.83      0.83        48

