# Binary classifier for tumor classification

#### This MLP classifier is designed to detect the presence of a tumor from gene expression data.
#### Import all the required python dependencies

In [3]:
%matplotlib inline
import numpy as np
from sklearn.decomposition import PCA,KernelPCA
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score,classification_report
from sklearn.model_selection import learning_curve
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import FeatureUnion

#### Import the dataset, select the 10 best features, and combine them with 6 PCA components so that we don't lose the best features; then reduce the combined feature set to 6 axes with a second PCA.

In [4]:
# Load the raw table: columns 0..1999 are used as features and column 2000
# as the class label.
# NOTE(review): read_csv treats the first line as a header by default; confirm
# that 'colonTumor.data' really has a header row, otherwise one sample is
# silently consumed as column names.
inputdata = pd.read_csv('colonTumor.data')
datasets = inputdata.values
X_raw = datasets[:, 0:2000]
Y = datasets[:, 2000]

# Standardise every feature column before PCA / univariate selection.
X_scaled = scale(X_raw)

pca = PCA(n_components=6)
selection = SelectKBest(k=10)

# Build estimator from PCA and univariate selection:
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

# Use combined features to transform the dataset
# (6 PCA components + 10 selected features):
X = combined_features.fit(X_scaled, Y).transform(X_scaled)

# Further reduce the combined features to 6 axes. A separate PCA instance is
# used on purpose: re-fitting `pca` here would overwrite the fitted state of the
# very object held inside `combined_features`, leaving the union unable to
# transform 2000-feature data again.
pca_reduce = PCA(n_components=6)
X1 = pca_reduce.fit_transform(X)

# NOTE(review): scaling, SelectKBest (which uses Y) and both PCA steps are
# fitted on every row, including rows 40:61 that are later held out as the
# test set, so the reported test accuracy is likely optimistic. Consider
# fitting these steps on the training rows only.

#### Data splitting: the first 40 samples (about 66% of the dataset) are assigned to training and the remaining 21 samples to testing.

In [5]:
# Positional hold-out split: rows 0..39 are used for training and rows 40..60
# for testing. The rows are taken in file order; nothing is shuffled here.
train = X1[0:40, :]
trainlabel = Y[0:40]
test = X1[40:61, :]
testlabel = Y[40:61]

# print() as a function call works on both Python 2 and Python 3; the bare
# `print x` statement form is a SyntaxError on Python 3.
print(testlabel)

[ 1.  1.  0.  0.  0.  0.  1.  0.  1.  1.  0.  0.  1.  1.  0.  0.  0.  0.
  1.  0.  1.]


#### Using the scikit-learn neural network library to train a multi-layer perceptron (MLP) classifier on the training set

In [12]:
# Multi-layer perceptron with two hidden layers (15 and 200 units), trained
# with SGD and an adaptive learning rate. random_state seeds the estimator so
# repeated runs give the same result; verbose=10 prints the loss per iteration.
clf = MLPClassifier(
    hidden_layer_sizes=(15, 200),
    solver='sgd',
    learning_rate='adaptive',
    learning_rate_init=0.01,
    alpha=1e-10,
    max_iter=4000,
    random_state=1,
    verbose=10
)

# fit() trains in place and returns the estimator itself, so clf1 is simply a
# second name for the same fitted model.
clf.fit(train, trainlabel)
clf1 = clf

Iteration 1, loss = 1.89391765
Iteration 2, loss = 1.97214958
Iteration 3, loss = 0.79953257
Iteration 4, loss = 1.13048074
Iteration 5, loss = 0.64681703
Iteration 6, loss = 0.50062236
Iteration 7, loss = 0.41485574
Iteration 8, loss = 0.34924516
Iteration 9, loss = 0.29506818
Iteration 10, loss = 0.26106674
Iteration 11, loss = 0.23313207
Iteration 12, loss = 0.20913210
Iteration 13, loss = 0.18948948
Iteration 14, loss = 0.17343546
Iteration 15, loss = 0.15974517
Iteration 16, loss = 0.14785929
Iteration 17, loss = 0.13697493
Iteration 18, loss = 0.12679836
Iteration 19, loss = 0.11731902
Iteration 20, loss = 0.10847839
Iteration 21, loss = 0.10004173
Iteration 22, loss = 0.09213270
Iteration 23, loss = 0.08467106
Iteration 24, loss = 0.07780555
Iteration 25, loss = 0.07165166
Iteration 26, loss = 0.06623482
Iteration 27, loss = 0.06130863
Iteration 28, loss = 0.05687129
Iteration 29, loss = 0.05289023
Iteration 30, loss = 0.04925554
Iteration 31, loss = 0.04589922
Iteration 32, los

#### Testing the model and measuring the accuracy of the trained model

In [13]:
# Predict labels for the held-out rows and print them so they can be compared
# with the true test labels. print() is called as a function so the cell runs
# on Python 3 as well as Python 2.
y_pred = clf.predict(test)
print(y_pred)

# Per-class precision / recall / F1. target_names are applied to the labels in
# sorted order, so label 0 is reported as negative and label 1 as positive.
target_names = ['class negative', 'class positive']
print(classification_report(testlabel, y_pred, target_names=target_names))

# Overall fraction of correctly classified test samples, shown as a percentage.
score = accuracy_score(testlabel, y_pred)
print("Accuracy is: %.1f%%" % (score * 100))

[ 1.  1.  0.  0.  0.  0.  1.  1.  1.  1.  0.  0.  1.  1.  1.  1.  0.  0.
  1.  0.  1.]
                precision    recall  f1-score   support

class negative       1.00      0.75      0.86        12
class positive       0.75      1.00      0.86         9

   avg / total       0.89      0.86      0.86        21

Accuracy is: 85.7%
