In [1]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [2]:
# Read the two yeast CSV files that ship as separate train / test splits.
datatrain = pd.read_csv('data/yeast/yeast-train.csv')
datatest = pd.read_csv('data/yeast/yeast-test.csv')

# The Class1 column stores its labels as the literal strings "b'0'" / "b'1'";
# swap them for the integers 0 / 1 in both frames (train first, then test).
for frame in (datatrain, datatest):
    for raw_label, decoded in (("b'0'", 0), ("b'1'", 1)):
        frame.loc[frame['Class1'] == raw_label, 'Class1'] = decoded

# Stack both splits into a single frame with a fresh 0..n-1 index.
dataset = pd.concat([datatrain, datatest], ignore_index=True)

In [3]:
# Feature matrix: everything except the fourteen label columns Class1..Class14.
X = dataset.drop(columns=[f'Class{k}' for k in range(1, 15)])
# Target: only the first label column is modelled in this notebook.
y = dataset['Class1']

In [4]:
# Sanity check: display (n_samples, n_features) of the feature matrix.
X.shape

(2417, 103)

In [5]:
# Label vector for the binary target as an integer NumPy array.
# It is rebuilt from `dataset` instead of from `y` so the cell is idempotent:
# `y = y.values` raises AttributeError on a second run, once `y` is an ndarray.
# The explicit int cast matters because Class1 presumably stays object-dtype
# after the "b'0'"/"b'1'" replacement (it was read as strings), and scikit-learn
# estimators reject an object array of ints as an "unknown" label type.
y = dataset['Class1'].astype(int).values

In [6]:
# Standardize every feature column; the fitted scaler is kept in `scaler`.
scaler = StandardScaler()
scaled_dataset = scaler.fit_transform(X)

In [None]:
# Supervised dimensionality reduction with Linear Discriminant Analysis.
# `lda` has to be an estimator exposing `transform` and
# `explained_variance_ratio_`, because later cells call both on it; TSNE offers
# neither, so the reducer is constructed here explicitly instead of fitting an
# undefined name.
# NOTE(review): the reducer is fit on the whole dataset, labels included, before
# the train/test split further down, so held-out labels influence the
# projection. Consider splitting first and fitting on the training rows only.
lda = LinearDiscriminantAnalysis()
lda.fit(scaled_dataset, y)

In [None]:
# Project the standardized features with the fitted `lda` transformer.
lda_dataset = lda.transform(scaled_dataset)

In [None]:
# Display the shape of the projected data to see how many components remain.
lda_dataset.shape

In [None]:
# Share of variance explained by each component, as a percentage with one decimal.
per_var = np.round(100 * lda.explained_variance_ratio_, 1)

In [None]:
# One tick label per component: PC1, PC2, ...
labels = [f'PC{idx}' for idx in range(1, len(per_var) + 1)]

In [None]:
# Bar chart of the explained-variance percentage per component.
fig, ax = plt.subplots(figsize=(60, 10))
ax.bar(x=range(1, len(per_var) + 1), height=per_var, tick_label=labels)
plt.show()

In [None]:
# Seed Python's `random` module; the split itself is made deterministic by
# `random_state=0`.
random.seed(0)

# Hold out 20% of the projected samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    lda_dataset,
    y,
    random_state=0,
    test_size=0.2,
)
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Candidate learning rates for the sweep below: 0.01, 0.02, ..., 1.00.
a = [step / 100 for step in range(1, 101)]


In [None]:
# Sweep the XGBoost learning rate over every value in `a` and print each new
# best test accuracy together with the rate that produced it.
# NOTE(review): the rate is picked by accuracy on the test split, so the
# printed number is an optimistic estimate.
mxa = 0
for i in a:
    xgbclassifier = XGBClassifier(learning_rate=i).fit(X_train, y_train)
    y_pred = xgbclassifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy <= mxa:
        continue
    # Strict improvement over the best score seen so far.
    mxa = accuracy
    print(mxa, i)

In [None]:
from sklearn.svm import SVC

# Linear-kernel SVM trained on the reduced features.
svmclassifier = SVC(kernel='linear', random_state=0).fit(X_train, y_train)
y_pred_svm = svmclassifier.predict(X_test)

# Confusion matrix first, then overall accuracy as a percentage.
cm = confusion_matrix(y_test, y_pred_svm)
accuracy = accuracy_score(y_test, y_pred_svm)
print(cm)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Sweep the forest size over 5, 10, ..., 250 trees (entropy criterion, fixed
# seed) and print every new best test accuracy with the tree count behind it.
# NOTE(review): model selection uses the test split, so the reported accuracy
# is an optimistic estimate.
mxa = 0
for i in range(1, 51):
    rfclassifier = RandomForestClassifier(n_estimators=i*5, criterion='entropy', random_state=0)
    rfclassifier.fit(X_train, y_train)
    y_pred_randForest = rfclassifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_randForest)
    if accuracy > mxa:
        mxa = accuracy
        print(accuracy, (i*5))
# Report the best accuracy found by the sweep. `accuracy` only holds the score
# of the final iteration (250 trees), which is not necessarily the best one.
print("Accuracy: %.2f%%" % (mxa * 100.0))

In [None]:
# Packages for training model
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.models import model_from_json
from keras.regularizers import l2
from keras.layers.normalization import BatchNormalization

# Packages for explanation
from keras import backend as K
from keras.models import Model
from deepexplain.tensorflow import DeepExplain
from IPython.display import display, HTML
import matplotlib.pyplot as plt

In [None]:
# Hyperparameters for the Keras network defined below.
sgdOptimizer = 'adam'  # optimizer passed to compile(); despite the name this is Adam
lossFun = 'categorical_crossentropy'  # matches the one-hot encoded targets
finalLayerActivation = 'softmax'
batchSize = 25
numEpochs = 500
nb_classes = 2  # binary target (Class1 is 0 or 1)
# Width of the network input, taken from the training matrix so it stays in
# sync with however many components the dimensionality reduction produced
# (1 for a two-class LDA projection).
inputDim = X_train.shape[1]

In [None]:
# One-hot encode the integer labels of both splits for the softmax output layer.
trainY, testY = (
    np_utils.to_categorical(split_labels, num_classes=nb_classes)
    for split_labels in (y_train, y_test)
)

In [None]:
# Feed-forward classifier: three Dense(32, relu) + Dropout(0.5) blocks, each
# preceded by batch normalization, followed by a softmax output layer.
model = Sequential()
model.add(BatchNormalization(input_shape=(inputDim,)))
model.add(Dense(32, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(nb_classes, activation=finalLayerActivation))
model.compile(loss=lossFun, optimizer=sgdOptimizer, metrics=["accuracy"])
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line after the table.
model.summary()

In [None]:
# Train the network; the test split doubles as the per-epoch validation set.
model.fit(
    X_train,
    trainY,
    validation_data=(X_test, testY),
    batch_size=batchSize,
    epochs=numEpochs,
    verbose=1,
)

# Silent evaluation on both splits; index 1 of each result is the metric
# requested at compile time (index 0 is the loss).
trainScore = model.evaluate(X_train, trainY, verbose=0)
testScore = model.evaluate(X_test, testY, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], trainScore[1]*100))
print("%s: %.2f%%" % (model.metrics_names[1], testScore[1]*100))