In [None]:
import pandas as pd
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import itertools
import csv


def plot_confusion_matrix(cm, classes,
             normalize=False,
             title='Confusion matrix',
             cmap=plt.cm.Blues):
    '''Draw a confusion matrix as an annotated heat map on the current pyplot figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are drawn along the y axis ("True label") and
        columns along the x axis ("Predicted label"). The input is not modified.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, default False
        When True, every row is divided by its sum and cells are printed with
        two decimals. A row whose sum is zero is shown as 0.00 instead of
        producing NaN.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap handed to ``plt.imshow``.

    Returns
    -------
    None. The figure is only drawn; the caller decides when to ``plt.show()``.
    '''
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; the remaining cells keep
        # the 0.0 they were initialised with, so an empty class cannot inject
        # NaN into the plot or into the colour threshold computed below.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()


# Load the raw exoplanet table and print its column summary.
df = pd.read_csv("exoplanet.csv")
df.info()

# Turn the disposition strings into integer codes, in this order:
# CONFIRMED -> 0, CANDIDATE -> 1, FALSE POSITIVE -> 2.
# The replacement is applied to the whole frame, not to a single column.
for code, label in enumerate(('CONFIRMED', 'CANDIDATE', 'FALSE POSITIVE')):
    df = df.replace(label, code)

# Missing values are filled with 0.
df = df.fillna(0)

# Persist the encoded table; later cells read their columns back from this file.
df.to_csv("Exoplanet_mod.csv")
df

In [None]:
# Build the train/test split and rescale the features with a min-max scaler.
# The scaler instance gets its own name so that it does not shadow the
# MinMaxScaler class imported at the top of the notebook.
scaler = preprocessing.MinMaxScaler()

y_data = pd.read_csv('Exoplanet_mod.csv', usecols= ['koi_disposition'])#.values.transpose()[0]
x_data = pd.read_csv('Exoplanet_mod.csv', usecols= ['koi_fpflag_nt','koi_fpflag_ss','koi_fpflag_co','koi_fpflag_ec','koi_period','koi_period_err1','koi_period_err2','koi_time0bk','koi_time0bk_err1','koi_time0bk_err2','koi_impact','koi_impact_err1','koi_impact_err2','koi_duration','koi_duration_err1','koi_duration_err2','koi_depth','koi_depth_err1','koi_depth_err2','koi_prad','koi_prad_err1','koi_prad_err2','koi_teq','koi_teq_err1','koi_teq_err2','koi_insol','koi_insol_err1','koi_insol_err2','koi_model_snr','koi_tce_plnt_num','koi_steff','koi_steff_err1','koi_steff_err2','koi_slogg','koi_slogg_err1','koi_slogg_err2','koi_srad','koi_srad_err1','koi_srad_err2','ra','dec','koi_kepmag'])

# Sequential split: the first N_TRAIN rows are used for training and the rest
# for testing (the original note describes this as roughly 80% / 20%).
N_TRAIN = 7650
x_train = x_data[:N_TRAIN]
y_train = y_data[:N_TRAIN]

x_test = x_data[N_TRAIN:]
y_test = y_data[N_TRAIN:]

x_train = x_train.values
x_test = x_test.values

print("Classes train : ", y_train.shape)
print("Features train : ", x_train.shape)

print("Classes test :", y_test.shape)
print("Features test : ", x_test.shape)

# Learn the per-feature min/max on the training split only and reuse exactly
# that mapping for the test split. Refitting on the test data would scale the
# two splits differently and leak test statistics into preprocessing.
# Test values outside the training range may therefore fall outside [0, 1].
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [None]:
# Principal component analysis with scikit-learn.
# `pca` is the 4-component model used for the actual projection; it is fitted
# on the training split only so no test information enters the transform.
pca = PCA(n_components=4, svd_solver='full')
pca.fit(x_train)

# Full-rank fits, used for inspecting the variance spectrum only.
# pca_test is never used to project data.
pca_train = PCA().fit(x_train)
pca_test = PCA().fit(x_test)

# Cumulative share of variance explained as components are added (training split).
cum_var = np.cumsum(pca_train.explained_variance_ratio_)

plt.plot(cum_var)
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()

# Project both splits with the configured model. Previously the full-rank
# pca_train was used here, which keeps every component and therefore performs
# no dimensionality reduction.
XTrain = pca.transform(x_train)
XTest = pca.transform(x_test)

In [None]:
# Linear Discriminant Analysis with scikit-learn, fitted on the training split.
# NOTE: this cell assigns XTrain / XTest, the same names the PCA cell assigns,
# so a later cell reading them sees whichever projection was executed last.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
# y_train is a single-column frame; flatten it to the 1-D label vector that
# scikit-learn estimators expect.
lda.fit(x_train, np.ravel(y_train))
XTrain = lda.transform(x_train)
XTest = lda.transform(x_test)

In [None]:
#Classifier without pca: one-vs-rest SVMs trained on the scaled features, one run per kernel
kernels = ["rbf","poly3","poly5","sigmoid","linear"]

# Display names for the label codes written by the loading cell
# (0 = CONFIRMED, 1 = CANDIDATE, 2 = FALSE POSITIVE).
classes = ['0','1','2']

# -1 tells SVC not to limit the number of solver iterations.
max_iteration = -1

# Kernel-specific SVC settings. A label that is not listed here is passed to
# SVC unchanged as the kernel name.
svc_overrides = {
    'rbf': dict(kernel='rbf', C=30, gamma=1.5),
    'poly3': dict(kernel='poly', degree=3),
    'poly5': dict(kernel='poly', degree=5),
}

# y_train is a single-column frame; scikit-learn expects a 1-D label vector.
y_train_1d = np.ravel(y_train)

for fun in kernels:
    svc_params = svc_overrides.get(fun, dict(kernel=fun))
    model = OneVsRestClassifier(
        SVC(max_iter=max_iteration, probability=False, verbose=True, **svc_params)
    ).fit(x_train, y_train_1d)
    predict = model.predict(x_test)

    # Predictions are already class labels, so the report and the confusion
    # matrix are both computed from them directly, without rounding.
    classification_metrics = metrics.classification_report(y_test, predict, target_names=classes)
    print("\n\n#################", fun ,"#################")
    print("\n" + classification_metrics)

    # Named `cm` so the confusion_matrix function imported at the top of the
    # notebook is not overwritten by its own result.
    cm = metrics.confusion_matrix(y_test, predict)

    plot_confusion_matrix(cm, classes)
    plt.show()
    plot_confusion_matrix(cm, classes, normalize=True)
    plt.show()

In [None]:
#Classifier on the projected features: one-vs-rest SVMs trained on XTrain / XTest
#(these hold the output of whichever projection cell, PCA or LDA, was run last)
kernels = ["rbf","poly3","poly5","sigmoid","linear"]

# Display names for the label codes written by the loading cell
# (0 = CONFIRMED, 1 = CANDIDATE, 2 = FALSE POSITIVE).
classes = ['0','1','2']

# -1 tells SVC not to limit the number of solver iterations.
max_iteration = -1

# Kernel-specific SVC settings. A label that is not listed here is passed to
# SVC unchanged as the kernel name.
svc_overrides = {
    'rbf': dict(kernel='rbf', C=1, gamma=0.1),
    'poly3': dict(kernel='poly', degree=3),
    'poly5': dict(kernel='poly', degree=5),
}

# y_train is a single-column frame; scikit-learn expects a 1-D label vector.
y_train_1d = np.ravel(y_train)

#cycle over the kernels; print the classification report and plot the confusion matrices for each
for fun in kernels:
    svc_params = svc_overrides.get(fun, dict(kernel=fun))
    model = OneVsRestClassifier(
        SVC(max_iter=max_iteration, probability=False, verbose=True, **svc_params)
    ).fit(XTrain, y_train_1d)
    predict = model.predict(XTest)

    # Predictions are already class labels, so the report and the confusion
    # matrix are both computed from them directly, without rounding.
    classification_metrics = metrics.classification_report(y_test, predict, target_names=classes)
    print("\n\n#################", fun ,"#################")
    print("\n" + classification_metrics)

    # Named `cm` so the confusion_matrix function imported at the top of the
    # notebook is not overwritten by its own result.
    cm = metrics.confusion_matrix(y_test, predict)

    plot_confusion_matrix(cm, classes)
    plt.show()
    plot_confusion_matrix(cm, classes, normalize=True)
    plt.show()