In [None]:
# import the libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Load the liver-patient records from the local CSV export.
liverCSV = pd.read_csv('E:\\Thesis\\Indian_Liver_Patient.csv')

# Handle missing values: carry the previous row's value forward, column by
# column. DataFrame.ffill is the supported spelling of the deprecated
# fillna(method='ffill').
df = pd.DataFrame(liverCSV)
dataset = df.ffill(axis=0)

# Forward fill cannot repair a gap in the very first row(s) of a column, so
# report any columns that still contain missing values (an empty Index means
# the data is complete) instead of silently discarding the check.
print(dataset.columns[dataset.isnull().any()])


# start undersampling process
# Balance the two classes by keeping every row labelled 0 and an equally
# sized random subset of the rows labelled 1.

target = 'Liver_patients'

# number of rows carrying each label (0 / 1)
print(dataset[target].value_counts())

# label 0 is treated as the minority class: collect its rows and count them
minority_class_indices = dataset.loc[dataset[target] == 0].index
minority_class_len = len(minority_class_indices)
print(minority_class_len)

# label 1 is treated as the majority class: collect its row indices
majority_class_indices = dataset.loc[dataset[target] == 1].index
print(majority_class_indices)

# draw as many majority rows as there are minority rows, without repeats
# (np.random.choice raises ValueError if label 1 has fewer rows than label 0)
random_majority_indices = np.random.choice(
    majority_class_indices,
    size=minority_class_len,
    replace=False,
)
print(len(random_majority_indices))

# row indices of the minority class (label 0)
print(minority_class_indices)

# all minority rows followed by the randomly chosen majority rows
under_sample_indices = np.concatenate(
    (minority_class_indices, random_majority_indices)
)

# select the balanced subset of the dataset by row label
under_sample = dataset.loc[under_sample_indices]

# bar chart of the class counts after balancing
sns.countplot(data=under_sample, x=target)

# both labels should now appear the same number of times
print(under_sample[target].value_counts())

# end undersampling process


# Separate the feature matrix from the class labels. `columns=` replaces the
# positional axis argument (`drop(label, 1)`), which pandas 2.0 rejects.
# NOTE(review): the scaling below assumes every remaining column is numeric
# -- confirm that categorical columns were encoded beforehand.
tData = np.asarray(under_sample.drop(columns='Liver_patients'))
tTarget = np.asarray(under_sample['Liver_patients'])

# Hold out 20% of the rows for testing BEFORE computing any scaling
# statistics, so the test rows cannot influence the normalization.
X_train, X_test, y_train, y_test = train_test_split(tData, tTarget, test_size=0.20)

# Normalize Data
# Per-feature mean / standard deviation are taken from the training rows only
# and then applied unchanged to the test rows.
means = np.mean(X_train, axis=0)
stds = np.std(X_train, axis=0)
# A constant feature has zero spread; divide by 1 instead of producing NaN/inf.
stds = np.where(stds == 0, 1.0, stds)
X_train = (X_train - means) / stds
X_test = (X_test - means) / stds
# Keep tData as a normalized array (same training statistics) for later use.
tData = (tData - means) / stds

# Optional dimensionality reduction, currently disabled:
#applying Kernel PCA
#from sklearn.decomposition import KernelPCA
#kpca = KernelPCA(n_components = 3, kernel = 'rbf')
#X_train = kpca.fit_transform(X_train)
#X_test = kpca.transform(X_test)

# Create a support-vector classifier with a linear kernel
livercheck = svm.SVC(kernel='linear')
# Train the model using the training split
livercheck.fit(X_train, y_train)

# Mean accuracy on the held-out test split (fraction in [0, 1])
accuracy = livercheck.score(X_test, y_test)

print("accuracy = ", accuracy * 100, "%")

# Confusion Matrix (rows: true labels, columns: predicted labels)
y_pred = livercheck.predict(X_test)
print(confusion_matrix(y_test, y_pred))

# Per-class precision, recall, F1 score and support
print(metrics.classification_report(y_test, y_pred))

