<a href="https://colab.research.google.com/github/abhishhari/Code/blob/Research-Project/MachineLearning_Modelling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Modelling

In [None]:
# Attach Google Drive to the Colab runtime's filesystem.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

In [None]:
!pip install scikit-learn
!pip install pandas
!pip install numpy
!pip install matplotlib

In [None]:
from sklearn import svm
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score 
from sklearn.metrics import roc_curve, roc_auc_score
from imblearn.over_sampling import SMOTE

In [None]:
# Read the dataset CSV from the mounted Drive into a DataFrame.
DATASET_CSV = "/content/drive/MyDrive/Project/Dataset/ML/light_combined.csv"
df = pd.read_csv(DATASET_CSV)

In [None]:
# Columns of `df` used as model inputs (order defines the column order of X).
features = [
    'num_bursts',
    'mean_duration', 'std_duration',
    'mean_amplitude', 'std_amplitude',
    'slope', 'xmin', 'L',
    'peak_autocorr_data1', 'peak_autocorr_data2',
    'second_peak_autocorr_data1', 'second_peak_autocorr_data2',
]

# X: 2-D array with one row per sample and one column per selected feature.
feature_frame = df[features]
X = feature_frame.to_numpy()

# y: the 'label' column flattened to a 1-D array.
label_frame = df[['label']]
y = label_frame.to_numpy().ravel()


## Balancing the dataset with SMOTE

In [None]:
# Oversample with SMOTE to balance the class counts of the dataset.
# A fixed random_state makes the generated synthetic samples identical on every
# run, so results derived from them are reproducible.
# NOTE(review): the classification cells below fit on X / y, not on
# X_resampled / y_resampled. Confirm whether the balanced data is meant to be
# used; if so, resample inside each training fold only, otherwise synthetic
# points derived from a test sample leak into training.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

## Classification using leave-one-out cross-validation

In [None]:
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score

# SVC(probability=True) calibrates its probabilities with an internal, randomised
# cross-validation; seeding it keeps predict_proba (and the AUC) stable across runs.
SEED = 42

# SVM classifier with a sigmoid kernel; probability=True enables predict_proba,
# which supplies the scores for the ROC AUC below.
# NOTE(review): the features are passed to the SVM unscaled — confirm that is intended.
clf = svm.SVC(kernel='sigmoid', probability=True, random_state=SEED)

# Leave-one-out: each sample is held out once while the model is fit on the rest.
cv = LeaveOneOut()

# Per-sample ground truth, hard prediction, and probability of clf.classes_[1].
y_true, y_pred, y_proba = [], [], []

for train_index, test_index in cv.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = clf.fit(X_train, y_train)
    # The test fold holds exactly one sample, hence index 0.
    y_true.append(y_test[0])
    y_pred.append(model.predict(X_test)[0])
    y_proba.append(model.predict_proba(X_test)[0][1])

# Both metrics are reported as percentages.
acc = accuracy_score(y_true, y_pred) * 100
auc = roc_auc_score(y_true, y_proba) * 100

# Build the one-row summary directly: DataFrame.append was removed in pandas 2.0.
result_table = pd.DataFrame(
    [{
        'classifiers': 'SVM',
        'score': acc,
        'cross val': 'LeaveOneOut',
        'auc': round(auc, 2),
    }],
    columns=['classifiers', 'score', 'auc', 'cross val'],
).set_index('classifiers')
result_table

## Classification using a hold-out split and 5-fold cross-validation

In [None]:
# Seed for the train/test split and for SVC's internal probability calibration,
# so the reported numbers are reproducible.
SEED = 42

# Split the data into training and testing sets (80/20, stratified by label).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

# SVM classifier with a sigmoid kernel; probability=True enables predict_proba.
clf = svm.SVC(kernel='sigmoid', probability=True, random_state=SEED)

# Train the classifier on the training split.
model = clf.fit(X_train, y_train)

# Hard predictions and accuracy (in percent) on the held-out test split.
y_pred = clf.predict(X_test)
acc = model.score(X_test, y_test) * 100

# Mean 5-fold cross-validated accuracy (in percent) over the full dataset.
# cross_val_score fits clones of the estimator, so `model` itself is not refit.
cross_val = round(cross_val_score(model, X, y, cv=5).mean() * 100, 2)

# Probability of the second class (model.classes_[1]) for each test sample.
yproba = model.predict_proba(X_test)[:, 1]

# ROC curve points and area under the curve on the held-out test split.
fpr, tpr, _ = roc_curve(y_test, yproba)
auc = roc_auc_score(y_test, yproba)

# Build the one-row summary directly: DataFrame.append was removed in pandas 2.0.
result_table = pd.DataFrame(
    [{
        'classifiers': 'SVM',
        'score': acc,
        'cross val': cross_val,
        'auc': round(auc * 100, 2),
    }],
    columns=['classifiers', 'score', 'auc', 'cross val'],
).set_index('classifiers')
result_table