In [13]:
'''
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Step 1: Load the dataset
csv_file_path = 'wavelet_metrics.csv'  # Path to your CSV file
data = pd.read_csv(csv_file_path)

# Step 2: Separate features and labels
X = data.drop(['Data', 'label'], axis=1)  # Drop non-numeric and target columns
y = data['label']  # Extract labels (0 or 1)

# Step 3: Ensure equal class distribution in train and test sets
class_0 = data[data['label'] == 0]
class_1 = data[data['label'] == 1]

# Step 4: Split each class into 80% train and 20% test
train_0, test_0 = train_test_split(class_0, test_size=0.5, random_state=42)
train_1, test_1 = train_test_split(class_1, test_size=0.5, random_state=42)

# Step 5: Concatenate the training and testing data for both classes
train_data = pd.concat([train_0, train_1])
test_data = pd.concat([test_0, test_1])

# Step 6: Extract features and labels from the training and testing sets
X_train = train_data.drop(['Data', 'label'], axis=1)
y_train = train_data['label']
X_test = test_data.drop(['Data', 'label'], axis=1)
y_test = test_data['label']

# Step 7: Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 8: Apply LDA for dimensionality reduction
lda = LDA(n_components=1)  # Binary classification, so 1 component is sufficient
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

# Step 9: Train an SVM classifier on the LDA-transformed data
svm = SVC(kernel='rbf', random_state=42)  # Using linear kernel
svm.fit(X_train_lda, y_train)

# Step 10: Predict and evaluate the SVM classifier
y_pred = svm.predict(X_test_lda)

# Step 11: Calculate the confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Step 12: Calculate Sensitivity and Specificity
sensitivity = tp / (tp + fn)  # True Positive Rate
specificity = tn / (tn + fp)  # True Negative Rate

# Step 11: Print evaluation metrics
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print(f"\nSensitivity: {sensitivity:.2f}")
print(f"Specificity: {specificity:.2f}")
'''

Confusion Matrix:
[[315  85]
 [ 94 306]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.79      0.78       400
           1       0.78      0.77      0.77       400

    accuracy                           0.78       800
   macro avg       0.78      0.78      0.78       800
weighted avg       0.78      0.78      0.78       800

Accuracy: 0.78

Sensitivity: 0.77
Specificity: 0.79


In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, make_scorer

# Step 1: Load the dataset
csv_file_path = 'wavelet_metrics.csv'  # Path to your CSV file
data = pd.read_csv(csv_file_path)

# Step 2: Separate features and labels
X = data.drop(['Data', 'label'], axis=1)  # Drop non-numeric and target columns
y = data['label']  # Extract labels (0 or 1)

# Step 3: Ensure equal class distribution in train and test sets
class_0 = data[data['label'] == 0]
class_1 = data[data['label'] == 1]

# Step 4: Split each class into 80% train and 20% test
train_0, test_0 = train_test_split(class_0, test_size=0.5, random_state=42)
train_1, test_1 = train_test_split(class_1, test_size=0.5, random_state=42)

# Step 5: Concatenate the training and testing data for both classes
train_data = pd.concat([train_0, train_1])
test_data = pd.concat([test_0, test_1])

# Step 6: Extract features and labels from the training and testing sets
X_train = train_data.drop(['Data', 'label'], axis=1)
y_train = train_data['label']
X_test = test_data.drop(['Data', 'label'], axis=1)
y_test = test_data['label']

# Step 7: Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 8: Apply LDA for dimensionality reduction
lda = LDA(n_components=1)  # Binary classification, so 1 component is sufficient
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

# Step 9: Define the SVM model and hyperparameters for GridSearchCV
param_grid = {
    'C': [0.01, 0.1, 1, 2, 3, 5, 10, 20, 30, 40, 50, 100, 200],      # Regularization parameter
    'gamma': [0.0001, 0.001, 0.01, 0.02, 0.05, 0.08, 0.1, 0.2, 0.3, 0.5, 1],  # RBF Kernel coefficient
}

# Use StratifiedKFold for balanced cross-validation splits
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Define the SVM with RBF kernel
svm = SVC(kernel='rbf')

# Perform GridSearchCV to find the optimal hyperparameters
grid_search = GridSearchCV(svm, param_grid, cv=cv, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train_lda, y_train)

# Print the best hyperparameters
print(f"Best Parameters: {grid_search.best_params_}")

# Step 10: Train the final SVM model with the best parameters
best_svm = grid_search.best_estimator_
best_svm.fit(X_train_lda, y_train)

# Step 11: Predict on the test set
y_pred = best_svm.predict(X_test_lda)

# Step 12: Calculate the confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Step 13: Calculate Sensitivity and Specificity
sensitivity = tp / (tp + fn)  # True Positive Rate
specificity = tn / (tn + fp)  # True Negative Rate

# Step 14: Print evaluation metrics
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print(f"\nSensitivity: {sensitivity:.2f}")
print(f"Specificity: {specificity:.2f}")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")


Fitting 10 folds for each of 143 candidates, totalling 1430 fits
Best Parameters: {'C': 100, 'gamma': 0.2}
Confusion Matrix:
[[125  35]
 [ 49 111]]

Sensitivity: 0.69
Specificity: 0.78
Accuracy: 0.74
