In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, classification_report

import warnings
warnings.filterwarnings('ignore')


In [2]:
def create_baseline_model(X_train, X_test, y_train, y_test):
    """
    Create and evaluate a baseline SVM model
    """
    
    baseline_svm = SVC(kernel='linear', random_state=42)
    
    
    baseline_svm.fit(X_train, y_train)
    
    y_pred = baseline_svm.predict(X_test)
    
    baseline_accuracy = accuracy_score(y_test, y_pred)
    baseline_f1 = f1_score(y_test, y_pred)
    
    print("Baseline Model Performance:")
    print(f"Accuracy: {baseline_accuracy:.4f}")
    print(f"F1-score: {baseline_f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    
    return baseline_accuracy, baseline_f1

In [3]:
def perform_grid_search(X_train, y_train):
    """
    Perform grid search for hyperparameter tuning
    """
    param_grid = {
        'kernel': ['linear', 'rbf', 'poly'],
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 0.001, 0.01, 0.1, 1],
        'degree': [2, 3, 4]
    }
    
    svm = SVC(random_state=42)
    
    grid_search = GridSearchCV(
        estimator=svm,
        param_grid=param_grid,
        cv=5,
        scoring='f1',
        n_jobs=-1,
        verbose=1
    )
    
    grid_search.fit(X_train, y_train)
    
    print("\nGrid Search Results:")
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best cross-validation F1-score: {grid_search.best_score_:.4f}")
    
    return grid_search.best_estimator_, grid_search.best_params_

In [4]:
def evaluate_optimized_model(best_model, X_test, y_test):
    """
    Evaluate the optimized model
    """
    # Make predictions
    y_pred = best_model.predict(X_test)
    
    # Calculate metrics
    optimized_accuracy = accuracy_score(y_test, y_pred)
    optimized_f1 = f1_score(y_test, y_pred)
    
    print("\nOptimized Model Performance:")
    print(f"Accuracy: {optimized_accuracy:.4f}")
    print(f"F1-score: {optimized_f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    
    return optimized_accuracy, optimized_f1

In [5]:
df = pd.read_csv("../dataset/full_processed_heart_disease_cleveland.csv")

X = df.drop(["target"],axis=1)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42)

In [6]:
# Create and evaluate baseline model
baseline_accuracy, baseline_f1 = create_baseline_model(X_train, X_test, y_train, y_test)

Baseline Model Performance:
Accuracy: 0.8852
F1-score: 0.8889

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.90      0.88        29
           1       0.90      0.88      0.89        32

    accuracy                           0.89        61
   macro avg       0.88      0.89      0.89        61
weighted avg       0.89      0.89      0.89        61



In [8]:
# Perform grid search

best_model, best_params = perform_grid_search(X_train, y_train)

Fitting 5 folds for each of 180 candidates, totalling 900 fits


KeyboardInterrupt: 

In [None]:
# Evaluate optimized model
optimized_accuracy, optimized_f1 = evaluate_optimized_model(best_model, X_test, y_test)

In [None]:
print("\nPerformance Improvement Summary:")
print(f"Accuracy improvement: {(optimized_accuracy - baseline_accuracy) * 100:.2f}%")
print(f"F1-score improvement: {(optimized_f1 - baseline_f1) * 100:.2f}%")