In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, classification_report

In [None]:
def plot_kernel_comparison(results):
    """Draw a grouped bar chart of test-set metrics, one bar per kernel.

    Parameters
    ----------
    results : dict
        Maps a kernel label to a mapping that contains at least the keys
        ``'Accuracy'`` and ``'F1-score'``. Any other keys are ignored.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``; nothing is returned.

    Raises
    ------
    KeyError
        If an entry in ``results`` lacks one of the plotted metric keys.
    """
    metrics = ['Accuracy', 'F1-score']
    n_kernels = len(results)

    fig, ax = plt.subplots(figsize=(10, 6))

    x = np.arange(len(metrics))
    # 0.25 is kept for up to three kernels; beyond that the bars shrink so a
    # group never spills into the neighbouring metric's slot.
    width = min(0.25, 0.8 / max(n_kernels, 1))

    for position, (kernel, result) in enumerate(results.items()):
        heights = [result[metric] for metric in metrics]
        ax.bar(x + width * position, heights, width, label=kernel)

    ax.set_ylabel('Score')
    ax.set_title('Kernel Performance Comparison')
    # Centre each tick under its group regardless of how many kernels there
    # are (for three kernels this is exactly x + width).
    ax.set_xticks(x + width * max(n_kernels - 1, 0) / 2, metrics)
    if n_kernels:
        # Skip the legend when there is nothing to label.
        ax.legend(loc='lower right')
    ax.set_ylim(0, 1)

    plt.tight_layout()
    plt.show()

In [None]:
def compare_kernels(df):
    """Compare different kernel performances"""
    X = df.drop(["target"],axis=1)
    y = df["target"]

    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42)

    kernel_params = {
        'RBF': {
            'params': {
                'kernel': ['rbf'],
                'C': [0.1, 1, 10, 100],
                'gamma': ['scale', 0.001, 0.01, 0.1, 1]
            }
        },
        'Polynomial': {
            'params': {
                'kernel': ['poly'],
                'C': [0.1, 1, 10, 100],
                'degree': [2, 3, 4],
                'gamma': ['scale', 0.01, 0.1],
                'coef0': [0, 1]
            }
        },
        'Linear': {
            'params': {
                'kernel': ['linear'],
                'C': [0.1, 1, 10, 100]
            }
        }
    }
    
    results = {}
    best_models = {}
    
    for kernel_name, config in kernel_params.items():
        print(f"\nTuning {kernel_name} kernel...")
        
        grid_search = GridSearchCV(
            SVC(random_state=42),
            config['params'],
            cv=5,
            scoring='f1',
            n_jobs=-1,
            verbose=1
        )
        
        grid_search.fit(X_train, y_train)
        
        y_pred = grid_search.predict(X_test)
        results[kernel_name] = {
            'Accuracy': accuracy_score(y_test, y_pred),
            'F1-score': f1_score(y_test, y_pred),
            'Best Parameters': grid_search.best_params_,
            'Cross-val Score': grid_search.best_score_
        }
        best_models[kernel_name] = grid_search.best_estimator_
        
        print(f"\n{kernel_name} Kernel Results:")
        print(f"Best parameters: {grid_search.best_params_}")
        print(f"Test set accuracy: {results[kernel_name]['Accuracy']:.4f}")
        print(f"Test set F1-score: {results[kernel_name]['F1-score']:.4f}")
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred))
    
    # Visualize results
    plot_kernel_comparison(results)
    
    return results, best_models


In [None]:
# Read the processed Cleveland heart-disease CSV; the path is relative to the
# notebook's working directory.
df = pd.read_csv(
    filepath_or_buffer="../dataset/full_processed_heart_disease_cleveland.csv",
)



In [None]:
# Tune and evaluate every kernel on the loaded frame, keeping both the metric
# summary and the best fitted estimator per kernel.
results, best_models = compare_kernels(df=df)