<a href="https://colab.research.google.com/github/chahatgarg884/Parameter-Optimization-of-SVM/blob/main/Parameter_Optimization_of_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR

# Load the dataset. Because `names` is passed, the CSV is read without a
# header row; 'letter' is the target column and the rest are features.
column_names = [
    'letter',
    'x-box', 'y-box', 'width', 'high', 'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br', 'x-ege', 'xegvy', 'y-ege', 'yegvx',
]
data = pd.read_csv("/content/letter-recognition.csv", names=column_names)

# Integer-encode the target; every remaining column is used as a feature.
y = LabelEncoder().fit_transform(data['letter'])
X = data.drop(columns=['letter'])

# Hyper-parameter search space. Keys use the '<step>__<parameter>' form so
# that they address the 'svr' step of the pipeline being grid-searched.
param_grid = dict(
    svr__kernel=['linear', 'rbf', 'poly'],
    svr__C=[0.1, 1, 10],
    svr__epsilon=[0.01, 0.1, 0.5],
)

# Accumulators filled by the repeated runs: one summary record and one
# accuracy curve per run.
results, all_convergences = [], []

def _rounded_accuracy(model, features, labels, max_label):
    """Return the accuracy of a regressor's predictions snapped to class ids.

    The continuous output of ``model.predict(features)`` is rounded to the
    nearest integer and clipped to ``[0, max_label]`` before it is compared
    with ``labels``.
    """
    predicted = np.round(model.predict(features)).astype(int)
    predicted = np.clip(predicted, 0, max_label)
    return accuracy_score(labels, predicted)


# Largest valid encoded label. It does not change between runs, so it is
# computed once instead of on every prediction.
max_label = len(np.unique(y)) - 1

# Run optimization 10 times, each on a different random 70/30 split.
# NOTE(review): SVR is a regressor, so the encoded class ids are fitted as a
# numeric target and the predictions are rounded back to ids afterwards.
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=i)

    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('svr', SVR())
    ])

    # NOTE(review): no `scoring` is passed, so candidates are ranked with the
    # pipeline's default score (R^2 for SVR), not the accuracy reported below.
    grid = GridSearchCV(pipeline, param_grid, cv=3, verbose=0)
    grid.fit(X_train, y_train)

    acc = _rounded_accuracy(grid, X_test, y_test, max_label)

    results.append({
        'Sample': f"S{i+1}",
        'Accuracy': round(acc*100, 2),
        'Kernel': grid.best_params_['svr__kernel'],
        'C': grid.best_params_['svr__C'],
        'Epsilon': grid.best_params_['svr__epsilon'],
        'Model': grid.best_estimator_
    })

    # Simulate convergence (not real optimization loss): test accuracy after
    # training on the first 1%, 2%, ..., 100% of the training split.
    # The curve is fitted on an unfitted copy of the best pipeline so that the
    # estimator stored in `results` (and held by `grid`) is never refitted.
    curve_model = clone(grid.best_estimator_)
    accs = []
    for j in range(1, 101):
        idx = max(1, int(len(X_train) * (j / 100)))  # always at least 1 sample
        curve_model.fit(X_train[:idx], y_train[:idx])
        accs.append(
            _rounded_accuracy(curve_model, X_test, y_test, max_label) * 100)

    all_convergences.append(accs)

# Select the run with the highest test accuracy (np.argmax keeps the first
# one on ties) and keep its accuracy curve.
accuracies = [run['Accuracy'] for run in results]
best_idx = np.argmax(accuracies)
best_convergence = all_convergences[best_idx]


# Write the per-run summary to CSV and display it; only the listed columns
# are kept, so the fitted model objects are not written out.
summary_columns = ['Sample', 'Accuracy', 'Kernel', 'C', 'Epsilon']
df_results = pd.DataFrame(results).loc[:, summary_columns]
df_results.to_csv("svm_results.csv", index=False)
print(df_results)

  Sample  Accuracy  Kernel     C  Epsilon
0     S1      0.00  linear   1.0     0.50
1     S2      0.00  linear   0.1     0.50
2     S3     16.67     rbf  10.0     0.01
3     S4      0.00     rbf  10.0     0.01
4     S5      0.00     rbf  10.0     0.50
5     S6      0.00     rbf  10.0     0.50
6     S7     16.67     rbf  10.0     0.10
7     S8      0.00     rbf  10.0     0.01
8     S9      0.00     rbf  10.0     0.10
9    S10     16.67     rbf  10.0     0.50
