<a href="https://colab.research.google.com/github/ishleenkaur256/Assignment_EAI/blob/main/SVM_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-learn pandas matplotlib

import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [None]:
# Fetch the UCI Letter Recognition dataset straight from the archive.
# The file is read with header=None, so column names are supplied here:
# the target column 'letter' first, followed by the 16 feature columns.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"
columns = ['letter'] + [
    'x-box', 'y-box', 'width', 'height',
    'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br',
    'x-ege', 'xegvy', 'y-ege', 'yegvx',
]

data = pd.read_csv(url, names=columns, header=None)
print("Dataset shape:", data.shape)
# Leave the preview as the final expression so the notebook renders it.
data.head()


In [None]:
# Build 10 different 70/30 train/test partitions of the same dataset.
# The feature matrix and label vector are identical for every partition, so
# they are separated from `data` once here instead of on every iteration.
X = data.drop("letter", axis=1)
y = data["letter"]

# Each entry is an (X_train, X_test, y_train, y_test) tuple. Using the
# partition number as random_state makes each split distinct yet repeatable.
samples = [
    tuple(train_test_split(X, y, test_size=0.3, random_state=seed))
    for seed in range(10)
]

print(f"Created {len(samples)} samples (70-30 splits)")


In [None]:
# Random-search SVM hyperparameters independently on every train/test split,
# then refit the winning configuration on that split's full training set.
N_ITERATIONS = 100     # random-search trials per sample
SUBSAMPLE_SIZE = 4000  # training rows used per trial to keep fitting fast
SEARCH_SEED = 42       # fixes the hyperparameter draws so re-runs match

# A dedicated, seeded generator (instead of the unseeded global `random`
# state) makes the search reproducible under Restart & Run All.
search_rng = random.Random(SEARCH_SEED)

results = []          # one summary dict per sample
sample_progress = {}  # sample name -> list of per-trial accuracies

for idx, (X_train, X_test, y_train, y_test) in enumerate(samples, start=1):
    # Start below any attainable accuracy so the very first trial always
    # records its parameters, even if it scores exactly 0.
    best_acc = -1.0
    best_params = {}
    acc_progress = []

    # Never ask for more rows than this split's training set contains.
    n_sub = min(SUBSAMPLE_SIZE, len(X_train))

    for i in range(N_ITERATIONS):
        kernel = search_rng.choice(['linear', 'rbf', 'poly'])
        C = search_rng.choice([0.1, 1, 10])
        # gamma is only drawn for the non-linear kernels; the linear kernel
        # keeps the library default.
        gamma = 'scale' if kernel == 'linear' else search_rng.choice([0.001, 0.01, 0.1])

        # Subsample for faster training; the trial index seeds the draw, and
        # labels are aligned to the sampled rows through the shared index.
        X_train_sub = X_train.sample(n=n_sub, random_state=i)
        y_train_sub = y_train.loc[X_train_sub.index]

        model = SVC(kernel=kernel, C=C, gamma=gamma)
        model.fit(X_train_sub, y_train_sub)

        # NOTE(review): trials are scored on X_test, the same data used for
        # the final score below, so the reported accuracy is optimistically
        # biased. Consider scoring trials on a validation split of X_train.
        acc = accuracy_score(y_test, model.predict(X_test))
        acc_progress.append(acc)

        if acc > best_acc:
            best_acc = acc
            best_params = {'kernel': kernel, 'C': C, 'gamma': gamma}

    # Retrain with best parameters on full training data
    final_model = SVC(**best_params)
    final_model.fit(X_train, y_train)
    final_acc = accuracy_score(y_test, final_model.predict(X_test))

    results.append({"Sample": f"S{idx}", "Best Accuracy": final_acc, "Best Params": best_params})
    sample_progress[f"S{idx}"] = acc_progress

results_df = pd.DataFrame(results)
# Leave the frame as the final expression so the notebook renders it.
results_df


In [None]:
# Plot the per-trial accuracy trace of the sample with the highest final accuracy.
# idxmax() returns an index *label*, so it is paired with the label-based .loc;
# the positional .iloc only gave the same row because of the default RangeIndex.
best_sample = results_df.loc[results_df['Best Accuracy'].idxmax()]
best_sample_name = best_sample["Sample"]

plt.figure(figsize=(8,5))
# sample_progress holds one accuracy value per random-search trial.
plt.plot(sample_progress[best_sample_name])
plt.title(f"Convergence of {best_sample_name} (Best Accuracy)")
plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.grid()
plt.show()


In [None]:
# Persist the per-sample summary table (without the index column) to the
# working directory and confirm on stdout.
output_path = "svm_results.csv"
results_df.to_csv(output_path, index=False)
print("Results saved as svm_results.csv ")
