# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


# Fetch the OpenML 'letter' dataset (version 1) as pandas objects and split
# the returned bunch into the feature table and the label column.
data = fetch_openml(name='letter', version=1, as_frame=True)
X, y = data.data, data.target


# Put features and labels side by side (label column named 'target'), then
# report the frame's dimensions and how many rows each class has.
df = pd.concat(objs=[X, y.rename('target')], axis='columns')
print("Dataset shape:", df.shape)
print("Class distribution:", df['target'].value_counts(), sep="\n")


# Candidate values for the random hyperparameter search:
#   kernel - the SVC kernels to try
#   C      - 1000 log-spaced values from 1e-3 to 1e3
#   gamma  - the two string presets passed through to SVC
param_space = dict(
    kernel=['rbf', 'poly', 'sigmoid'],
    C=np.logspace(start=-3, stop=3, num=1000),
    gamma=['scale', 'auto'],
)


# Random-search evaluation of SVC hyperparameters over ten stratified 70/30
# splits. For every split, 100 configurations are drawn from `param_space`,
# fitted on the training part and scored on the held-out part.
#
# Outputs (module level):
#   results     - one dict per split with the keys 'Sample', 'Best Accuracy'
#                 and 'Best Params'.
#   convergence - maps the split number (1..10) to the running best accuracy
#                 recorded after each iteration.
#
# NOTE(review): the winning configuration is chosen on the same held-out part
# whose accuracy is reported, so 'Best Accuracy' is an optimistic estimate.
# Consider a separate validation split if an unbiased figure is needed.
results = []
convergence = {}
for sample in range(1, 11):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.7, random_state=sample, stratify=y
    )
    # Seed the hyperparameter draws with the split number, mirroring
    # random_state=sample above, so a rerun reproduces the same search.
    rng = np.random.default_rng(sample)
    best_acc = 0.0
    best_params = None
    best_so_far = []

    for _ in range(100):
        # Cast the NumPy scalars returned by rng.choice to built-in types so
        # the stored parameters are plain Python values.
        params = {
            'kernel': str(rng.choice(param_space['kernel'])),
            'C': float(rng.choice(param_space['C'])),
            'gamma': str(rng.choice(param_space['gamma'])),
        }
        clf = SVC(**params)
        clf.fit(X_train, y_train)
        acc = clf.score(X_test, y_test)

        if acc > best_acc:
            best_acc = acc
            best_params = params.copy()
        # best_acc is the running maximum, so it doubles as the convergence
        # trace value for this iteration.
        best_so_far.append(best_acc)

    results.append({'Sample': f"S{sample}", 'Best Accuracy': best_acc, 'Best Params': best_params})
    convergence[sample] = best_so_far


# Collect the per-split winners into a table and print it without the
# row index.
results_df = pd.DataFrame(data=results)
print(
    "\nComparative performance of Optimized-SVM with different samples:",
    results_df.to_string(index=False),
    sep="\n",
)


# Plot the running-best accuracy of the split that reached the highest score.
# Each row of results_df and each key of `convergence` is added in the same
# loop iteration, so the winner's row position identifies its key without
# assuming how the splits are numbered. argmax returns the first maximum,
# so ties resolve to the earliest split.
best_row = int(np.argmax(results_df['Best Accuracy'].to_numpy()))
best_sample = list(convergence)[best_row]
history = convergence[best_sample]

plt.figure()
# Take the x-axis length from the recorded history instead of a fixed count,
# so the plot stays valid if the number of search iterations changes.
plt.plot(range(1, len(history) + 1), history)
plt.xlabel('Iteration')
plt.ylabel('Best Accuracy')
plt.title(f'Convergence of Best SVM (Sample S{best_sample})')
plt.show()