In [None]:
# Install necessary packages
!pip install scikit-learn pandas matplotlib

import os
import random
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Step 1: Download and Load UCI Letter Recognition Dataset
# The file is fetched only when it is not already on disk, so re-running the
# script does not need network access. The download goes to a temporary name
# first and is renamed afterwards, so an interrupted transfer never leaves a
# truncated file under the final name.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"
file_name = "letter-recognition.data"
if not os.path.exists(file_name):
    partial_name = file_name + ".part"
    urllib.request.urlretrieve(url, partial_name)
    os.replace(partial_name, file_name)

# Step 2: Load the dataset into pandas
# The file is read without a header row; names are supplied here instead.
# The first name is the label column, the remaining 16 name the feature
# columns (the split into labels/features happens in the next step).
feature_names = [
    'x-box', 'y-box', 'width', 'high',
    'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br',
    'x-ege', 'xegvy', 'y-ege', 'yegvx',
]
column_names = ['letter', *feature_names]
df = pd.read_csv(file_name, names=column_names, header=None)

# Step 3: Preprocess data
# Column 0 holds the labels; every other column is a feature.

# Encode labels as integers (the fitted encoder is kept in `le`).
le = LabelEncoder()
y = le.fit_transform(df.iloc[:, 0].to_numpy())

# Standardize features (the fitted scaler is kept in `scaler`).
# NOTE(review): the scaler is fit on the full dataset before any train/test
# split is made, so test-set statistics influence the training features.
# Consider fitting it on the training portion only.
scaler = StandardScaler()
X = scaler.fit_transform(df.iloc[:, 1:].to_numpy())

# Step 4: Define parameter search space
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
nu_vals = np.linspace(0.01, 0.9, 10)
eps_vals = np.linspace(0.001, 0.2, 10)

# Candidate values as plain, pre-rounded Python floats: rounding is hoisted
# out of the search loop and the values are usable as dictionary keys.
nu_choices = [round(v, 3) for v in nu_vals.tolist()]
eps_choices = [round(v, 3) for v in eps_vals.tolist()]

# results: one (sample number, best accuracy, best params) tuple per sample.
# convergence_all: one best-so-far accuracy trace (100 entries) per sample.
results = []
convergence_all = []

# Step 5: Loop over 10 random samples
for sample_id in range(10):
    # Train/test split, seeded by the sample index so it is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=sample_id
    )

    # Seed the hyper-parameter sampler the same way as the split; a private
    # generator keeps the run reproducible without touching the global
    # `random` state.
    rng = random.Random(sample_id)

    best_accuracy = 0
    best_params = {}
    convergence = []

    # Only len(kernels) * len(nu_choices) distinct configurations reach the
    # classifier, so the 100 draws below repeat many of them. Scores are
    # cached per (kernel, nu) for this split so each one is fitted once.
    # A value of None marks a configuration the classifier rejected.
    scores = {}

    for _ in range(100):
        kernel = rng.choice(kernels)
        nu = rng.choice(nu_choices)
        # epsilon is not passed to NuSVC below, so it does not affect the
        # fit; it is only recorded alongside the best parameters.
        epsilon = rng.choice(eps_choices)

        config = (kernel, nu)
        if config not in scores:
            try:
                clf = svm.NuSVC(kernel=kernel, nu=nu)
                clf.fit(X_train, y_train)
                scores[config] = accuracy_score(y_test, clf.predict(X_test))
            except ValueError as err:
                # Raised by the estimator for parameter values it cannot use
                # (e.g. an infeasible nu). Report it once and skip it.
                print(f"S{sample_id + 1}: skipping kernel={kernel}, nu={nu}: {err}")
                scores[config] = None

        acc = scores[config]
        if acc is not None and acc > best_accuracy:
            best_accuracy = acc
            best_params = {'kernel': kernel, 'nu': nu, 'epsilon': epsilon}

        # Record the best accuracy seen so far on every iteration, whether
        # or not this draw succeeded, so the trace is a convergence curve.
        convergence.append(best_accuracy)

    results.append((sample_id + 1, best_accuracy, best_params))
    convergence_all.append(convergence)

# Step 6: Print Results Table
# One tab-separated row per sample. A sample's parameter dict is still empty
# when no configuration scored above zero, so missing fields are printed as
# "n/a" instead of raising KeyError.
print("\nSample\tAccuracy\tKernel\tNu\tEpsilon")
for i, acc, params in results:
    print(
        f"S{i}\t{acc:.4f}"
        f"\t{params.get('kernel', 'n/a')}"
        f"\t{params.get('nu', 'n/a')}"
        f"\t{params.get('epsilon', 'n/a')}"
    )

# Step 7: Plot convergence graph for the best-performing sample
# Pick the sample whose best accuracy is highest (first one on ties) and
# draw its per-iteration trace.
best_sample_idx = np.argmax([entry[1] for entry in results])
best_convergence = convergence_all[best_sample_idx]

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(best_convergence, color='blue')
ax.set_title('Fitness (bestAccuracy)')
ax.set_xlabel('Iteration')
ax.set_ylabel('Accuracy')
ax.grid(True)
plt.show()


