<a href="https://colab.research.google.com/github/cheshtabiala/parameter_optimization/blob/main/parameter_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import learning_curve
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the semicolon-separated CSV; the first row supplies the column names.
dataset = pd.read_csv("/content/academics_data.csv", sep=";", header=0)

# Normalise the column names step by step: trim surrounding whitespace,
# lower-case, turn spaces into underscores and remove double-quote characters.
trimmed_names = dataset.columns.str.strip()
snake_names = trimmed_names.str.lower().str.replace(' ', '_')
dataset.columns = snake_names.str.replace('"', '')

# Per-column count of missing entries; as the last expression it is the cell output.
dataset.isna().sum()


In [None]:
from sklearn.preprocessing import LabelEncoder
# Labels the encoder is fitted on (hard-coded rather than read from the data).
class_labels = ["Dropout", "Graduate", "Enrolled"]
label_encoder = LabelEncoder()
label_encoder.fit(class_labels)

# The value of this expression is discarded: it is not the last statement of the cell.
dataset["target"].unique()

# Encode the target column with the fitted encoder. `dataset["target"]` itself is
# not modified; only these two names are bound to the encoded values.
encoded_target = label_encoder.transform(dataset["target"])
target_column = encoded_target

In [None]:
# Count missing entries per column and keep only the columns that have any.
missing_values = dataset.isna().sum()
columns_with_gaps = missing_values[missing_values > 0]

if columns_with_gaps.empty:
    print("No missing values found in the dataset.")
else:
    print("Columns with missing values:")
    print(columns_with_gaps)

In [None]:
# Features are every column except the last one.
# NOTE(review): this assumes "target" is the final column of the CSV — confirm.
X = dataset.iloc[:, :-1]
y = dataset['target']

# StandardScaler.fit_transform returns a new array and leaves its input untouched,
# so the result must be assigned back to X for the scaling to reach the models.
# Wrapping it in a DataFrame keeps the original column names and row index.
# NOTE(review): the scaler is fitted on all rows before the train/test splits are
# drawn in a later cell, so test rows influence the scaling statistics; consider
# fitting the scaler per split if a leakage-free estimate is required.
ss = StandardScaler()
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns, index=X.index)

X.head()

In [None]:
# Ten 70/30 train/test splits of (X, y); the loop index doubles as random_state,
# so each split is different but reproducible.
samples = []
for i in range(10):
    samples.append(tuple(train_test_split(X, y, test_size=0.3, random_state=i)))

# Leave the last split bound to the module-level names that fitnessFunction reads.
X_train, X_test, y_train, y_test = samples[-1]

# SVC kernels tried for every split.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# One row per split is written into this (initially empty) table.
result = pd.DataFrame(
    columns=['Sample', 'Best Accuracy', 'Best Kernel Value', 'Best C Value', 'Best Gamma Value']
)
def fitnessFunction(kernel, C, gamma, data=None, max_iter=100):
    """Train an SVC with the given hyper-parameters and return its test accuracy.

    Parameters
    ----------
    kernel : str
        Kernel name passed to ``SVC``.
    C : float
        Regularisation parameter passed to ``SVC``.
    gamma : float
        Kernel coefficient passed to ``SVC``.
    data : tuple, optional
        ``(X_train, X_test, y_train, y_test)`` to evaluate on. When omitted,
        the module-level variables of the same names are used, which is the
        original behaviour of this function.
    max_iter : int, optional
        Hard limit on solver iterations passed to ``SVC`` (default 100, the
        value that was previously hard-coded).

    Returns
    -------
    float
        Accuracy of the fitted model on the test portion of the split.
    """
    if data is None:
        # Fall back to the globals so existing three-argument calls keep working.
        data = (X_train, X_test, y_train, y_test)
    train_features, test_features, train_labels, test_labels = data

    svm = SVC(kernel=kernel, C=C, gamma=gamma, max_iter=max_iter)
    svm.fit(train_features, train_labels)
    predictions = svm.predict(test_features)
    # accuracy_score expects (y_true, y_pred); accuracy is symmetric, but keep the
    # documented order so swapping in another metric later stays correct.
    return accuracy_score(test_labels, predictions)

In [None]:
# Random search: for every split, draw one random (C, gamma) pair per kernel and
# keep the combination with the highest test accuracy.

# Dedicated, seeded generator so the drawn hyper-parameters are reproducible.
search_rng = np.random.default_rng(42)
# SVC requires C > 0. The lower bound keeps draws strictly positive, also after
# they are rounded to two decimals for the results table.
PARAM_LOW, PARAM_HIGH = 0.01, 10

for i in range(len(samples)):
    # fitnessFunction reads these module-level names, so bind the split once
    # per sample before any evaluation.
    X_train, X_test, y_train, y_test = samples[i]

    # Track the unrounded best score for comparisons; comparing against the
    # rounded value could reject a genuinely better candidate.
    best_score = float("-inf")
    best_accuracy = 0
    best_C = 0
    best_gamma = 0
    best_kernel = None  # reset per sample so a stale kernel is never reported

    for kernel in kernels:
        C = search_rng.uniform(PARAM_LOW, PARAM_HIGH)
        gamma = search_rng.uniform(PARAM_LOW, PARAM_HIGH)
        score = fitnessFunction(kernel, C, gamma)
        if score > best_score:
            best_score = score
            # Values are rounded only for the results table.
            best_accuracy = round(score, 2)
            best_C = round(C, 2)
            best_gamma = round(gamma, 2)
            best_kernel = kernel

    result.loc[i] = [i+1, best_accuracy, best_kernel, best_C, best_gamma]
print(result)
# Pick the results row with the highest accuracy. idxmax() returns an index
# *label*, so the row is fetched with .loc (label-based) rather than .iloc
# (position-based), and the lookup is done once instead of once per argument.
best_label = result['Best Accuracy'].idxmax()
best_run = result.loc[best_label]

# The 'Sample' column is 1-based, so subtract one to index into `samples`.
X_train, X_test, y_train, y_test = samples[int(best_run['Sample']) - 1]

# Rebuild the winning model from the values stored in the results table.
best_svc = SVC(kernel=best_run['Best Kernel Value'],
               C=float(best_run['Best C Value']),
               gamma=float(best_run['Best Gamma Value']),
               max_iter=100)

# 10-fold cross-validated accuracy at 50 training-set sizes, from 1% to 100%
# of the data available for training in each fold.
train_sizes, train_scores, test_scores = learning_curve(
    best_svc, X_train, y_train, cv=10, scoring='accuracy', n_jobs=-1,
    train_sizes=np.linspace(0.01, 1.0, 50))

# Average the cross-validation scores across folds for each training-set size.
train_mean = np.mean(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)

plt.figure(figsize=(10, 6))
plt.plot(train_sizes, train_mean, label='Score on Training')
plt.plot(train_sizes, test_mean, label='Score on Cross-Validation')
# The x values are the training-set sizes returned by learning_curve, not
# solver iterations, so the axis is labelled accordingly.
plt.xlabel('Number of training samples')
plt.ylabel('Accuracy')
plt.title('Convergence Graph')
plt.legend(loc="best")
plt.show()