In [26]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import pdist, squareform

In [17]:
def prepare_data(path):
    data = pd.read_csv(path)
    data.drop(columns=data.columns[len(data.columns)-1], axis=1, inplace=True)
    data = data.drop(columns=['id'])
    y = data['diagnosis']
    X = data.drop(columns='diagnosis')
    y = np.where(y == 'M', 1, 0)
    return X, y

def prepare_moodle_data(path):
    data = pd.read_csv(path)
    data.drop(columns=data.columns[0], axis=1, inplace=True)
    y = data['y']
    X = data.drop(columns=['y'])
    return X, y

In [None]:
# Load a dataset (e.g., Iris dataset)
X, y = prepare_data("data_cancer.csv")

# Split the data into training and testing sets
X_train_unscaled, X_test_unscaled, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_unscaled)
X_test = scaler.transform(X_test_unscaled)


regularization_parameters = [0.0001, 0.001, 0.01, 0.1, 1, 10]
kernels = ['linear', 'poly', 'sigmoid', 'rbf']
gammas = [0.1, 0.01, 0.001]
# Define the parameter grid for the grid search
param_grid = {
    'C': [0.0001, 0.001, 0.01, 0.1, 1, 10],          # Regularization parameter
    'kernel': ['linear', 'poly', 'sigmoid', 'rbf'],  # Kernels: linear, radial basis function (RBF), polynomial
    'gamma': [0.0001, 0.001, 0.01, 0.1, 1, 10]      # σ (gamma) parameter for RBF kernel
}
model = SVC()
gs = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
gs.fit(X_train, y_train)

# Print the best parameters and corresponding accuracy
print("Best Parameters: ", gs.best_params_)
print("Best Accuracy: {:.2f}%".format(gs.best_score_ * 100))

# Make predictions on the scaled test set using the best model
best_svm_model = gs.best_estimator_
y_pred = best_svm_model.predict(X_test)

# Evaluate the model on the test set
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))


#for c in regularization_parameters:
#    for kernel in kernels:
#        if kernel == 'rbf':
#            for gamma in gammas:
#                print(f"Kernel {kernel}, regularization parameter: {c}, gamma: {gamma}")
#                model = SVC(C=c, kernel=kernel, gamma=gamma)
#                model.fit(X_train, y_train)
#                y_pred = model.predict(X_test)
#                accuracy = accuracy_score(y_test, y_pred)
#                print("Accuracy: {:.2f}%".format(accuracy * 100))
#        else:
#            gamma = 0.1
#            print(f"Kernel {kernel}, regularization parameter: {c}, gamma: {gamma}")
#            model = SVC(C=c, kernel=kernel, gamma=gamma)
#            model.fit(X_train, y_train)
#            y_pred = model.predict(X_test)
#            accuracy = accuracy_score(y_test, y_pred)
#            print("Accuracy: {:.2f}%".format(accuracy * 100))
# Create an SVM classifier

# Evaluate the model on the test set

In [27]:
def kernel_regression(X_train, y_train, X_test, kernel_function, bandwidth):
    # Compute the kernel matrix
    K = kernel_matrix(X_train, X_train, kernel_function, bandwidth)
    
    # Compute alpha = K^-1 * y
    alpha = np.linalg.solve(K, y_train)
    
    # Compute the kernel matrix between test and training points
    K_star = kernel_matrix(X_test, X_train, kernel_function, bandwidth)
    
    # Compute predictions for the test points: y(z) = k* * alpha
    y_pred = np.dot(K_star, alpha)
    
    return y_pred

def kernel_matrix(X1, X2, kernel_function, bandwidth):
    # Compute distances
    distances = squareform(pdist(X, 'sqeuclidean'))
    # Apply the kernel function to the distances
    kernel_values = kernel_function(distances / bandwidth)
    print(kernel_values)
    
    return kernel_values

def gaussian_kernel(u):
    return np.exp(-0.5 * u**2) / np.sqrt(2 * np.pi)

def epanechnikov_kernel(u):
    return 0.75 * (1 - u**2) * (np.abs(u) <= 1)

def triangular_kernel(u):
    return (1 - np.abs(u)) * (np.abs(u) <= 1)

X, y = prepare_moodle_data("data_moodle.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Perform kernel regression using Gaussian Kernel
bandwidth = 0.2
y_pred_gaussian = kernel_regression(X_train, y_train, X_test, gaussian_kernel, bandwidth)

# Perform kernel regression using Epanechnikov Kernel
y_pred_epanechnikov = kernel_regression(X_train, y_train, X_test, epanechnikov_kernel, bandwidth)

# Perform kernel regression using Triangular Kernel
y_pred_triangular = kernel_regression(X_train, y_train, X_test, triangular_kernel, bandwidth)

# Plot the results
import matplotlib.pyplot as plt

plt.scatter(X_train, y_train, color='black', label='Training data')
plt.plot(X_test, y_pred_gaussian, color='red', label='Kernel Regression (Gaussian)')
plt.plot(X_test, y_pred_epanechnikov, color='blue', label='Kernel Regression (Epanechnikov)')
plt.plot(X_test, y_pred_triangular, color='green', label='Kernel Regression (Triangular)')
plt.title('Kernel Regression with Different Kernels')
plt.legend()
plt.show()

[[0.39894228 0.37993061 0.18264909 ... 0.         0.         0.        ]
 [0.37993061 0.39894228 0.37993061 ... 0.         0.         0.        ]
 [0.18264909 0.37993061 0.39894228 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.39894228 0.37993061 0.18264909]
 [0.         0.         0.         ... 0.37993061 0.39894228 0.37993061]
 [0.         0.         0.         ... 0.18264909 0.37993061 0.39894228]]


ValueError: solve1: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (m,m),(m)->(m) (size 137 is different from 197)

In [None]:
X, y = prepare_data("data_moodle.csv")
print(X)
print(y)