In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import make_scorer, accuracy_score, hamming_loss

# Load the data
# NOTE(review): this is an absolute, machine-specific path; consider resolving it
# relative to a configurable data directory so the notebook runs elsewhere.
file_path_input = r"C:\Users\20223560\OneDrive - TU Eindhoven\Documents\BMT 2e-jaars\advanced programming\group assignment\sorted_tested_molecules.csv"  # replace with your actual file path
data_input = pd.read_csv(file_path_input)

# Separate features and labels
# The label columns are named once and used for both selections, so the feature
# matrix can never contain a label column even if the CSV column order changes
# (a positional slice would silently leak labels into X in that case).
label_columns = ['PKM2_inhibition', 'ERK2_inhibition']
X = data_input.drop(columns=label_columns).values  # all rows, every column except the two labels
y = data_input[label_columns].values  # combine labels into a (n_samples, 2) matrix

# Split the data into training and testing sets
# NOTE(review): test_size=0.75 holds out 75% of the rows for testing, so the model
# is trained on only 25% of the data. Confirm this is intended (0.25 is the usual choice).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.75, random_state=42)

# Base model: one k-NN classifier per target column, fitted in parallel
knn = KNeighborsClassifier()
multi_output_knn = MultiOutputClassifier(estimator=knn, n_jobs=-1)

# Search k = 1..20 with 5-fold cross-validation.
# Hamming loss is an error measure (lower is better), hence greater_is_better=False
# so that GridSearchCV maximises its negation.
hamming_scorer = make_scorer(hamming_loss, greater_is_better=False)
param_grid = {'estimator__n_neighbors': np.arange(1, 21)}
grid_search = GridSearchCV(
    estimator=multi_output_knn,
    param_grid=param_grid,
    scoring=hamming_scorer,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Report the neighbourhood size that scored best in cross-validation
best_k = grid_search.best_params_['estimator__n_neighbors']
print(f"Best value for k: {best_k}")

# Use the best k to define the k-NN model
# GridSearchCV (default refit=True) has already retrained the winning configuration
# on the full training set, so reuse that fitted model instead of building and
# fitting a second, identical one.
best_multi_output_knn = grid_search.best_estimator_
# Unfitted k-NN template carrying n_neighbors=best_k (kept for any later cells that use it)
best_knn = best_multi_output_knn.estimator

# Make predictions
y_pred = best_multi_output_knn.predict(X_test)
df_y_pred = pd.DataFrame(y_pred, columns=['Prediction_PKM2_inhibition', 'Prediction_ERK2_inhibition'])

# Calculate accuracy based on both predictions being correct
# Guard the element-wise comparison: mismatched shapes would broadcast (or collapse
# to a scalar) instead of failing loudly.
assert y_pred.shape == y_test.shape, "prediction and label matrices must have the same shape"
# A row counts as correct only when every target in that row matches.
# NOTE(review): this score can look high if most labels are 0; verify the class
# balance and compare against an all-zeros baseline before interpreting it.
accuracy_both_correct = np.mean((y_pred == y_test).all(axis=1))

print(f"Accuracy for both PKM2 inhibition and ERK2 inhibition: {accuracy_both_correct:.10f}")

Best value for k: 4
Accuracy for both PKM2 inhibition and ERK2 inhibition: 0.9295101553


Unnamed: 0,Prediction_PKM2_inhibition,Prediction_ERK2_inhibition
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
