In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt

# Read the tab-delimited table that holds the label column 'Y' and the
# per-row feature columns used further down.
ddi = pd.read_csv("drugbank_with_features.csv", sep="\t")

# Restrict the data to the five most frequent values of 'Y'.
class_counts = ddi['Y'].value_counts()
top_classes = class_counts.nlargest(5).index
in_top_classes = ddi['Y'].isin(top_classes)
# .copy() gives an independent frame, so the column assignment below does
# not write into a slice of the unfiltered table.
ddi = ddi.loc[in_top_classes].copy()

# Turn the remaining 'Y' values into integer codes; the fitted encoder keeps
# the code -> original label mapping in `label_encoder.classes_`.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(ddi['Y'])
ddi['category_encoded'] = encoded_labels

# Ten input columns: the same five descriptor names for each side of the
# pair, all '_X1' columns first and then all '_X2' columns.
descriptor_names = ('MolWt', 'LogP', 'NumHDonors', 'NumHAcceptors', 'TPSA')
feature_columns = [
    f'{descriptor}_{side}'
    for side in ('X1', 'X2')
    for descriptor in descriptor_names
]

# Feature matrix (one row per table row) and the integer-encoded target.
feature_frame = ddi[feature_columns]
X = feature_frame.values
y = ddi['category_encoded'].values

# One stratified shuffle split with 20% of the rows held out for testing.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_idx, test_idx = next(split.split(X, y))
X_train = X[train_idx]
X_test = X[test_idx]
y_train = y[train_idx]
y_test = y[test_idx]

# Randomly undersample the training portion only; the test set is not
# resampled.
rus = RandomUnderSampler(random_state=42)
X_train_bal, y_train_bal = rus.fit_resample(X_train, y_train)

# Learn the scaling statistics from the resampled training data alone, then
# apply that same fitted transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train_bal)
X_train_bal = scaler.transform(X_train_bal)
X_test = scaler.transform(X_test)

# Multi-layer perceptron with two hidden layers (128 and 64 units) trained
# with Adam for at most 300 iterations. With early_stopping=True,
# scikit-learn holds out part of the training data as a validation set and
# stops once the validation score stops improving.
mlp_settings = {
    'hidden_layer_sizes': (128, 64),
    'max_iter': 300,
    'solver': 'adam',
    'random_state': 42,
    'early_stopping': True,
}
mlp = MLPClassifier(**mlp_settings)

# Train on the undersampled, standardized training data.
mlp.fit(X_train_bal, y_train_bal)

# Predict encoded class labels for the standardized held-out test features.
y_pred = mlp.predict(X_test)

# Display names for the report rows: the original 'Y' values, as strings, in
# the order of their integer codes.
target_names = list(map(str, label_encoder.classes_))

print("\nClassification Report:")
report_text = classification_report(
    y_test,
    y_pred,
    target_names=target_names,
    digits=2,
)
print(report_text)