# No_balancing Method

This notebook will apply the no_balancing balancing method to the training data.

In [2]:

from google.colab import files


uploaded = files.upload()

Saving train.csv to train.csv
Saving val.csv to val.csv


In [3]:
import pandas as pd

df_train = pd.read_csv('train.csv')
X_train = df_train.drop('Diabetes_binary', axis=1)
y_train = df_train['Diabetes_binary']

val_df = pd.read_csv('val.csv')
X_val = val_df.drop('Diabetes_binary', axis=1)
y_val = val_df['Diabetes_binary']

print("Pre-sampling class imbalance:")
print(y_train.value_counts(normalize=True))
print(y_val.value_counts(normalize=True))

Pre-sampling class imbalance:
0.0    0.860664
1.0    0.139336
Name: Diabetes_binary, dtype: float64
0.0    0.860671
1.0    0.139329
Name: Diabetes_binary, dtype: float64


# New Section

# MLP #
## Hyperparameter Optimization ##

In [4]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [5]:
mlp_gs = MLPClassifier(max_iter=100)
parameter_space = {
    'hidden_layer_sizes': [(10,), (20,), (50,), (100,), (150,)],
    'activation': ['logistic', 'tanh', 'relu'],
}
clf = GridSearchCV(mlp_gs, parameter_space, n_jobs=-1, cv=5)
clf.fit(X_train, y_train) # X is train samples and y is the corresponding labels

In [6]:
print("Best parameters found:\n", clf.best_params_)

Best parameters found:
 {'activation': 'relu', 'hidden_layer_sizes': (150,)}


In [10]:
y_pred = clf.predict(X_val)
print(f"results on validation set: {classification_report(y_val, y_pred)}")

results on validation set:               precision    recall  f1-score   support

         0.0       0.87      0.98      0.93     43667
         1.0       0.56      0.13      0.21      7069

    accuracy                           0.86     50736
   macro avg       0.72      0.56      0.57     50736
weighted avg       0.83      0.86      0.83     50736

