In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE


In [2]:
data=pd.read_csv('diabetes.csv')

In [3]:
X = data[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']]
y = data['Outcome']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
rf_classifier_before = RandomForestClassifier(random_state=42)
rf_classifier_before.fit(X_train, y_train)
y_pred_before = rf_classifier_before.predict(X_test)
classification_report_before = classification_report(y_test, y_pred_before)

In [6]:
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

In [7]:
rf_classifier_after = RandomForestClassifier(random_state=42)
rf_classifier_after.fit(X_resampled, y_resampled)
y_pred_after = rf_classifier_after.predict(X_test)
classification_report_after = classification_report(y_test, y_pred_after)


In [8]:
print("Classification Report BEFORE SMOTE:")
print(classification_report_before)
print("\nClassification Report AFTER SMOTE:")
print(classification_report_after)

Classification Report BEFORE SMOTE:
              precision    recall  f1-score   support

           0       0.82      0.80      0.81       151
           1       0.64      0.66      0.65        80

    accuracy                           0.75       231
   macro avg       0.73      0.73      0.73       231
weighted avg       0.76      0.75      0.75       231


Classification Report AFTER SMOTE:
              precision    recall  f1-score   support

           0       0.83      0.75      0.79       151
           1       0.60      0.70      0.65        80

    accuracy                           0.74       231
   macro avg       0.71      0.73      0.72       231
weighted avg       0.75      0.74      0.74       231

