In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# !pip install imblearn
from imblearn.over_sampling import SMOTE # Synthetic Minority Oversampling Technique
from imblearn.datasets import make_imbalance

In [3]:
from sklearn.datasets import load_iris

In [4]:
iris_Data = load_iris()

In [5]:
!python --version

Python 3.10.11


In [6]:
X = iris_Data.data
y = iris_Data.target

In [7]:
from collections import Counter

In [8]:
Counter(y)

Counter({0: 50, 1: 50, 2: 50})

In [9]:
np.unique(y, return_counts=True)

(array([0, 1, 2]), array([50, 50, 50], dtype=int64))

In [10]:
X_imbalanced, y_imbalanced = make_imbalance(X, y,
                                            random_state = 666,
                                            sampling_strategy={
                                                0: 10,
                                                1: 20,
                                                2: 50
                                            })

In [11]:
Counter(y_imbalanced)

Counter({2: 50, 1: 20, 0: 10})

In [12]:
smote = SMOTE(n_jobs=-1, random_state=666)

In [13]:
X_balanced, y_balanced = smote.fit_resample(X_imbalanced, y_imbalanced, )



In [14]:
from sklearn.metrics import classification_report

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
X_train_imb, X_test_imb, y_train_imb, y_test_imb = train_test_split(X_imbalanced, y_imbalanced,
                                                                    test_size=0.2,
                                                                    random_state=666,
                                                                    stratify=y_imbalanced)

In [17]:
imb_dtree = DecisionTreeClassifier()
imb_logistic = LogisticRegression()

In [19]:
imb_dtree.fit(X_train_imb, y_train_imb)
imb_logistic.fit(X_train_imb, y_train_imb)

In [21]:
y_pred = imb_dtree.predict(X_test_imb)
print(classification_report(y_test_imb, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         4
           2       1.00      1.00      1.00        10

    accuracy                           1.00        16
   macro avg       1.00      1.00      1.00        16
weighted avg       1.00      1.00      1.00        16



In [22]:
y_pred = imb_logistic.predict(X_test_imb)
print(classification_report(y_test_imb, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         4
           2       1.00      1.00      1.00        10

    accuracy                           1.00        16
   macro avg       1.00      1.00      1.00        16
weighted avg       1.00      1.00      1.00        16



In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced,
                                                       test_size=0.2,
                                                       random_state=666,
                                                       stratify=y_balanced)

In [24]:
bal_logistic = LogisticRegression()
bal_dtree = DecisionTreeClassifier()

In [25]:
bal_logistic.fit(X_train, y_train)
bal_dtree.fit(X_train, y_train)

In [27]:
y_pred = bal_dtree.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.83      1.00      0.91        10
           2       1.00      0.80      0.89        10

    accuracy                           0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.94      0.93      0.93        30



In [28]:
y_pred = bal_logistic.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

