In [21]:
import pandas as pd
from sklearn.utils import resample

# Toy dataset for the resampling demo: one (age, income, class) record per row.
# 'high' is the majority class (8 rows) and 'low' the minority class (5 rows).
df = pd.DataFrame(
    [
        (22, 2000, 'high'),
        (25, 2500, 'low'),
        (27, 2700, 'low'),
        (28, 3200, 'high'),
        (30, 3800, 'high'),
        (35, 4000, 'low'),
        (40, 4200, 'high'),
        (45, 4300, 'high'),
        (50, 4500, 'low'),
        (55, 5000, 'high'),
        (60, 5500, 'high'),
        (65, 6000, 'low'),
        (70, 7000, 'high'),
    ],
    columns=['age', 'income', 'class'],
)
df

Unnamed: 0,age,income,class
0,22,2000,high
1,25,2500,low
2,27,2700,low
3,28,3200,high
4,30,3800,high
5,35,4000,low
6,40,4200,high
7,45,4300,high
8,50,4500,low
9,55,5000,high


In [43]:
# Separate the majority ('high') and minority ('low') classes into their own frames
df_high = df.loc[df['class'].eq('high')]
df_low = df.loc[df['class'].eq('low')]



In [49]:
# Down-sample the majority class ('high') to the size of the minority class ('low')
df_high_downSampled = resample(
    df_high,
    replace=False,              # sample without replacement, so no row is repeated
    n_samples=df_low.shape[0],  # keep as many 'high' rows as there are 'low' rows
    random_state=42,            # fixed seed for a reproducible draw
)
df_balanced = pd.concat([df_high_downSampled, df_low])

In [51]:
# Display the down-sampled result: the sampled 'high' rows followed by every 'low' row
df_balanced

Unnamed: 0,age,income,class
3,28,3200,high
9,55,5000,high
0,22,2000,high
12,70,7000,high
4,30,3800,high
1,25,2500,low
2,27,2700,low
5,35,4000,low
8,50,4500,low
11,65,6000,low


In [55]:
# Up-sample the minority class ('low') until it matches the majority class ('high')
df_low_upSampled = resample(
    df_low,
    replace=True,                # sample with replacement, so rows may repeat
    n_samples=df_high.shape[0],  # draw as many 'low' rows as there are 'high' rows
    random_state=42,             # fixed seed for a reproducible draw
)
df_balanced1 = pd.concat([df_low_upSampled, df_high])
df_balanced1

Unnamed: 0,age,income,class
8,50,4500,low
11,65,6000,low
5,35,4000,low
11,65,6000,low
11,65,6000,low
2,27,2700,low
5,35,4000,low
5,35,4000,low
0,22,2000,high
3,28,3200,high


SMOTE (Synthetic Minority Over-sampling Technique)


In [70]:
import pandas as pd
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE

# Sample dataset for the SMOTE demo: one (Age, Income, Class) record per row.
# 'Majority' has 9 rows and 'Minority' has 4 rows.
df = pd.DataFrame(
    [
        (22, 2000, 'Minority'),
        (25, 2500, 'Majority'),
        (27, 2700, 'Majority'),
        (28, 3200, 'Majority'),
        (30, 3500, 'Majority'),
        (35, 3800, 'Minority'),
        (40, 4000, 'Minority'),
        (45, 4200, 'Minority'),
        (50, 4300, 'Majority'),
        (55, 4500, 'Majority'),
        (60, 5000, 'Majority'),
        (65, 5500, 'Majority'),
        (70, 6000, 'Majority'),
    ],
    columns=['Age', 'Income', 'Class'],
)

In [77]:
# Step 1: convert the categorical labels to numeric codes.
# The lookup falls back to the value itself, so re-running this cell after the
# column is already encoded keeps the 0/1 codes instead of turning them into NaN
# (a plain dict-based .map() replaces every value missing from the dict with NaN).
label_to_code = {'Majority': 0, 'Minority': 1}
df['Class'] = df['Class'].map(lambda label: label_to_code.get(label, label))
# Fail loudly on an unexpected label rather than passing it on to the sampler.
assert df['Class'].isin([0, 1]).all(), "unexpected value in df['Class']"

In [79]:
# Step 2: separate the feature columns (x) from the target column (y)
x, y = df[['Age', 'Income']], df['Class']



In [85]:
# Step 3: apply SMOTE with k_neighbors = 3
smote = SMOTE(
    sampling_strategy='auto',
    random_state=42,  # fixed seed so the resampled data is reproducible
    k_neighbors=3,
)
x_resampled, y_resampled = smote.fit_resample(x, y)

In [87]:
# Step 4: convert the numeric codes back to their categorical labels.
# The lookup falls back to the value itself, so re-running this cell after the
# labels are already decoded leaves them untouched instead of turning them into NaN
# (a plain dict-based .map() replaces every value missing from the dict with NaN).
code_to_label = {0: 'Majority', 1: 'Minority'}
y_resampled = y_resampled.map(lambda code: code_to_label.get(code, code))
# Fail loudly if anything other than the two known labels is present.
assert y_resampled.isin(['Majority', 'Minority']).all(), "unexpected value in y_resampled"

In [None]:
# Step 5: combine the resampled features and labels into a single frame.
# The features and the labels are joined column-wise (axis=1) so the result
# carries the 'Class' column alongside 'Age' and 'Income'.
df_balanced = pd.concat(
    [
        pd.DataFrame(x_resampled, columns=['Age', 'Income']),
        pd.Series(y_resampled, name='Class'),
    ],
    axis=1,
)
# A column-wise concat aligns on the index; a row-count mismatch would mean the
# features and labels did not line up one-to-one.
assert len(df_balanced) == len(y_resampled), "features and labels are misaligned"