In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Step 1: Read the raw product records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='Products.csv')

In [3]:
# Step 2: Replace 'Category' with one indicator column per distinct category value
df_encoded = pd.get_dummies(data=df, columns=['Category'])

In [4]:
# Step 3: Compute the IQR-based limits used to flag outliers in the 'Price' column.
# First and third quartiles of 'Price'.
Q1, Q3 = (df_encoded['Price'].quantile(q) for q in (0.25, 0.75))

# Interquartile range: the spread between the two quartiles.
IQR = Q3 - Q1

# Prices further than 1.5 * IQR outside the quartiles are treated as outliers.
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

In [5]:
# Keep only the rows whose 'Price' lies within [lower_bound, upper_bound] (both ends inclusive)
price_in_range = df_encoded['Price'].between(lower_bound, upper_bound)
df_cleaned = df_encoded[price_in_range]

In [6]:
# Step 4: Normalize 'Discount' using Min-Max scaling
scaler = MinMaxScaler()
# df_cleaned is the result of boolean filtering on df_encoded, so writing a
# column into it with df_cleaned['Discount'] = ... is a chained assignment
# that can raise SettingWithCopyWarning. .assign() returns a new DataFrame
# with the column replaced, which avoids mutating a possible view.
# fit_transform returns a 2-D array (a single column here); ravel() flattens
# it to 1-D so it maps onto the 'Discount' column row by row.
df_cleaned = df_cleaned.assign(
    Discount=scaler.fit_transform(df_cleaned[['Discount']]).ravel()
)

In [7]:
# Display the cleaned and transformed dataset.
# print() emits the frame's plain-text rendering; columns that do not fit the
# line width are wrapped into a second group, as in the output below.
print(df_cleaned)

   ProductID  Price  Discount  Category_Clothing  Category_Electronics  \
0        101   1500       0.2              False                  True   
1        102     50       0.6               True                 False   
2        103    800       0.4              False                 False   
3        104   2000       0.0              False                  True   
4        105   3000       0.8              False                 False   
5        106     40       1.0               True                 False   
6        107   5000       0.2              False                  True   

   Category_Furniture  
0               False  
1               False  
2                True  
3               False  
4                True  
5               False  
6               False  
