In [None]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, LabelEncoder

# Small employee dataset used by the preprocessing steps in this notebook.
# Salary and Experience contain NaN entries.
data = {
    "Age": [22, 25, 30, 28, 40, 35, 50, 45, 29, 33],
    "Salary": [25000, 40000, 50000, 42000, 60000, 58000, 72000, 68000, 45000, np.nan],
    "Department": ["HR", "IT", "Finance", "HR", "IT", "Finance", "HR", "Finance", "IT", "HR"],
    "Education": ["Bachelor", "Master", "PhD", "High School", "Master", "Bachelor", "PhD", "Master", "Bachelor", "High School"],
    "Experience": [1, 2, 5, np.nan, 10, 8, 15, np.nan, 4, 6],
}

df = pd.DataFrame(data)
print("Initial Dataset:\n", df, "\n")

# Feature scaling of the numeric columns "Age" and "Salary".
# Each scaler writes into its own copy of df, so df keeps the raw values.
# The scalers disregard NaN while fitting and leave it as NaN in the result.

# Min-Max Normalization: rescale each column to the [0, 1] range.
scaler = MinMaxScaler()
df_minmax = df.copy()
df_minmax[["Age", "Salary"]] = scaler.fit_transform(df[["Age", "Salary"]])
print("Min-Max Normalization:\n", df_minmax[["Age", "Salary"]], "\n")

# Z-score Normalization: subtract the column mean and divide by its
# standard deviation.
scaler_std = StandardScaler()
df_zscore = df.copy()
df_zscore[["Age", "Salary"]] = scaler_std.fit_transform(df[["Age", "Salary"]])
print("Z-score Normalization:\n", df_zscore[["Age", "Salary"]], "\n")

# RobustScaler: centre on the median and scale by the interquartile range
# (the scaler's defaults), which makes it less sensitive to outliers.
rs = RobustScaler()
df_Robust = df.copy()
df_Robust[["Age", "Salary"]] = rs.fit_transform(df[["Age", "Salary"]])
print("Robust Scaler:\n", df_Robust[["Age", "Salary"]], "\n")

# Label Encoding: LabelEncoder assigns integer codes to the sorted unique
# labels, so the codes are plain identifiers, not an ordinal ranking.
# The codes are written to a copy so that df keeps the department names;
# overwriting df["Department"] would make the one-hot step below emit
# Department_0/1/2 columns instead of columns named after the departments.
le = LabelEncoder()
df_label = df.copy()
df_label["Department"] = le.fit_transform(df["Department"])
print("Label Encoding:\n", df_label[["Age", "Salary", "Department"]])

# One-Hot Encoding: one indicator column per department name.
df_onehot = pd.get_dummies(df, columns=["Department"])
print("One-Hot Encoded Departments:\n", df_onehot.head(), "\n")

print("Missing Values:\n", df.isnull().sum(), "\n")

# Fill missing values

# Categorical column: fill with the most frequent value. The result is
# assigned back to the column; calling fillna(inplace=True) on a selected
# column is chained assignment and is not guaranteed to update the frame.
df_fill_mode = df.copy()
df_fill_mode["Department"] = df_fill_mode["Department"].fillna(
    df_fill_mode["Department"].mode()[0]
)

# Numeric columns: on top of the mode fill, replace NaN with the column mean.
# DataFrame.fillna with a dict returns a new frame, so df_fill_mode is untouched.
df_fill_mean = df_fill_mode.fillna(
    {
        "Experience": df["Experience"].mean(),
        "Salary": df["Salary"].mean(),
    }
)

# Alternative strategy: drop every row that has at least one missing value.
df_drop = df.dropna()
print("After Filling with Mean/Mode:\n", df_fill_mean, "\n")
print("Dataset size before:", len(df), "after dropping NA:", len(df_drop), "\n")

# Aggregation: average salary and age within each department.
grouped = df.groupby("Department")[["Salary", "Age"]].mean()
print("Mean Salary & Age by Department:\n", grouped, "\n")

# Pivot table: average salary for every (education, department) pair,
# with education levels as rows and departments as columns.
pivot = df.pivot_table(
    index="Education",
    columns="Department",
    values="Salary",
    aggfunc="mean",
)
print("Pivot Table (Salary by Education & Department):\n", pivot, "\n")

# Equal-width binning: cut the observed Age range into three intervals of
# equal width and name them.
df["Age_Bin"] = pd.cut(
    x=df["Age"],
    bins=3,
    labels=["Young", "Middle-aged", "Old"],
)
print("Age Binning (Equal-width):\n", df[["Age", "Age_Bin"]], "\n")

# Equal-frequency binning: three quantile-based bins over Salary.
# Missing salaries are replaced by the column mean first so every row
# receives a bin; the Salary column itself is left unchanged.
df["Salary_Bin"] = pd.qcut(
    x=df["Salary"].fillna(value=df["Salary"].mean()),
    q=3,
    labels=["Low", "Medium", "High"],
)
print("Salary Binning (Equal-frequency):\n", df[["Salary", "Salary_Bin"]], "\n")


Initial Dataset:
    Age   Salary Department    Education  Experience
0   22  25000.0         HR     Bachelor         1.0
1   25  40000.0         IT       Master         2.0
2   30  50000.0    Finance          PhD         5.0
3   28  42000.0         HR  High School         NaN
4   40  60000.0         IT       Master        10.0
5   35  58000.0    Finance     Bachelor         8.0
6   50  72000.0         HR          PhD        15.0
7   45  68000.0    Finance       Master         NaN
8   29  45000.0         IT     Bachelor         4.0
9   33      NaN         HR  High School         6.0 

Min-Max Normalization:
         Age    Salary
0  0.000000  0.000000
1  0.107143  0.319149
2  0.285714  0.531915
3  0.214286  0.361702
4  0.642857  0.744681
5  0.464286  0.702128
6  1.000000  1.000000
7  0.821429  0.914894
8  0.250000  0.425532
9  0.392857       NaN 

Z-score Normalization:
         Age    Salary
0 -1.382608 -1.859349
1 -1.028093 -0.791213
2 -0.437235 -0.079121
3 -0.673578 -0.648794
4  0.7