In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler

# 1. Sample dataset
data = {
    'Age': [25, np.nan, 35, 45, 32],
    'Salary': [50000, 60000, np.nan, 80000, 72000],
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Female'],
    'City': ['Mumbai', 'Delhi', 'Delhi', 'Mumbai', 'Chennai']
}

df = pd.DataFrame(data)
print("Original Data:\n", df)

# 2. Handle missing values (fill with mean)
df['Age'].fillna(df['Age'].mean(), inplace=True)
df['Salary'].fillna(df['Salary'].mean(), inplace=True)

# 3. Encode categorical features
# Label encoding for 'Gender'
le = LabelEncoder()
df['Gender_encoded'] = le.fit_transform(df['Gender'])

# One-hot encoding for 'City'
df = pd.get_dummies(df, columns=['City'], drop_first=True)

# 4. Create new feature: Salary per Age
df['Salary_per_Age'] = df['Salary'] / df['Age']

# 5. Feature scaling
scaler = StandardScaler()
df[['Age', 'Salary', 'Salary_per_Age']] = scaler.fit_transform(df[['Age', 'Salary', 'Salary_per_Age']])

# Final transformed data
print("\nTransformed Data:\n", df)


Original Data:
     Age   Salary  Gender     City
0  25.0  50000.0    Male   Mumbai
1   NaN  60000.0  Female    Delhi
2  35.0      NaN  Female    Delhi
3  45.0  80000.0    Male   Mumbai
4  32.0  72000.0  Female  Chennai

Transformed Data:
         Age    Salary  Gender  Gender_encoded  City_Delhi  City_Mumbai  \
0 -1.438480 -1.515535    Male               1       False         True   
1  0.000000 -0.537770  Female               0        True        False   
2  0.116634  0.000000  Female               0        True        False   
3  1.671748  1.417758    Male               1       False         True   
4 -0.349901  0.635547  Female               0       False        False   

   Salary_per_Age  
0        0.383449  
1       -0.980034  
2       -0.322927  
3       -0.837447  
4        1.756958  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].mean(), inplace=True)
