In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the employee records from disk
df = pd.read_csv("employee.csv")

# 'salary' is the regression target; every remaining column is a feature
X = df.drop('salary', axis=1)
y = df['salary']

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature columns that the scalers further down are applied to
num_cols = ['job_years', 'hours_per_week', 'telecommute_days_per_week']

def _fit_and_score(scaler, X_train, X_test, y_train, y_test, num_cols):
    """Scale ``num_cols`` with ``scaler``, fit a linear regression, and score it.

    The scaler is fitted on the training rows only, so no statistics from the
    test rows leak into the transformation.

    Args:
        scaler: Unfitted scaler exposing ``fit`` and ``transform``; it is
            fitted in place on ``X_train[num_cols]``.
        X_train: Training features; left unmodified (a copy is scaled).
        X_test: Test features; left unmodified (a copy is scaled).
        y_train: Training target values.
        y_test: Test target values.
        num_cols: Names of the columns to scale.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, model, y_pred, mse)`` where
        ``mse`` is the mean squared error of ``y_pred`` against ``y_test``.
    """
    scaler.fit(X_train[num_cols])  # Fit on train only

    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()
    X_train_scaled[num_cols] = scaler.transform(X_train[num_cols])
    X_test_scaled[num_cols] = scaler.transform(X_test[num_cols])

    # Train & predict
    model = LinearRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Report the plain MSE (squared target units). Do not divide it by
    # mean(y_test): that ratio mixes squared and linear units and is not
    # scale-free. Use sqrt(mse) / mean(y_test) if a relative error is wanted.
    mse = mean_squared_error(y_test, y_pred)
    return X_train_scaled, X_test_scaled, model, y_pred, mse


# NOTE: least squares with an intercept is invariant to per-column affine
# rescaling of the features, so both options below are expected to produce
# (numerically) the same predictions and MSE.

### -------------------------------
### OPTION 1: STANDARDIZATION
### -------------------------------
scaler_std = StandardScaler()
X_train_std, X_test_std, reg_std, y_pred_std, mse_std = _fit_and_score(
    scaler_std, X_train, X_test, y_train, y_test, num_cols
)
print("Standardization MSE:", mse_std)

### -------------------------------
### OPTION 2: NORMALIZATION
### -------------------------------
scaler_norm = MinMaxScaler()
X_train_norm, X_test_norm, reg_norm, y_pred_norm, mse_norm = _fit_and_score(
    scaler_norm, X_train, X_test, y_train, y_test, num_cols
)
print("Normalization MSE:", mse_norm)
