# In [None]:
# Machine Learning Fundamentals Demo
# Covering Encoding, Regularization, Normalization, and Loss Functions

# Import necessary libraries
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.datasets import load_iris, load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd

## 1. Encoding Techniques

### Real-life Example: Customer Data Processing
# - Label Encoding: Convert country names to numbers for a survey analysis
# - One Hot Encoding: Create separate columns for product categories in e-commerce data

# Build a DataFrame from the Iris measurements, then attach the species name
# of each row by indexing target_names with the integer targets.
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = iris.target_names[iris.target]

print("Original Iris Dataset:")
print(iris_df.head())

### Label Encoding Example
# Learn the set of species labels first, then map every row to its integer code
le = LabelEncoder()
le.fit(iris_df['species'])
iris_df['species_encoded'] = le.transform(iris_df['species'])

print("\nAfter Label Encoding:")
print(iris_df.loc[:, ['species', 'species_encoded']].head())

### One Hot Encoding Example
# One indicator column per species; dense output so it can be wrapped
# directly in a DataFrame. The encoder expects 2-D input, hence [['species']].
ohe = OneHotEncoder(sparse_output=False)
species_ohe = ohe.fit_transform(iris_df[['species']])
ohe_df = pd.DataFrame(
    data=species_ohe,
    columns=ohe.get_feature_names_out(['species']),
)

print("\nOne Hot Encoded Species:")
print(ohe_df.head())

## 2. Normalization Techniques

### Real-life Example: Medical Data Analysis
# - Normalization: Scale patient cholesterol levels (0-1 range)
# - Standardization: Rescale blood pressure readings to zero mean and unit variance for comparison

# Wrap the Diabetes feature matrix in a DataFrame, one column per feature
diabetes = load_diabetes()
diabetes_df = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)

### Normalization (Min-Max Scaling)
# Fit the scaler on the features, then apply it to the same data
scaler_minmax = MinMaxScaler().fit(diabetes_df)
normalized_data = scaler_minmax.transform(diabetes_df)
normalized_df = pd.DataFrame(data=normalized_data, columns=diabetes.feature_names)

print("\nNormalized Data (0-1 range):")
# Only the per-column extremes are needed to confirm the new range
print(normalized_df.describe().loc[['min', 'max'], :])

### Standardization (Z-Score)
# Same fit-then-transform pattern, using StandardScaler
scaler_std = StandardScaler().fit(diabetes_df)
standardized_data = scaler_std.transform(diabetes_df)
standardized_df = pd.DataFrame(data=standardized_data, columns=diabetes.feature_names)

print("\nStandardized Data (mean=0, std=1):")
# NOTE: describe() reports the sample std (ddof=1) while StandardScaler
# divides by the population std (ddof=0), so the printed std is expected
# to be slightly above 1 rather than exactly 1.
print(standardized_df.describe().loc[['mean', 'std'], :])

## 3. Regularization Techniques

### Real-life Example: Housing Price Prediction
# - Ridge Regression: Prevent overfitting when predicting prices from many features
# - Lasso Regression: Automatically select important features in economic data

# Features and target come straight from the Diabetes bunch; hold out 20%
# of the rows for evaluation with a fixed seed so the split is repeatable.
X, y = diabetes.data, diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Ridge Regression (L2 Regularization)
# fit() returns the estimator itself, so construction and training chain
ridge = Ridge(alpha=0.5).fit(X_train, y_train)
ridge_pred = ridge.predict(X_test)

print("\nRidge Regression Coefficients:")
print(ridge.coef_)

### Lasso Regression (L1 Regularization)
# Same train/predict flow as Ridge, with the L1-penalised estimator
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
lasso_pred = lasso.predict(X_test)

print("\nLasso Regression Coefficients:")
print(lasso.coef_)

## 4. Loss Functions

### Real-life Example: Weather Prediction
# - MAE: Average error in temperature forecasts
# - MSE: Punishes large errors in rainfall prediction

### Mean Absolute Error (MAE)
# Score both models' held-out predictions against the true targets
ridge_mae, lasso_mae = (
    mean_absolute_error(y_true=y_test, y_pred=predictions)
    for predictions in (ridge_pred, lasso_pred)
)

print(f"\nRidge MAE: {ridge_mae:.2f}")
print(f"Lasso MAE: {lasso_mae:.2f}")

### Mean Squared Error (MSE)
# Same comparison using the squared-error metric
ridge_mse, lasso_mse = (
    mean_squared_error(y_true=y_test, y_pred=predictions)
    for predictions in (ridge_pred, lasso_pred)
)

print(f"\nRidge MSE: {ridge_mse:.2f}")
print(f"Lasso MSE: {lasso_mse:.2f}")
