# ==================================================
# NOTEBOOK: 04_template_model.ipynb
# DESCRIPTION: Template for building Machine Learning Models
# INSTRUCTIONS:
#   1. Duplicate this notebook.
#   2. Rename it to: model_YourModelName.ipynb (e.g., model_RandomForest.ipynb)
#   3. Fill in the empty slots below.
# ==================================================

# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_absolute_error
import joblib

# --------------------------------------------------------
# 1. LOAD THE GOLDEN DATA
# --------------------------------------------------------
# The CSVs below are the already-processed data, so they are read as-is;
# no cleaning step is repeated here.
print("Loading Golden Data...")

# Feature tables are kept as DataFrames.
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/X_train.csv', '../data/X_test.csv')
)

# Target files are flattened into 1-D arrays with .values.ravel().
y_train, y_test = (
    pd.read_csv(csv_path).values.ravel()
    for csv_path in ('../data/y_train.csv', '../data/y_test.csv')
)

print(
    f"Training Data Shape: {X_train.shape}",
    f"Testing Data Shape:  {X_test.shape}",
    sep="\n",
)

# --------------------------------------------------------
# 2. DEFINE YOUR MODEL
# --------------------------------------------------------
# Import your specific model here and assign an instance to `model`.
# Example: from sklearn.linear_model import LinearRegression
# Example: from sklearn.ensemble import RandomForestRegressor

# TODO: UNCOMMENT AND CHANGE THE LINE BELOW FOR YOUR MODEL
# from sklearn.linear_model import LinearRegression
# model = LinearRegression()

# Fail fast with an actionable message when the slot above has not been
# filled in. Without this guard the print below raises a bare NameError that
# does not tell the user what to do. The exception type is kept as NameError,
# and a `model` that is already defined passes through untouched.
if "model" not in globals():
    raise NameError(
        "`model` is not defined. Uncomment/edit the lines in section 2 "
        "('DEFINE YOUR MODEL') to import an estimator and assign it to "
        "`model` before running the rest of this notebook."
    )

print(f"Model Selected: {model}")

# --------------------------------------------------------
# 3. TRAIN THE MODEL
# --------------------------------------------------------
# Fit the selected model on the training features and targets.
print("\nTraining the model... Please wait.")
model.fit(X_train, y_train)
# The check mark is written as an escape (U+2705) so the source stays ASCII.
# The previous literal had been corrupted into mojibake by a UTF-8/cp1252
# mix-up.
print("Training Complete! \u2705")

# --------------------------------------------------------
# 4. EVALUATE PERFORMANCE
# --------------------------------------------------------
# Run the fitted model on the test features (the "exam").
y_pred = model.predict(X_test)

# r2_score is the coefficient of determination; the report shows it as a
# percentage under the "Accuracy" label. mean_absolute_error is reported
# as-is.
accuracy = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Assemble the report once and emit it with a single print call.
divider = "----------------------------------"
report = [
    "\n" + divider,
    "MODEL PERFORMANCE REPORT",
    divider,
    f"Accuracy (R2 Score): {accuracy * 100:.2f}%",
    f"Avg Error (MAE):     {mae:.2f}",
    divider,
]
print("\n".join(report))


# --------------------------------------------------------
# 5. VISUALIZE RESULTS
# --------------------------------------------------------
# Scatter of actual vs. predicted values. The dashed red diagonal marks
# predicted == actual, so a perfect prediction falls on that line.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.6, color='blue')

# The diagonal's endpoints are the min and max of the actual values.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], 'r--', linewidth=2)

ax.set_xlabel('Actual Charges')
ax.set_ylabel('Predicted Charges')
ax.set_title('Actual vs Predicted (Closer to Red Line is Better)')
plt.show()

# --------------------------------------------------------
# 6. SAVE YOUR MODEL (Optional)
# --------------------------------------------------------
# joblib.dump(model, '../models/my_model_name.pkl')
# print("Model saved successfully!")