# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

def load_data(file_path):
    """Read the processed dataset from a CSV source.

    Args:
        file_path: Location of the CSV data; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        The DataFrame parsed by ``pandas.read_csv``.
    """
    dataset = pd.read_csv(file_path)
    return dataset

def split_data(df, *, target_column='insurance_premium_pred', test_size=0.2,
               random_state=42):
    """Split the dataset into training and testing sets.

    Args:
        df: DataFrame containing the feature columns and the target column.
        target_column: Name of the column to use as the prediction target;
            every other column is treated as a feature.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split`` so that the
            split is reproducible between runs.

    Returns:
        ``X_train, X_test, y_train, y_test`` as produced by
        ``train_test_split``.

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    # Fail early with a message that names the missing column instead of
    # the generic "not found in axis" error raised by DataFrame.drop.
    if target_column not in df.columns:
        raise KeyError(
            f"Target column {target_column!r} not found in dataset; "
            f"available columns: {list(df.columns)}"
        )
    X = df.drop(columns=[target_column])
    y = df[target_column]
    return train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

def train_model(X_train, y_train, *, n_estimators=100, random_state=42,
                n_jobs=None):
    """Train a Random Forest Regressor model.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        n_estimators: Number of trees in the forest.
        random_state: Seed for the estimator so that training is
            reproducible between runs.
        n_jobs: Forwarded to ``RandomForestRegressor``; ``None`` keeps the
            estimator's default parallelism.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        random_state=random_state,
        n_jobs=n_jobs,
    )
    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    """Score a trained model on the held-out data and print the results.

    Args:
        model: Object exposing ``predict``; called once with ``X_test``.
        X_test: Features to predict on.
        y_test: True target values the predictions are compared against.

    Returns:
        A ``(mae, mse)`` tuple: mean absolute error and mean squared error
        of the predictions.
    """
    predictions = model.predict(X_test)

    # Compute both metrics up front, then report them.
    scores = (
        mean_absolute_error(y_test, predictions),
        mean_squared_error(y_test, predictions),
    )
    mae, mse = scores
    print(f"Mean Absolute Error: {mae}")
    print(f"Mean Squared Error: {mse}")
    return scores

def save_model(model, file_path):
    """Save the trained model with ``joblib.dump``.

    The destination's parent directory is created when it does not exist,
    so saving into a fresh output folder does not fail with
    ``FileNotFoundError``.

    Args:
        model: The object to serialize.
        file_path: Destination passed to ``joblib.dump``. Directory
            creation only applies when this is a string or path-like
            object; anything else is handed to ``joblib.dump`` untouched.
    """
    import os

    if isinstance(file_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_path))
        # An empty dirname means the current directory, which needs no
        # creation.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    joblib.dump(model, file_path)

if __name__ == "__main__":
    # Read the preprocessed dataset; the path is relative to the current
    # working directory.
    file_path = "../data/processed_data.csv"
    df = load_data(file_path)

    # Hold out a test partition, fit on the remainder, then report errors.
    X_train, X_test, y_train, y_test = split_data(df)
    model = train_model(X_train, y_train)
    evaluate_model(model, X_test, y_test)

    # Persist the fitted model and confirm completion.
    model_path = "../models/insurance_model.pkl"
    save_model(model, model_path)
    print("Model training completed and saved.")
