In [1]:
# train_model.py

# Step 1: Import Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import joblib  # For saving and loading the model

# Step 2: Load and Preprocess the Training Data
# Load the training dataset (path is relative to the working directory)
training_data = pd.read_csv("sales.csv")

# Columns that must not be fed to the model as features. 'sales' is the
# target; 'date' is appended below only when the column actually exists.
columns_to_drop = ['sales']

# Basic preprocessing: parse the date column and derive calendar features.
# The 'date' column is treated as optional, so everything that touches it
# (including dropping the raw column) lives behind this guard.
if 'date' in training_data.columns:
    training_data['date'] = pd.to_datetime(training_data['date'])
    training_data['year'] = training_data['date'].dt.year
    training_data['month'] = training_data['date'].dt.month
    training_data['day'] = training_data['date'].dt.day
    training_data['day_of_week'] = training_data['date'].dt.dayofweek
    # The raw datetime column is replaced by the numeric features above.
    columns_to_drop.append('date')

# Define features and target. Dropping only the columns collected above
# avoids a KeyError when the file has no 'date' column; a missing 'sales'
# column still raises KeyError, since training cannot proceed without it.
X = training_data.drop(columns=columns_to_drop)
y = training_data['sales']

# Encode categorical variables (if any). drop_first=True omits the first
# level of each categorical column to avoid a redundant indicator.
# NOTE(review): the resulting columns depend on the categories present in
# this file; any code that loads the saved model must build the same columns.
X = pd.get_dummies(X, drop_first=True)

# Step 3: Fit a random forest regressor on the full feature matrix.
# 100 trees, with a fixed random_state so reruns on the same data are
# reproducible. fit() returns the estimator itself, so it can be chained.
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X, y)

# Step 4: Persist the fitted estimator to disk so it can be reloaded later
joblib.dump(rf, 'random_forest_model.pkl')

# Step 5: Calculate R² Score and RMSE on the Training Data
# NOTE: these metrics are computed on the same rows the model was fitted on,
# so they describe in-sample fit rather than performance on unseen data.
y_train_pred = rf.predict(X)
expected_r2 = r2_score(y, y_train_pred)
# RMSE is the square root of the mean squared error (same units as 'sales').
rmse = np.sqrt(mean_squared_error(y, y_train_pred))

# Print the metrics
print(f"Expected R² Score on Training Data: {expected_r2}")
print(f"Root Mean Squared Error (RMSE) on Training Data: {rmse}")

# Step 6: Save R² Score and RMSE to a File
# The text contains the non-ASCII character '²', so the encoding is pinned
# to UTF-8 instead of relying on the platform default; this keeps the file
# identical across systems and avoids UnicodeEncodeError on ASCII locales.
with open("Expected_R2_Score_and_RMSE.txt", "w", encoding="utf-8") as file:
    file.write(f"Expected R² Score: {expected_r2}\n")
    file.write(f"RMSE: {rmse}\n")

# Short-form summary of the same metrics, followed by a completion notice.
print(f"R² Score: {expected_r2}")
print(f"RMSE: {rmse}")
print("Model training completed and saved to 'random_forest_model.pkl'. R² and RMSE saved to 'Expected_R2_Score_and_RMSE.txt'.")


Expected R² Score on Training Data: 0.9929936375715187
Root Mean Squared Error (RMSE) on Training Data: 322.37221609097196
R² Score: 0.9929936375715187
RMSE: 322.37221609097196
Model training completed and saved to 'random_forest_model.pkl'. R² and RMSE saved to 'Expected_R2_Score_and_RMSE.txt'.
