In [7]:
import warnings

# Install the filter first so that any FutureWarning raised while the
# libraries below are being imported is suppressed as well.
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import numpy as np
import pandas as pd

# pandas has to be imported before its options can be touched; setting this
# ahead of the import raised NameError on a freshly restarted kernel.
pd.options.mode.chained_assignment = None
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
import pickle

In [8]:
class MealPredictionSystem:
    """Train and evaluate XGBoost regressors on a meal-count data set.

    The CSV given to the constructor is loaded into ``self.data``. It must
    contain a ``date`` column, every column listed in ``FEATURE_COLUMNS`` and
    the target column requested from ``prepare_features`` / ``train_xgboost``.
    """

    # Raw input columns selected from ``self.data`` by ``prepare_features``.
    FEATURE_COLUMNS = [
        'bus_riders', 'bus_riders_1days_ago', 'bus_riders_2days_ago', 'bus_riders_3days_ago',
        'lunch_meals_1days_ago', 'lunch_meals_2days_ago', 'lunch_meals_3days_ago',
        'dinner_meals_1days_ago', 'dinner_meals_2days_ago', 'dinner_meals_3days_ago',
        'day_of_week', 'month'
    ]

    # Directory in which trained models are pickled.
    MODEL_DIR = 'models'

    def __init__(self, data_path='sources/meal_counts.csv'):
        """Load the CSV at ``data_path`` and parse its ``date`` column.

        Args:
            data_path: Path of the CSV file to read.
        """
        self.data = pd.read_csv(data_path)
        self.data['date'] = pd.to_datetime(self.data['date'])
        # Not used by the methods below; kept so code that reads the
        # attribute keeps working.
        self.scaler = StandardScaler()

    def _model_path(self, target):
        """Return the pickle path used for the model of ``target``."""
        return os.path.join(self.MODEL_DIR, f'{target}_xg.pkl')

    def prepare_features(self, target='lunch_meals'):
        """Build the feature matrix and target vector for ML models.

        ``day_of_week`` and ``month`` are replaced by sine/cosine pairs with
        periods 7 and 12 respectively; all other feature columns are passed
        through unchanged.

        Args:
            target: Name of the column of ``self.data`` to predict.

        Returns:
            Tuple ``(X, y)``: ``X`` is a new DataFrame with the remaining
            feature columns followed by ``day_sin``, ``day_cos``,
            ``month_sin`` and ``month_cos``; ``y`` is ``self.data[target]``.

        Raises:
            KeyError: If a feature column or ``target`` is missing.
        """
        # Explicit copy: the new columns are written to an independent frame,
        # so this neither touches self.data nor depends on pandas'
        # chained-assignment warning being silenced globally.
        X = self.data[self.FEATURE_COLUMNS].copy()
        y = self.data[target]

        # Convert day_of_week to cyclical features
        X['day_sin'] = np.sin(2 * np.pi * X['day_of_week']/7)
        X['day_cos'] = np.cos(2 * np.pi * X['day_of_week']/7)

        # Convert month to cyclical features
        X['month_sin'] = np.sin(2 * np.pi * X['month']/12)
        X['month_cos'] = np.cos(2 * np.pi * X['month']/12)

        # Drop original day_of_week and month
        X = X.drop(['day_of_week', 'month'], axis=1)

        return X, y

    def train_xgboost(self, target='lunch_meals', is_load=False):
        """Train (or load) an XGBoost regressor and score it on a hold-out split.

        Args:
            target: Name of the column of ``self.data`` to predict.
            is_load: When True and a pickled model for ``target`` exists in
                ``MODEL_DIR``, that model is loaded and evaluated as-is.
                Otherwise a new model is fitted on the training split and
                pickled to ``MODEL_DIR``.

        Returns:
            Tuple ``(model, metrics)`` where ``metrics`` is a dict with the
            keys ``'mae'``, ``'rmse'`` and ``'r2'`` computed on the 20% test
            split.
        """
        X, y = self.prepare_features(target)
        # NOTE(review): the split is shuffled although the features are
        # lagged values (``*_Ndays_ago``); if the rows form a time series a
        # chronological split may give a more honest estimate - confirm.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model_path = self._model_path(target)

        if is_load and os.path.isfile(model_path):
            # SECURITY: unpickling executes arbitrary code; only load model
            # files that were produced by this notebook.
            with open(model_path, 'rb') as model_file:
                model = pickle.load(model_file)
        else:
            model = xgb.XGBRegressor(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=5,
                random_state=42
            )
            # Fit and persist only freshly created models; a loaded model is
            # used as stored instead of being retrained and overwritten.
            model.fit(X_train, y_train)
            os.makedirs(self.MODEL_DIR, exist_ok=True)
            with open(model_path, 'wb') as model_file:
                pickle.dump(model, model_file)

        # Make predictions
        y_pred = model.predict(X_test)

        # Calculate metrics
        metrics = {
            'mae': mean_absolute_error(y_test, y_pred),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
            'r2': r2_score(y_test, y_pred)
        }

        return model, metrics

In [None]:
# One predictor per data set; both are constructed before any training starts.
predictor1 = MealPredictionSystem("sources/lunch_meal_counts.csv")
predictor2 = MealPredictionSystem("sources/dinner_meal_counts.csv")

# Train and evaluate XGBoost for each (predictor, target) pair in turn.
# After the loop, xgb_model / xgb_metrics hold the results of the last pair.
print("Training XGBoost model...")
for system, meal_target in ((predictor1, 'lunch_meals'), (predictor2, 'dinner_meals')):
    xgb_model, xgb_metrics = system.train_xgboost(target=meal_target)
    print("XGBoost Metrics:", xgb_metrics)

print("Train completed")











Training XGBoost model...
XGBoost Metrics: {'mae': 24.6026611328125, 'rmse': 31.604712451169267, 'r2': 0.8338130712509155}
XGBoost Metrics: {'mae': 3.514173746109009, 'rmse': 4.348558358018301, 'r2': 0.3242550492286682}










Train completed
