In [None]:
import pandas as pd
import numpy as np
import os
import glob
import math
import joblib
from datetime import timedelta
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor

print('Fetching CSV files...')

# === 1. LOAD & AGGREGATE CSV FILES ===
# Every *.csv directly inside `folder` is read and stacked into one frame.
folder = r'C:\Users\Dree\Desktop\ITM_project\ACTUAL CODING\FOR_PREDICTION'

# Sorted so the concatenation order does not depend on the order in which the
# filesystem happens to list the files.
csv_files = sorted(glob.glob(os.path.join(folder, '*.csv')))
if not csv_files:
    # pd.concat([]) would fail with the opaque "No objects to concatenate";
    # keep the same exception type but say what is actually wrong.
    raise ValueError(f'No CSV files found in {folder!r}')

csv_list = [pd.read_csv(file) for file in csv_files]
agg_csv = pd.concat(csv_list, ignore_index=True)

# === 2. CLEAN & GROUP RAW DATA ===
# Collapse the raw order lines into one row per
# (barcode, product, price, category, calendar day), summing the quantity and
# the sales amount of all lines that fall into the same group.
group_keys = ['barcode', 'product', 'price', 'category', 'order_date']
value_cols = ['pc_quantity', 'total_product_price']

agg_csv = agg_csv.loc[:, [
    'order_date_time', 'category', 'barcode', 'product',
    'pc_quantity', 'price', 'total_product_price'
]].copy()

# Parse the timestamp and round it down to the start of its day.
order_timestamps = pd.to_datetime(agg_csv['order_date_time'])
agg_csv['order_date'] = order_timestamps.dt.floor('D')

# Only the key and value columns survive; the raw timestamp is left behind.
daily_groups = agg_csv.groupby(group_keys, as_index=False)
agg_csv = daily_groups[value_cols].sum()

# === 3. FEATURE ENGINEERING ===
# Calendar features derived from the order date.
agg_csv['day_of_week'] = agg_csv['order_date'].dt.dayofweek
agg_csv['if_weekend'] = agg_csv['day_of_week'] >= 5  # dayofweek: Monday=0 ... Sunday=6
agg_csv['month'] = agg_csv['order_date'].dt.month

# The lag features below need each barcode's rows in chronological order.
agg_csv = agg_csv.sort_values(by=['barcode', 'order_date'])

# Sales of the previous order of the same barcode (NaN for its first order).
agg_csv['sales_last_order'] = agg_csv.groupby('barcode')['total_product_price'].shift(1)

# 3-order windows over the *previous* orders, evaluated per barcode.
# groupby(...).shift() returns an ordinary Series, so calling .rolling() on it
# directly would slide across barcode boundaries and mix one SKU's sales into
# the next SKU's first rows. Re-grouping by barcode before rolling keeps every
# window inside a single SKU.
prior_sales_by_sku = agg_csv['sales_last_order'].groupby(agg_csv['barcode'])
prior_sales_windows = prior_sales_by_sku.rolling(window=3, min_periods=1)

# The grouped rolling result is indexed by (barcode, original row label);
# dropping level 0 restores the original row labels so the assignment aligns
# row-for-row with agg_csv even though sort_values may have permuted the index.
agg_csv['sales_last_3orders'] = (
    prior_sales_windows.sum().reset_index(level=0, drop=True)
)
agg_csv['rolling_mean_3orders'] = (
    prior_sales_windows.mean().reset_index(level=0, drop=True)
)

# Change in sales of this order relative to the previous order of the barcode.
agg_csv['sales_diff_prev_order'] = (
    agg_csv['total_product_price'] - agg_csv['sales_last_order']
)

# NOTE(review): the encoder is fitted on this batch only, so the integer codes
# depend on which categories appear in these files -- confirm they match the
# encoding used when the model was trained.
agg_csv['category_label'] = LabelEncoder().fit_transform(agg_csv['category'])

# Remaining NaNs (e.g. the lag features of each barcode's first order) become 0.
agg_csv = agg_csv.fillna(0)

# === 4. BUILD FINAL INPUT FOR PREDICTION ===
# Keep one row per barcode -- the row holding its most recent order_date --
# restricted to the identifier and feature columns.
prediction_columns = [
    'barcode', 'product', 'price', 'category_label', 'order_date',
    'day_of_week', 'if_weekend', 'month',
    'sales_last_order', 'sales_last_3orders',
    'rolling_mean_3orders', 'sales_diff_prev_order'
]

# idxmax yields, for every barcode, the index label of its latest order_date.
latest_row_labels = agg_csv.groupby('barcode')['order_date'].idxmax()

predict_csv = agg_csv.loc[latest_row_labels, prediction_columns]
predict_csv = predict_csv.reset_index(drop=True)

# === 5. LOAD MODEL & RUN PREDICTIONS ===
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
print('Loading model...')
model = joblib.load('SKU-prediction-model.joblib')
print('Model loaded!')

forecast_days = 7
history_window = 3  # same 3-order window as the engineered lag features
future_preds = []

# Actual sales of the most recent orders of every barcode, oldest first.
# The forecast loop treats `sales_history` as a list of individual order totals
# (it sums it, averages it, and reads [-1] / [-2] as the last two orders), so it
# has to be seeded with real per-order values, ending with the latest order.
# A barcode with fewer than `history_window` orders gets a shorter list.
recent_sales_by_sku = {
    barcode: sales.tail(history_window).tolist()
    for barcode, sales in (
        agg_csv.sort_values('order_date', kind='mergesort')
               .groupby('barcode')['total_product_price']
    )
}

for _, row in predict_csv.iterrows():
    # Best effort per SKU: a failure is reported and the remaining SKUs still run.
    try:
        sku_preds = []
        prediction_date = row['order_date']
        sales_history = list(recent_sales_by_sku[row['barcode']])

        # Recursive forecast: each day's prediction is appended to the history
        # and becomes a lag input for the following day.
        for _step in range(forecast_days):
            prediction_date += timedelta(days=1)
            day_of_week = prediction_date.dayofweek
            is_weekend = day_of_week >= 5
            month = prediction_date.month
            prev_sales = sales_history[-1]

            # NOTE(review): column names/order are presumably those the model
            # was fitted with -- verify against the training script.
            X_input = pd.DataFrame([{
                'price': row['price'],
                'category_label': row['category_label'],
                'day_of_week': day_of_week,
                'if_weekend': is_weekend,
                'month': month,
                'sales_last_order': prev_sales,
                'sales_last_3orders': sum(sales_history),
                'rolling_mean_3orders': np.mean(sales_history),
                'sales_diff_prev_order': (
                    prev_sales - sales_history[-2] if len(sales_history) > 1 else 0
                ),
            }])

            # Round up to a whole number and never report a negative value.
            predicted_sales = max(0, math.ceil(model.predict(X_input)[0]))

            sku_preds.append({
                'barcode': row['barcode'],
                'product': row['product'],
                'prediction_date': prediction_date,
                'predicted_inventory': predicted_sales
            })

            # Slide the window: keep only the newest `history_window` values.
            sales_history = (sales_history + [predicted_sales])[-history_window:]

        # Only commit a SKU's forecasts once all of its days succeeded.
        future_preds.extend(sku_preds)
        print(f"Finished SKU: {row['barcode']}")

    except Exception as e:
        print(f"Error on SKU {row['barcode']}: {e}")

# === 6. OUTPUT FINAL PREDICTIONS ===
# Explicit columns keep the frame well-formed even when no SKU produced a
# forecast; a column-less frame would make the groupby below raise KeyError.
future_df = pd.DataFrame(
    future_preds,
    columns=['barcode', 'product', 'prediction_date', 'predicted_inventory'],
)

if future_df.empty:
    print('Warning: no predictions were generated; writing an empty report.')
    output = future_df[['barcode', 'product', 'predicted_inventory']].copy()
else:
    # Total predicted amount per SKU over the whole forecast horizon.
    output = (
        future_df.groupby(['barcode', 'product'], as_index=False)[['predicted_inventory']]
                 .sum()
    )

# === 7. EXPORT TO CSV ===
output_path = r'C:\Users\Dree\Desktop\ITM_project\ACTUAL CODING\predicted_inventory.csv'
output.to_csv(output_path, index=False)
print(f"Predictions saved to: {output_path}")