In [55]:
import pandas as pd
import warnings

In [57]:
# Suppress every warning for the rest of the session.
# NOTE(review): this presumably also hides warnings emitted while the models
# below are being fitted (e.g. convergence problems) — confirm that is intended.
warnings.filterwarnings("ignore")

In [None]:
# Load the sales table. Later cells read its StockCode, YearMonth and Count
# columns; ProductName is presumably supplied by this file too — TODO confirm.
# The path is relative to the notebook's working directory.
df = pd.read_csv("Processed Data/product_sales.csv")

In [None]:
# Load the product table. The forecasting loop reads its StockCode and
# UnsoldDuration columns to choose which products to model.
products = pd.read_csv("Processed Data/products.csv")

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# --- Forecast configuration -------------------------------------------------
ARIMA_ORDER = (2, 1, 1)   # (p, d, q) passed to ARIMA
FORECAST_STEPS = 6        # number of months to predict per product
# Series shorter than this are skipped instead of being handed to the model.
# NOTE(review): p + d + q + 1 is a conservative floor; very short series are
# presumably what triggers the "0-dimensional" fitting errors — confirm and tune.
MIN_OBSERVATIONS = sum(ARIMA_ORDER) + 1


def _monthly_series(sales, stock_code):
    """Return one product's counts as a chronologically sorted, date-indexed Series.

    Parameters
    ----------
    sales : pandas.DataFrame
        Frame with ``StockCode``, ``YearMonth`` (already datetime) and ``Count``.
    stock_code
        Value matched against ``sales['StockCode']``.

    Returns
    -------
    pandas.Series
        ``Count`` values named ``y``, indexed by month (``ds``). Rows without a
        date are dropped and only the first row of a repeated month is kept.
    """
    rows = sales.loc[sales['StockCode'] == stock_code, ['YearMonth', 'Count']]
    return (
        rows.rename(columns={'YearMonth': 'ds', 'Count': 'y'})
        .dropna(subset=['ds'])
        .drop_duplicates(subset='ds')   # keep the first row of a repeated month
        .set_index('ds')['y']
        .sort_index()                   # guarantees index[-1] is the latest month
    )


def _forecast_product(series, stock_code):
    """Fit ARIMA on ``series`` and return its forecast as a tidy frame.

    Returns a DataFrame with columns ``ds`` (month start), ``yhat`` and
    ``StockCode`` holding ``FORECAST_STEPS`` rows. Any exception raised by the
    model fit propagates to the caller.
    """
    fitted_model = ARIMA(series, order=ARIMA_ORDER).fit()
    forecast = fitted_model.forecast(steps=FORECAST_STEPS)

    # Start at the first month start strictly after the last observation. This
    # is correct whether or not that observation falls on a month start.
    first_month = series.index[-1] + pd.offsets.MonthBegin(1)
    future_dates = pd.date_range(start=first_month, periods=FORECAST_STEPS, freq='MS')

    return pd.DataFrame({
        'ds': future_dates,
        # Positional values: the forecast's own index must not drive alignment.
        'yhat': forecast.to_numpy(),
        'StockCode': stock_code,
    })


# Parse the month column once, without touching ``df`` itself.
sales = df.assign(YearMonth=pd.to_datetime(df['YearMonth']))

forecasts2 = []

# Only products with UnsoldDuration == 0 are modelled.
for product in products.loc[products["UnsoldDuration"] == 0, 'StockCode']:
    try:
        data = _monthly_series(sales, product)
        if len(data) < MIN_OBSERVATIONS:
            continue
        forecasts2.append(_forecast_product(data, product))
    except Exception as e:
        # Best effort: one product failing to fit must not abort the whole run.
        print(f"Error processing product {product}: {e}")

# pd.concat raises on an empty list, so fall back to an empty, typed-by-name frame.
if forecasts2:
    forecasts_df = pd.concat(forecasts2, ignore_index=True)
else:
    forecasts_df = pd.DataFrame(columns=['ds', 'yhat', 'StockCode'])

Error processing product 21264: LU decomposition error.
Error processing product 23021: LU decomposition error.
Error processing product 23033: LU decomposition error.
Error processing product 23034: LU decomposition error.
Error processing product 23119: LU decomposition error.
Error processing product 23123: LU decomposition error.
Error processing product 23216: LU decomposition error.
Error processing product 23242: LU decomposition error.
Error processing product 23290: LU decomposition error.
Error processing product 23307: LU decomposition error.
Error processing product 23336: LU decomposition error.
Error processing product 23428: too many indices for array: array is 0-dimensional, but 1 were indexed
Error processing product 23429: too many indices for array: array is 0-dimensional, but 1 were indexed
Error processing product 23430: too many indices for array: array is 0-dimensional, but 1 were indexed
Error processing product 23440: too many indices for array: array is 0-dime

In [112]:
# Stack the per-product forecast frames into one table and round the
# predictions to zero decimal places (the values stay floating point).
forecast_data = (
    pd.concat(forecasts2, ignore_index=True)
    .assign(yhat=lambda stacked: stacked['yhat'].round(0))
)

In [113]:
# Parse YearMonth into datetimes so it can be used as a join key against
# forecast_data['ds'] in the merge further down.
df['YearMonth'] = pd.to_datetime(df['YearMonth'])

In [129]:
# Keep only the first row for each (StockCode, YearMonth) pair so the join key
# used in the merge is unique on the sales side.
df = df.drop_duplicates(subset=['StockCode', 'YearMonth'])

In [131]:
# Make sure ds is datetime so both merge keys share a dtype.
# NOTE(review): ds is built with pd.date_range, so it is likely datetime already.
forecast_data['ds'] = pd.to_datetime(forecast_data['ds'])

In [132]:
# Outer join of forecasts and sales on (StockCode, month): rows that exist on
# only one side are kept, with the other side's columns left missing.
data = forecast_data.merge(
    df,
    how='outer',
    left_on=['StockCode', 'ds'],
    right_on=['StockCode', 'YearMonth'],
)

In [None]:
def process_stock_data(stock_data):
    """Collapse observed and forecast values for one product into single columns.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Rows for one product with the columns ``YearMonth``, ``ds``, ``Count``,
        ``yhat`` and ``ProductName``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns in which

        * ``YearMonth`` falls back to ``ds`` where it was missing,
        * ``Count`` falls back to ``yhat`` where it was missing,
        * ``ProductName`` gaps are filled forward, then backward.

        The input frame is left unmodified.
    """
    # Work on a copy: pandas does not support mutating the group object that
    # groupby.apply passes in.
    filled = stock_data.copy()

    filled['YearMonth'] = filled['YearMonth'].combine_first(filled['ds'])
    filled['Count'] = filled['Count'].combine_first(filled['yhat'])

    # ffill()/bfill() are the supported spelling of the deprecated
    # fillna(method='ffill') / fillna(method='bfill').
    filled['ProductName'] = filled['ProductName'].ffill().bfill()

    return filled

# Run process_stock_data once per StockCode group, then discard whatever index
# apply() produced in favour of a fresh 0..n-1 index.
processed_data = data.groupby('StockCode').apply(process_stock_data).reset_index(drop=True)

In [None]:
def check_subset_validity(group):
    """Tell whether ``group`` contains at least one non-missing ``ds`` value.

    Returns ``False`` when every entry of ``group['ds']`` is missing (including
    the case of an empty group) and ``True`` otherwise.
    """
    every_ds_missing = group['ds'].isna().all()
    return not every_ds_missing

# One boolean per StockCode: True when that product has at least one row with a
# non-missing ds value.
forecasted_products = processed_data.groupby('StockCode').apply(check_subset_validity)

In [135]:
# Copy the per-product flag onto every row by looking up the row's StockCode.
processed_data['is_valid'] = processed_data['StockCode'].map(forecasted_products)

In [136]:
# Keep only the rows whose product was flagged valid.
final = processed_data.loc[processed_data['is_valid']]

In [137]:
# Remove the helper columns; YearMonth and Count now carry the merged values.
final = final.drop(["ds", "yhat", "is_valid"], axis=1)

In [None]:
# Flag each row as forecast (1) or observed history (0).
# The boundary is each product's own last observed month in ``df`` rather than
# a fixed calendar date, so the flag stays right when histories end in
# different months. Vectorized: no per-row Python call.
last_observed = pd.to_datetime(df['YearMonth']).groupby(df['StockCode']).max()
final['forecast'] = (
    final['YearMonth'] > final['StockCode'].map(last_observed)
).astype('int64')

In [141]:
import plotly.express as px

# Line chart of monthly counts for a single stock code, with one trace per
# value of the forecast flag.
product = final.loc[final['StockCode'] == "85123A"]
name = product["ProductName"].iat[0]

fig = px.line(
    product,
    x="YearMonth",
    y="Count",
    color="forecast",
    title=name + " Sales",
)
# Blank out both axis titles; the last call returns the figure, which the
# notebook then renders.
fig.update_xaxes(title="")
fig.update_yaxes(title="")

