In [124]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from pmdarima.arima import auto_arima
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

In [125]:
# Load the "HistoricalTransactions" sheet of the training workbook.
df = pd.read_excel(
    "AIB_History_Training_2014-2020.xlsx",
    sheet_name="HistoricalTransactions",
)

In [126]:
# Columns removed here; what remains is the effective date, the transaction
# group and the transaction amount (see the preview printed by this cell).
unused_columns = [
    "FileFolder",
    "FileName",
    "Report_Title",
    "Report_RunDate",
    "Report_AsOfDate",
    "Currency",
    "Institution",
    "Branch",
    "TransactionBackdateFlag",
    "TransactionCode",
]
df = df.drop(columns=unused_columns)
df.head()

Unnamed: 0,Report_TransactionEffectiveDate,TransactionGroup,TransactionAmount
0,2013-12-30,D,-972.619715
1,2013-12-30,D,-75.956876
2,2013-12-30,C,127.43047
3,2013-12-30,D,-38.300469
4,2013-12-30,D,-117.467137


In [127]:
# Index the rows by effective date; the daily aggregation later groups on it.
df = df.set_index("Report_TransactionEffectiveDate")
df.head()

Unnamed: 0_level_0,TransactionGroup,TransactionAmount
Report_TransactionEffectiveDate,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-12-30,D,-972.619715
2013-12-30,D,-75.956876
2013-12-30,C,127.43047
2013-12-30,D,-38.300469
2013-12-30,D,-117.467137


In [128]:
## Create Debit and Credit df
# Sum TransactionAmount per effective date, separately for the "C" (credit)
# and "D" (debit) transaction groups.
credit_daily = df[df["TransactionGroup"] == "C"].groupby(["Report_TransactionEffectiveDate"])["TransactionAmount"].sum()
debit_daily = df[df["TransactionGroup"] == "D"].groupby(["Report_TransactionEffectiveDate"])["TransactionAmount"].sum()

# Put both series on ONE daily calendar spanning the earliest to the latest
# date seen in either group. The forecast index further down is derived from
# credit_df alone, so the two series must end on the same day; otherwise the
# debit forecast would not line up with it and would turn into NaN.
shared_days = pd.date_range(
    start=min(credit_daily.index.min(), debit_daily.index.min()),
    end=max(credit_daily.index.max(), debit_daily.index.max()),
    freq="D",
    name="Report_TransactionEffectiveDate",
)

## Fill missing values
# .ffill()/.bfill() replace fillna(method=...), which pandas has deprecated.
# NOTE(review): credit gaps are carried forward (then back-filled) while debit
# gaps are set to 0 -- confirm this asymmetry is intended.
credit_df = credit_daily.reindex(shared_days).ffill().bfill()
debit_df = debit_daily.reindex(shared_days).fillna(0)

In [129]:
# Find best sarima params
def find_best_sarima_params(data, seasonal=True, m=7):
    """Select SARIMA orders for ``data`` using ``auto_arima``.

    Args:
        data: Observations handed to ``auto_arima``.
        seasonal: Forwarded as ``auto_arima``'s ``seasonal`` argument.
        m: Forwarded as ``auto_arima``'s ``m`` argument (defaults to 7).

    Returns:
        Tuple ``(order, seasonal_order)`` read from the selected model.
    """
    search_settings = dict(
        seasonal=seasonal,
        m=m,
        stepwise=True,
        suppress_warnings=True,
        trace=False,
    )
    selected = auto_arima(data, **search_settings)
    return selected.order, selected.seasonal_order

# Run the order search once per series.
credit_order, credit_seasonal = find_best_sarima_params(credit_df)
debit_order, debit_seasonal = find_best_sarima_params(debit_df)

# Print the selected orders, credit first.
for caption, chosen in (
    ("Credit Order: ", credit_order),
    ("Credit Seasonal: ", credit_seasonal),
    ("Debit Order: ", debit_order),
    ("Debit Seasonal: ", debit_seasonal),
):
    print(caption, chosen)

KeyboardInterrupt: 

In [None]:
def fit_arima_model(data, order, seasonal):
    """Fit a SARIMAX model to ``data`` and return the fitted results.

    Args:
        data: Observations passed to ``SARIMAX`` as the endogenous series.
        order: Non-seasonal ``(p, d, q)`` order.
        seasonal: Seasonal ``(P, D, Q, s)`` order.

    Returns:
        The results object returned by ``SARIMAX(...).fit(disp=False)``.
    """
    model = SARIMAX(
        data,
        order=order,
        # SARIMAX names this parameter `seasonal_order`; the tuple has to be
        # passed under that name for the seasonal terms to be part of the model.
        seasonal_order=seasonal,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    return model.fit(disp=False)

# Fit one model per series with the orders selected by the search.
credit_model = fit_arima_model(
    data=credit_df,
    order=credit_order,
    seasonal=credit_seasonal,
)
debit_model = fit_arima_model(
    data=debit_df,
    order=debit_order,
    seasonal=debit_seasonal,
)

In [None]:
# Number of steps to forecast, shared by both models.
forecast_steps = 90

# Point forecasts, credit model first.
credit_forecast, debit_forecast = (
    fitted.forecast(steps=forecast_steps)
    for fitted in (credit_model, debit_model)
)

In [None]:
# The forecast calendar starts the day after the last credit observation.
first_forecast_day = credit_df.index[-1] + pd.Timedelta(days=1)
forecast_index = pd.date_range(start=first_forecast_day, periods=forecast_steps, freq='D')

# Collect both forecasts and their sum into one frame on that calendar.
forecast_columns = {
    'credit_forecast': credit_forecast,
    'debit_forecast': debit_forecast,
    'total_forecast': credit_forecast + debit_forecast,  # Total transaction forecast
}
forecasts_df = pd.DataFrame(forecast_columns, index=forecast_index)


In [None]:
# Confidence Intervals
credit_ci = credit_model.get_forecast(steps=forecast_steps).conf_int()
debit_ci = debit_model.get_forecast(steps=forecast_steps).conf_int()

# conf_int() yields two columns: position 0 is stored as the lower bound and
# position 1 as the upper bound for each series.
for series_name, bounds in (("credit", credit_ci), ("debit", debit_ci)):
    forecasts_df[f"{series_name}_lower_ci"] = bounds.iloc[:, 0]
    forecasts_df[f"{series_name}_upper_ci"] = bounds.iloc[:, 1]

In [None]:
## Evaluate model
def evaluate_model(model, actual_data, label):
    """Print error metrics for a fitted model and plot its residuals.

    Args:
        model: Fitted results object exposing ``fittedvalues``.
        actual_data: Observed values compared against ``model.fittedvalues``.
        label: Name used in the printed summary, the legend and the title.

    Returns:
        Tuple ``(mae, rmse)``.
    """
    fitted = model.fittedvalues
    errors = actual_data - fitted

    mae = mean_absolute_error(actual_data, fitted)
    rmse = np.sqrt(mean_squared_error(actual_data, fitted))
    print(f"{label} Model Evaluation: MAE={mae:.2f}, RMSE={rmse:.2f}")

    # Residuals over time, with a dashed zero line for reference.
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(errors, label=f'{label} Residuals')
    ax.axhline(y=0, color='r', linestyle='--')
    ax.set_title(f'Residual Plot for {label} Transactions')
    ax.legend()
    plt.show()

    return mae, rmse
    