In [1]:
#Import Libraries
import pandas as pd
#A powerful data manipulation and analysis library.
from statsmodels.tsa.arima.model import ARIMA
#A class from the statsmodels library used for time series forecasting.

# Load the billing records from the CSV export into a DataFrame.
data = pd.read_csv("Duplicated_Billing_Data_1_randomized.csv")

# Parse the billing-period end dates so they can be used as a time index.
data['BillingPeriodEndDate'] = pd.to_datetime(data['BillingPeriodEndDate'])

# Index the frame by date; resample() below needs a datetime-like index.
data.set_index('BillingPeriodEndDate', inplace=True)

# Aggregate TotalCost into one total per calendar month.
# 'ME' (month end) replaces the 'M' alias, which pandas 2.2+ deprecates and
# warns about. On pandas older than 2.2, 'ME' is not recognised: keep 'M' there.
monthly_data = data['TotalCost'].resample('ME').sum()

# Fit an ARIMA(p, d, q) model to the monthly totals:
#   p: number of lagged observations used (autoregressive part).
#   d: number of times the series is differenced (integrated part).
#   q: number of lagged forecast errors used (moving-average part).
model = ARIMA(monthly_data, order=(1, 1, 1))  # Example order, you may need to adjust
model_fit = model.fit()

# Forecast a single step ahead, i.e. the month after the last observed one.
forecast = model_fit.forecast(steps=1)

# Label the prediction with the month following the last month in the series.
next_month = monthly_data.index[-1] + pd.DateOffset(months=1)

# The forecast is a date-indexed Series, so take the value by position with
# .iloc; forecast[0] relies on the deprecated "integer key means position"
# fallback and emits a FutureWarning.
print(f"Predicted cost for {next_month.strftime('%B %Y')}: ${forecast.iloc[0]}")


#ARIMA
#ARIMA, which stands for AutoRegressive Integrated Moving Average, is a popular statistical method used for analyzing and forecasting time series data. It combines three components:

#AutoRegressive (AR) Component:
#Uses the dependency between an observation and a number of lagged (previous) observations.

#Integrated (I) Component:
#Uses differencing of raw observations to make the time series stationary (i.e., removing trends; seasonal patterns require seasonal differencing, as in SARIMA).

#Moving Average (MA) Component:
#Uses dependency between an observation and a residual error from a moving average model applied to lagged observations.



  monthly_data = data['TotalCost'].resample('M').sum()


Predicted cost for July 2024: $14.975900244194577


  print(f"Predicted cost for {next_month.strftime('%B %Y')}: ${forecast[0]}")
