In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the raw records; the relative path is resolved against the working directory.
df = pd.read_csv("Ak-new-csv.csv")

In [None]:
# Parse DATE as day/month/year, promote it to the index and order the rows
# chronologically so that time-based operations (e.g. resample) work.
df = (
    df.assign(DATE=pd.to_datetime(df["DATE"], format=r"%d/%m/%Y"))
    .set_index("DATE")
    .sort_index()
)

In [None]:
# Summarise the frame: index, columns, non-null counts and dtypes.
df.info()

In [None]:
# Mean and standard deviation of TOTAL; fillna() reads these when imputing.
mean_t, std_t = df['TOTAL'].mean(), df['TOTAL'].std()

In [None]:
def fillna(value, mean=None, std=None):
    """Return a non-negative value, imputing a random draw when ``value`` is missing.

    Parameters
    ----------
    value : scalar
        A single observation. NaN / None marks a missing entry.
    mean, std : float, optional
        Centre and spread of the normal distribution used for imputation.
        When omitted, the notebook-level ``mean_t`` and ``std_t`` are used.

    Returns
    -------
    float
        ``abs(value)`` for observed values; for missing values, the absolute
        value of one draw from ``Normal(mean, std)``.

    Notes
    -----
    Draws come from NumPy's global random state, so results differ between
    runs unless that state is seeded beforehand.
    """
    # NaN never compares equal to anything (itself included), so the former
    # `value != np.nan` test was always True and overwrote every observation
    # with noise. pd.isna is the reliable missing-value check.
    if pd.isna(value):
        loc = mean_t if mean is None else mean
        scale = std_t if std is None else std
        value = np.random.normal(loc=loc, scale=scale)
    # A normal draw can be negative; the absolute value keeps the result >= 0.
    return np.abs(value)

In [None]:
# Run every TOTAL entry through fillna(); the function is passed directly
# because a lambda wrapper adds nothing.
df['TOTAL'] = df['TOTAL'].apply(fillna)

In [None]:
# Inspect the frame again now that the TOTAL column has been rewritten.
df.info()

In [None]:
# Full TOTAL series with its monthly mean overlaid.
y = df['TOTAL']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='servicely', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total attendance')
ax.legend()

In [None]:
# from sklearn.impute import KNNImputer

# # Use KNNImputer to fill missing values
# imputer = KNNImputer(n_neighbors=5)
# ts_filled = imputer.fit_transform(df['TOTAL'].to_numpy().reshape(-1, 1))
# ts_filled = pd.Series(ts_filled.flatten(), index=df.index)

In [None]:
# df['TOTAL'] = ts_filled

In [None]:
# Independent copies of the rows for each of the three days of interest.
sunday, wednesday, friday = (
    df[df['Day of Week'] == day_name].copy()
    for day_name in ("Sunday", "Wednesday", "Friday")
)

In [None]:
# Sunday TOTAL series with its monthly mean overlaid.
y = sunday['TOTAL']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='Weekly Sunday', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total attendance')
ax.legend()

In [None]:
# Sunday Amount series with its monthly mean overlaid.
y = sunday['Amount']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='Weekly sunday', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total Amount')
ax.legend()

In [None]:
# Wednesday TOTAL series with its monthly mean overlaid.
y = wednesday['TOTAL']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='Weekly Wednesday', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total attendance')
ax.legend()

In [None]:
# Wednesday Amount series with its monthly mean overlaid.
y = wednesday['Amount']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='Weekly Wednesday', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total Amount')
ax.legend()

In [None]:
# Friday TOTAL series with its monthly mean overlaid.
y = friday['TOTAL']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='Weekly Friday', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total attendance')
ax.legend()

In [None]:
# Friday Amount series with its monthly mean overlaid.
y = friday['Amount']
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(y, label='Weekly Friday', marker='.', linewidth=0.5, linestyle='-')
ax.plot(
    y.resample('M').mean(),
    label='Monthly Mean Resample',
    marker='o',
    markersize=8,
    linestyle='-',
)
ax.set_ylabel('Total Amount')
ax.legend()

### Check for Stationarity

A dataset is stationary if its statistical properties like mean, variance, and autocorrelation do not change over time.

#### Visualization 
This method plots the rolling statistics (mean and standard deviation) alongside the series to show at a glance whether they change substantially over time:

In [None]:
### plot for Rolling Statistic for testing Stationarity
def test_stationarity(timeseries, title, window=12):
    """Plot a series together with its rolling mean and rolling standard deviation.

    A visual stationarity check: if the rolling mean or rolling standard
    deviation drifts noticeably over time, the series is unlikely to be
    stationary.

    Parameters
    ----------
    timeseries : pandas.Series or array-like
        The observations to inspect.
    title : str
        Legend label used for the raw series.
    window : int, optional
        Number of consecutive observations in each rolling window
        (default 12, the value that was previously hard-coded).

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    # Determine the rolling statistics from one shared rolling window object.
    rolling = pd.Series(timeseries).rolling(window=window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    fig, ax = plt.subplots(figsize=(16, 4))
    ax.plot(timeseries, label=title)
    ax.plot(rolmean, label='rolling mean')
    # The curve is the unscaled rolling std; the earlier '(x10)' label
    # advertised a scaling that was never applied.
    ax.plot(rolstd, label='rolling std')
    ax.legend()

In [None]:

# Render floats with eight decimal places in pandas output from here on,
# then inspect the rolling statistics of the full TOTAL series.
pd.options.display.float_format = '{:.8f}'.format
y = df['TOTAL']
test_stationarity(timeseries=y, title='raw data')

In [None]:
# Rolling statistics of the full Amount series.
y = df['Amount']
test_stationarity(timeseries=y, title='raw data(Amount)')

In [None]:
# Rolling statistics of the Sunday TOTAL series.
y = sunday['TOTAL']
test_stationarity(timeseries=y, title='raw data(week sunday total attendance)')

In [None]:
# Rolling statistics of the Sunday Amount series.
y = sunday['Amount']
test_stationarity(timeseries=y, title='raw data(week sunday total amount)')

In [None]:
# Rolling statistics of the Wednesday TOTAL series.
y = wednesday['TOTAL']
test_stationarity(timeseries=y, title='raw data(week Wednesday total attendance)')

In [None]:
# Rolling statistics of the Wednesday Amount series.
y = wednesday['Amount']
test_stationarity(timeseries=y, title='raw data(week Wednesday total amount)')

In [None]:
# Rolling statistics of the Friday TOTAL series.
y = friday['TOTAL']
test_stationarity(timeseries=y, title='raw data(week Friday total attendance)')

In [None]:
# Rolling statistics of the Friday Amount series.
y = friday['Amount']
test_stationarity(timeseries=y, title='raw data(week Friday total amount)')

#### Augmented Dickey-Fuller Test
The Augmented Dickey-Fuller (ADF) test is a statistical hypothesis test whose null hypothesis is that the series has a unit root (i.e. is non-stationary). By comparing the test statistic with the critical values (or, equivalently, the p-value with a chosen significance level), we can determine whether the processed data is stationary or not with different levels of confidence.

In [None]:
# Augmented Dickey-Fuller Test
from statsmodels.tsa.stattools import adfuller

def ADF_test(timeseries, dataDesc):
    """Run the Augmented Dickey-Fuller test on a series and print a readable report.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to test; missing values are dropped before testing.
    dataDesc : str
        Short description of the data, used in the report header.

    Returns
    -------
    None
        The test statistic, p-value and a per-critical-value verdict are
        printed.
    """
    print(' > Is the {} stationary ?'.format(dataDesc))
    dftest = adfuller(timeseries.dropna(), autolag='AIC')
    # Positions used from adfuller's result: 0 = test statistic, 1 = p-value,
    # 4 = mapping of significance level (keys ending in '%') to critical value.
    test_statistic, p_value, critical_values = dftest[0], dftest[1], dftest[4]
    print('Test statistic = {:.3f}'.format(test_statistic))
    print('P-value = {:.3f}'.format(p_value))
    print('Critical values :')
    for level, threshold in critical_values.items():
        # A statistic above the critical value fails to reject the unit-root
        # null. Building the full phrase avoids the double space the old
        # empty-string substitution left in "The data is  stationary".
        verdict = 'not stationary' if threshold < test_statistic else 'stationary'
        confidence = 100 - int(level[:-1])
        print('\t{}: {} - The data is {} with {}% confidence'.format(level, threshold, verdict, confidence))

In [None]:
# ADF check on the full TOTAL series (typo "attendabce" in the printed
# description corrected).
y = df['TOTAL']
ADF_test(y, "raw data(Total attendance)")

In [None]:
# ADF check on the full Amount series.
y = df['Amount']
ADF_test(timeseries=y, dataDesc="raw data(Total Amount)")