# Data Load
This notebook walks through the steps required to load, explore, and manipulate the data.

## Import Libraries

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import sys

sys.path.append("../src")
import ts_preprocess as edp
import adhoc
import ts_plots, ts_preprocess

# Raise pandas' display limit so up to 500 rows are shown before truncation.
pd.set_option('display.max_rows', 500)

## Data Preparation

### Read Data Files

In [None]:
# Load the input data through the project's adhoc helper and keep the result
# in all_files (the data source and return type are defined in adhoc, not here).
all_files = adhoc.adhoc_read_data()

In [None]:
# Pass the loaded data through adhoc_merge_columns and rebind all_files to the
# returned object (which columns are merged is decided inside adhoc).
all_files = adhoc.adhoc_merge_columns(all_files)

In [None]:
# Derive the series analysed in the rest of the notebook from the 'Time' and
# 'CUB.Share.top.8' columns.
# NOTE(review): 'W' is presumably a weekly resampling frequency -- confirm
# against ts_preprocess.create_timeseries_series.
ts_df = ts_preprocess.create_timeseries_series(
    all_files,
    ['Time', 'CUB.Share.top.8'],
    'W',
)

## Data Visualisation

In [None]:
# Run the project's datetime-index validation on the series before plotting
# (the exact checks performed live in ts_preprocess).
ts_preprocess.validate_datetime_index(ts_df)

In [None]:
# Plot the full merged dataset using the project's plotting helper.
ts_plots.plot_timeseries_columns(all_files)

In [None]:
# Plot only the derived series; the observations below refer to this figure.
ts_plots.plot_timeseries_columns(ts_df)

**Observations**
* Slightly positive trend over time
* Seasonal pattern observed, with peaks occurring in December
* Potentially a cyclical pattern

## Timeseries Plots

In [None]:
# Decompose the series with a multiplicative model; the HTML report path is
# handed to the plotting helper.
ts_plots.plot_timerseries_decomposition(
    ts_df,
    model='multiplicative',
    path_html='../data/reports/data_decomposition.html',
)

In [None]:
# Autocorrelation plot over 52 lags; the HTML report path is handed to the
# plotting helper.
# NOTE(review): 52 lags presumably corresponds to one year of weekly data --
# confirm the series frequency.
ts_plots.plot_autocorrelation(
    ts_df,
    nlags=52,
    path_html='../data/reports/data_autocorrelation.html',
)

In [None]:
# Build a helper frame holding the series plus calendar features derived from
# its datetime index, then plot every column.
df = ts_df.to_frame()

# DatetimeIndex.week was deprecated in pandas 1.1 and removed in pandas 2.0.
# isocalendar().week returns the same ISO-8601 week number; the cast keeps the
# plain int64 dtype that the old accessor produced.
# NOTE(review): the int64 cast assumes the index contains no NaT -- confirm.
df['week'] = ts_df.index.isocalendar().week.astype('int64')
df['month'] = ts_df.index.month
df['year'] = ts_df.index.year

# NOTE(review): ISO week numbers can belong to the neighbouring calendar year
# (e.g. 1 January may fall in week 52/53), so such rows sort to the end of
# their calendar year. This ordering is unchanged from the original cell.
df = df.sort_values(by=['year', 'week', 'month'])

ts_plots.plot_timeseries_columns(df)

## Save data

In [None]:
# Persist the full merged dataset in both Parquet and CSV form
all_files.to_parquet('../data/02_processed_data/df_full.parquet')
all_files.to_csv('../data/02_processed_data/df_full.csv')

# Export the series for Azure
ts_df.to_csv('../data/02_processed_data/df_azure.csv', header=True)

# Export the series for AWS Forecast, tagging every row with a constant
# item_id of 1
aws_forecast = ts_df.to_frame().assign(item_id=1)
aws_forecast.to_csv('../data/02_processed_data/df_aws.csv', header=True)