## Import of dependencies

In [None]:
# display full output in Notebook, instead of only the last result
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# customized preprocessing functions
import util

# standard libraries
import pandas as pd
import numpy as np
import os
from datetime import datetime
import time
import matplotlib.pyplot as plt

#viz
import seaborn as sns
# plt.style.use('fivethirtyeight')
sns.set_style("whitegrid")

#store model
import pickle

## Import data

In [None]:
# Parking locations analysed in this notebook. Each name is substituted into
# the per-location input file name ("features_<location>.csv") and into the
# name of the saved boxplot image.
locations = [
    'burgdorf',
    'rapperswil',
]

In [None]:
# NOTE(review): absolute, machine-specific path -- the original author hit a
# directory error with another path; adjust this when running elsewhere.
data_path = "/Users/christopherkindl/working/start-hack-2021/00_data" # sorry but I ran into a strange directory error

# Columns cast to ``object`` dtype after loading (presumably so that they are
# handled as categorical labels rather than numbers further down -- confirm).
categorical_columns = [
    'hour', 'day_of_week', 'quarter', 'month', 'day_of_year', 'day_of_month',
    'week_of_year', 'weather', 'weather_t-1', 'weather_t-2', 'weather_t-3',
    'weather_t-7', 'holiday',
]

# Maps location name -> feature DataFrame indexed by the parsed 'date' column.
dfs = {}

for loc in locations:
    # read the per-location feature file
    csv_path = os.path.join(data_path, "features_{}.csv".format(loc))
    frame = pd.read_csv(csv_path, sep=",")

    # parse the date strings into datetimes
    frame['date'] = pd.to_datetime(frame['date'])

    # declare categorical columns
    for col in categorical_columns:
        frame[col] = frame[col].astype(object)

    # use the datetime column as index and register the frame
    dfs[loc] = frame.set_index('date')

In [None]:
# Report the shape (as returned by DataFrame.shape) of every loaded dataset,
# one line per location.
for loc in locations:
    frame_shape = dfs[loc].shape
    location_label = loc.capitalize()
    print('Dataset shape of {} data: {}'.format(location_label, frame_shape))

## Boxplots

In [None]:
# Global matplotlib settings used for all figures created from here on.
# NOTE: 'text.usetex' routes all text through LaTeX, so a working LaTeX
# installation is needed to render and save the figures.
plt_params = {
    'text.usetex': True,
    'font.size': 20,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'lines.linewidth': 1,
    'grid.linewidth': 2,
}
plt.rcParams.update(plt_params)

# Draw one boxplot of occupancy rate per hour for each location and save it
# as 'boxplot_<location>.png' in the EDA visualisation folder.
for loc in locations:
    # one 20x12 figure per location
    fig = plt.figure(figsize=(20, 12))

    # TODO: align colors with the current colors in the paper
    ax = sns.boxplot(
        data=dfs[loc],
        x='hour',
        y='occupancy_rate',
        color="#B4D0E4",
        linewidth=2,
        boxprops={'alpha': .7},
    )

    # TODO: align style with the paper, especially font sizes
    # (results are assigned to `_` so the notebook does not echo them)
    _ = ax.set_xlabel(r'Hour of day')
    _ = ax.set_ylabel(r'Parking occupancy $\left[\%\right]$')

    # save the figure, keeping the figure's own background color
    file = 'boxplot_' + loc + '.png'
    plt.savefig(
        '../05_visualisations_of_eda/' + file,
        facecolor=fig.get_facecolor(),
        edgecolor='none',
    )