# Plots

In [1]:
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeRegressor  
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import metrics 

## Raw Data

In [2]:
# Detect Google Colab from the running kernel instead of hard-coding the flag,
# so the same cell works on Colab and on a local kernel without manual edits.
import sys

colab = 'google.colab' in sys.modules
if colab:
  # On Colab the archive is read from Google Drive, which has to be mounted first.
  from google.colab import drive
  drive.mount('/content/gdrive')
  filepath = '/content/gdrive/MyDrive/ColabNotebooks/hardbruecke/frequenzen_hardbruecke_2020.zip'
else:
  # Outside Colab the archive is looked up relative to the working directory.
  filepath = 'frequenzen_hardbruecke_2020.zip'

Mounted at /content/gdrive


In [3]:
# Read the raw counts CSV straight out of the zip archive at `filepath`.
hb = pd.read_csv(filepath,compression='zip')

In [4]:
# Preview the first rows of the raw frame (In/Out counts per Timestamp and Name).
hb.head()

Unnamed: 0,In,Out,Timestamp,Name
0,1,0,2020-01-01T23:55:00,Ost-Nord total
1,2,5,2020-01-01T23:50:00,Ost-Nord total
2,1,0,2020-01-01T23:45:00,Ost-Nord total
3,1,4,2020-01-01T23:40:00,Ost-Nord total
4,1,0,2020-01-01T23:35:00,Ost-Nord total


In [5]:
def data_preparation(df):
    """Reshape the raw counter export into a long frame with model features.

    Every column other than ``Timestamp`` and ``Name`` (``In`` and ``Out`` in
    the 2020 export) is stacked into rows, so each output row holds one count
    for one timestamp, counter name and direction. Calendar features and
    integer codes for direction and counter name are then added.

    The input frame is not modified; a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw counts with a ``Timestamp`` column (anything ``pd.to_datetime``
        accepts), a ``Name`` column and the direction columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``Timestamp, Name, direction, count, hour, weekday, minute,
        month, day, direction_cat, name_cat``. Direction or name labels that
        are missing from the lookup tables below end up as NaN in the
        corresponding ``*_cat`` column.
    """
    # assign() returns a new frame, so the caller's Timestamp column keeps its
    # original dtype instead of being converted in place.
    df = df.assign(Timestamp=pd.to_datetime(df['Timestamp']))
    # Wide -> long: the former column labels land in 'level_2', the values in 0.
    df = df.set_index(['Timestamp','Name']).stack().reset_index()
    df = df.rename(columns={'level_2':'direction',0:'count'})

    df['hour'] = df['Timestamp'].dt.hour
    df['weekday'] = df['Timestamp'].dt.weekday
    df['minute'] = df['Timestamp'].dt.minute
    df['month'] = df['Timestamp'].dt.month

    # Midnight of the calendar day, kept as datetime so it can be filtered by date.
    df['day'] = pd.to_datetime(df['Timestamp'].dt.date)

    # map() yields numeric codes directly; replace() would only get there via
    # implicit downcasting of an object column, which pandas has deprecated.
    directions = {'In':0,'Out':1}
    df['direction_cat'] = df['direction'].map(directions)

    names = {
        'Ost-Süd total' : 0,
        'Ost-Nord total' : 1,
        'Ost-SBB total' : 2,
        'West-SBB total' : 3,
        'West-Süd total' : 4,
        'Ost-VBZ Total' : 5,
        'West-Nord total' : 6,
        'West-VBZ total' : 7,
    }
    df['name_cat'] = df['Name'].map(names)

    return df

In [6]:
# Display-only run of the preparation: the result is shown but not stored
# (the following cell stores it as hb2).
data_preparation(hb)

Unnamed: 0,Timestamp,Name,direction,count,hour,weekday,minute,month,day,direction_cat,name_cat
0,2020-01-01 23:55:00,Ost-Nord total,In,1,23,2,55,1,2020-01-01,0,1
1,2020-01-01 23:55:00,Ost-Nord total,Out,0,23,2,55,1,2020-01-01,1,1
2,2020-01-01 23:50:00,Ost-Nord total,In,2,23,2,50,1,2020-01-01,0,1
3,2020-01-01 23:50:00,Ost-Nord total,Out,5,23,2,50,1,2020-01-01,1,1
4,2020-01-01 23:45:00,Ost-Nord total,In,1,23,2,45,1,2020-01-01,0,1
...,...,...,...,...,...,...,...,...,...,...,...
1429357,2020-12-31 00:15:00,West-VBZ total,Out,4,0,3,15,12,2020-12-31,1,7
1429358,2020-12-31 00:10:00,West-VBZ total,In,3,0,3,10,12,2020-12-31,0,7
1429359,2020-12-31 00:10:00,West-VBZ total,Out,5,0,3,10,12,2020-12-31,1,7
1429360,2020-12-31 00:05:00,West-VBZ total,In,4,0,3,5,12,2020-12-31,0,7


In [7]:
# Keep the prepared long-format frame; the model and plot cells below use it.
hb2 = data_preparation(hb)

In [8]:
# Preview the prepared frame with the derived feature columns.
hb2.head()

Unnamed: 0,Timestamp,Name,direction,count,hour,weekday,minute,month,day,direction_cat,name_cat
0,2020-01-01 23:55:00,Ost-Nord total,In,1,23,2,55,1,2020-01-01,0,1
1,2020-01-01 23:55:00,Ost-Nord total,Out,0,23,2,55,1,2020-01-01,1,1
2,2020-01-01 23:50:00,Ost-Nord total,In,2,23,2,50,1,2020-01-01,0,1
3,2020-01-01 23:50:00,Ost-Nord total,Out,5,23,2,50,1,2020-01-01,1,1
4,2020-01-01 23:45:00,Ost-Nord total,In,1,23,2,45,1,2020-01-01,0,1


## Load Model

In [9]:
# Feature columns passed to the regressor and the name of the target column.
# NOTE(review): presumably the same columns, in the same order, that the model
# was trained on; confirm against the training notebook.
XList = ['hour', 'weekday', 'minute', 'month', 'direction_cat', 'name_cat']
y = 'count'

In [16]:
# Path of the pickled regressor: on Google Drive when running on Colab,
# otherwise relative to the working directory.
filename_model = (
  '/content/gdrive/MyDrive/ColabNotebooks/hardbruecke/models/DecisionTreeRegressor.sav'
  if colab
  else 'models/DecisionTreeRegressor.sav'
)

In [17]:
# Load the pre-trained regressor; the context manager closes the file handle
# as soon as unpickling is done.
# NOTE: pickle.load can execute arbitrary code, so only load model files from a
# trusted source.
with open(filename_model, 'rb') as model_file:
  regressor = pickle.load(model_file)

In [18]:
# Smoke test: predict counts for the first rows of the prepared frame.
regressor.predict(hb2[XList].head())

array([2.13333333, 1.45714286, 3.06896552, 1.45714286, 3.06896552])

## Plot

In [19]:
# Store the model's prediction for every row as a new column (adds to hb2 in place).
hb2['prediction'] = regressor.predict(hb2[XList])

In [20]:
# Select every row (all counters and directions) of one example day.
day = hb2[hb2['day']=='2020-02-10']
day

Unnamed: 0,Timestamp,Name,direction,count,hour,weekday,minute,month,day,direction_cat,name_cat,prediction
20678,2020-02-10 23:55:00,Ost-Nord total,In,2,23,0,55,2,2020-02-10,0,1,1.222222
20679,2020-02-10 23:55:00,Ost-Nord total,Out,1,23,0,55,2,2020-02-10,1,1,1.222222
20680,2020-02-10 23:50:00,Ost-Nord total,In,2,23,0,50,2,2020-02-10,0,1,1.222222
20681,2020-02-10 23:50:00,Ost-Nord total,Out,1,23,0,50,2,2020-02-10,1,1,1.222222
20682,2020-02-10 23:40:00,Ost-Nord total,In,2,23,0,40,2,2020-02-10,0,1,1.222222
...,...,...,...,...,...,...,...,...,...,...,...,...
1280297,2020-02-10 00:20:00,West-VBZ total,Out,12,0,0,20,2,2020-02-10,1,7,5.250000
1280298,2020-02-10 00:10:00,West-VBZ total,In,0,0,0,10,2,2020-02-10,0,7,0.916667
1280299,2020-02-10 00:10:00,West-VBZ total,Out,2,0,0,10,2,2020-02-10,1,7,4.100000
1280300,2020-02-10 00:00:00,West-VBZ total,In,0,0,0,0,2,2020-02-10,0,7,1.121212


In [21]:
# Narrow the selected day down to a single counter for plotting.
df = day[day['Name']=='Ost-Nord total']
# Disabled quick look at the observed counts per direction:
#px.line(df,x='Timestamp',y='count',color='direction')

In [22]:
# Disabled quick look at the predictions per direction:
#px.line(df,x='Timestamp',y='prediction',color='direction')

In [23]:
# Put 'count' and 'prediction' into one value column (labelled by 'variable')
# so both series can be drawn in the same figure.
melted = df.melt(
    id_vars=['Timestamp', 'direction', 'Name'],
    value_vars=['count', 'prediction'],
)
melted.head()

Unnamed: 0,Timestamp,direction,Name,variable,value
0,2020-02-10 23:55:00,In,Ost-Nord total,count,2.0
1,2020-02-10 23:55:00,Out,Ost-Nord total,count,1.0
2,2020-02-10 23:50:00,In,Ost-Nord total,count,2.0
3,2020-02-10 23:50:00,Out,Ost-Nord total,count,1.0
4,2020-02-10 23:40:00,In,Ost-Nord total,count,2.0


In [24]:
# Observed count vs. prediction over time: one line per variable, one facet row per direction.
px.line(melted,x='Timestamp',y='value',color='variable',facet_row='direction')

In [61]:
def plot_day(df,day,name,regressor,XList):
    """Plot observed vs. predicted counts for one counter on one day.

    Note: a later cell defines ``plot_day`` again; once that cell has run, it
    replaces this version.

    Parameters
    ----------
    df : pandas.DataFrame
        Prepared frame with ``Timestamp``, ``day``, ``Name``, ``direction``,
        ``count`` and the feature columns listed in ``XList``.
    day : str or datetime-like
        Calendar day to plot, e.g. ``'2020-07-23'``.
    name : str
        Counter name, matched against ``df['Name']``.
    regressor : object
        Fitted model exposing ``predict``.
    XList : list of str
        Feature columns passed to ``regressor.predict``.

    Returns
    -------
    plotly figure, or ``None`` when ``day`` lies outside the days present in ``df``.
    """
    requested_day = pd.Timestamp(day)
    # Compare scalars: chaining `<` on Series raises "truth value of a Series
    # is ambiguous". Bounds are inclusive so the first and last day are plottable.
    first_day = df['day'].min()
    last_day = df['day'].max()
    if not (first_day <= requested_day <= last_day):
        return None

    df_filter = df[(df['day']==day)&(df['Name']==name)].copy()
    df_filter['prediction'] = regressor.predict(df_filter[XList])
    # Long format: one 'value' column with 'variable' telling count from prediction.
    melted = df_filter.melt(id_vars=['Timestamp','direction','Name'], value_vars=['count', 'prediction'])
    fig = px.line(melted,x='Timestamp',y='value',color='variable',facet_row='direction',title=name)
    return fig
    

In [63]:
def plot_day(df,day,name,regressor,XList):
    """Plot observed vs. predicted counts for one counter on one day.

    Parameters
    ----------
    df : pandas.DataFrame
        Prepared frame with ``Timestamp``, ``day``, ``Name``, ``direction``,
        ``count`` and the feature columns listed in ``XList``.
    day : str or datetime-like
        Calendar day to plot, compared against ``df['day']``.
    name : str
        Counter name, compared against ``df['Name']``.
    regressor : object
        Fitted model exposing ``predict``.
    XList : list of str
        Feature columns passed to ``regressor.predict``.

    Returns
    -------
    plotly figure with one facet row per direction and one line each for
    ``count`` and ``prediction``.

    Raises
    ------
    ValueError
        If ``df`` holds no rows for the requested day and name.
    """
    df_filter = df[(df['day']==day)&(df['Name']==name)].copy()
    # Fail early with a readable message; predicting on zero rows would only
    # surface as an input-validation error from inside the model.
    if df_filter.empty:
        raise ValueError(f"No rows for name={name!r} on day={day!r}; nothing to plot.")
    df_filter['prediction'] = regressor.predict(df_filter[XList])
    # Long format: one 'value' column with 'variable' telling count from prediction.
    melted = df_filter.melt(id_vars=['Timestamp','direction','Name'], value_vars=['count', 'prediction'])
    fig = px.line(melted,x='Timestamp',y='value',color='variable',facet_row='direction',title=name)
    return fig
    

In [64]:
# Example: observed vs. predicted counts for 'Ost-Nord total' on 2020-07-23.
plot_day(hb2,'2020-07-23','Ost-Nord total',regressor,XList)