In [48]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [49]:
weather = pd.read_csv("Literature and data/KNMI_daily_0123-0923.csv")
station_name = pd.read_csv("Literature and data/KNMI_station.csv")
weather.head()
station_name.head()

Unnamed: 0,STN,LON(east),LAT(north),ALT(m),NAME
0,209,4.518,52.465,0.0,IJmond
1,210,4.43,52.171,-0.2,Valkenburg Zh
2,215,4.437,52.141,-1.1,Voorschoten
3,225,4.555,52.463,4.4,IJmuiden
4,235,4.781,52.928,1.2,De Kooy


In [50]:
#group data
weather.columns = ["stations", "dates", "windspeed","windspeed_max","temperature","rain_duration","rain_amount", "visibility"]
weather = weather.drop(columns=["stations"])
weather[weather.columns.difference(["dates"])] = weather[weather.columns.difference(["dates"])].apply(pd.to_numeric, errors='coerce')
weather.head()

weather_mean = weather.groupby(["dates"], as_index=False).mean(numeric_only=True)
weather_mean[weather_mean.columns.difference(["dates", "visibility"])] = weather_mean[weather_mean.columns.difference(["dates", "visibility"])]*0.1


weather_mean["dates"] = pd.to_datetime(weather_mean["dates"], format="%Y%m%d")


weather_mean["mov_wind"] = weather_mean["windspeed"].rolling(7).mean()
weather_mean["mov_temps"] = weather_mean["temperature"].rolling(7).mean()
weather_mean["mov_rains"] = weather_mean["rain_amount"].rolling(7).mean()
weather_mean.head()
#weather_mean.to_pickle("Literature and data/weather_processed_mean.pkl")

Unnamed: 0,dates,windspeed,windspeed_max,temperature,rain_duration,rain_amount,visibility,mov_wind,mov_temps,mov_rains
0,2023-01-01,7.282609,18.608696,11.714706,5.363636,4.330303,52.36,,,
1,2023-01-02,4.986957,12.369565,8.447059,4.651515,3.918182,38.52,,,
2,2023-01-03,6.228261,16.152174,6.338235,1.469697,1.424242,14.36,,,
3,2023-01-04,10.956522,19.521739,11.044118,8.351515,11.018182,33.64,,,
4,2023-01-05,6.521739,14.217391,10.144118,1.621212,0.818182,36.24,,,


In [51]:
#plot the weather
fig = px.line(weather_mean, x="dates", y="windspeed")
fig.add_scatter(x=weather_mean["dates"], y=weather_mean["mov_wind"], name="7day moving average")
fig.show()

In [52]:
fig = px.line(weather_mean, x="dates", y="temperature")
fig.add_scatter(x=weather_mean["dates"], y=weather_mean["mov_temps"], name="7day moving average")
fig.show()

In [53]:
fig = px.line(weather_mean, x="dates", y="rain_duration")
fig.add_scatter(x=weather_mean["dates"], y=weather_mean["mov_rains"], name="7day moving average")
fig.show()

In [54]:
file = "Literature and Data/20230908_Instappers_per_uur_export_V3.csv"
df_OV = pd.read_csv(file)

df_OV["Aantal_check_ins"] = df_OV["Aantal_check_ins"] * 1000
df_OV["Aantal_check_ins"] = df_OV["Aantal_check_ins"].astype('int')
df_OV_sum = df_OV.groupby(by="Datum", sort=False)["Aantal_check_ins"].sum().reset_index()
df_OV_sum["Datum"] = pd.to_datetime(df_OV_sum["Datum"], format="%d-%m-%Y")

df_OV_sum["mov_checkins"] = df_OV_sum["Aantal_check_ins"].rolling(7).mean()

df_OV_sum.head()

fig = px.line(df_OV_sum, x="Datum", y="Aantal_check_ins", title='Number of check-ins in 2023')

fig.update_layout(xaxis_title="Date", yaxis_title="Number of OV check-ins")
fig.add_scatter(x=df_OV_sum["Datum"], y=df_OV_sum["mov_checkins"], name="7day moving average")
fig.show()

In [55]:
df_OV_sum.rename(columns={"Datum":"dates"}, inplace=True)
OV_weather = df_OV_sum.merge(weather_mean, on = "dates")
OV_weather.head()

Unnamed: 0,dates,Aantal_check_ins,mov_checkins,windspeed,windspeed_max,temperature,rain_duration,rain_amount,visibility,mov_wind,mov_temps,mov_rains
0,2023-01-01,1003699,,7.282609,18.608696,11.714706,5.363636,4.330303,52.36,,,
1,2023-01-02,2074400,,4.986957,12.369565,8.447059,4.651515,3.918182,38.52,,,
2,2023-01-03,2465899,,6.228261,16.152174,6.338235,1.469697,1.424242,14.36,,,
3,2023-01-04,2446900,,10.956522,19.521739,11.044118,8.351515,11.018182,33.64,,,
4,2023-01-05,2643299,,6.521739,14.217391,10.144118,1.621212,0.818182,36.24,,,


In [56]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_checkins"], name="Check-ins"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_rains"], name="Rain amount"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Rain Amount vs Check-ins"
)

# Set x-axis title
fig.update_xaxes(title_text="Dates")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Check-ins</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Avg Rain amount(mm/h)</b>", secondary_y=True)

fig.show()

In [57]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_checkins"], name="Check-ins"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_wind"], name="Wind Speed"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Wind speed vs Check-ins"
)

# Set x-axis title
fig.update_xaxes(title_text="Dates")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Check-ins</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Avg Wind Speed(m/s)</b>", secondary_y=True)

fig.show()

In [58]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_checkins"], name="Check-ins"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_temps"], name="Temperature"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Temperature vs Check-ins"
)

# Set x-axis title
fig.update_xaxes(title_text="Dates")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Check-ins</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Avg temperature(C)</b>", secondary_y=True)

fig.show()

In [59]:
unpickled_df_cong = pd.read_pickle('Literature and data/df_cong_pickle.pkl').reset_index() 
df_cong_filt = unpickled_df_cong[['DatumFileBegin', 'TijdFileBegin', 'TijdFileEind', 'FileZwaarte', 'Oorzaak_4']]
df_cong_filt['FileZwaarte'] = df_cong_filt['FileZwaarte'].str.replace(',', '.', regex=True).astype(float)
# df_cong_filt.head(30)
df_cong_grouped = df_cong_filt.groupby('DatumFileBegin')['FileZwaarte'].sum().reset_index()

df_cong_grouped["mov_cong"] = df_cong_grouped["FileZwaarte"].rolling(7).mean()

df_cong_grouped.head()
#The warning this gives is not a thing to worry about. 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,DatumFileBegin,FileZwaarte,mov_cong
0,2023-01-01,93.5,
1,2023-01-02,2549.248,
2,2023-01-03,4404.093,
3,2023-01-04,6634.145,
4,2023-01-05,7113.57,


In [60]:
df_cong_grouped.rename(columns={"DatumFileBegin":"dates"}, inplace=True)
cong_weather = df_cong_grouped.merge(weather_mean, on = "dates")
cong_weather.head()

Unnamed: 0,dates,FileZwaarte,mov_cong,windspeed,windspeed_max,temperature,rain_duration,rain_amount,visibility,mov_wind,mov_temps,mov_rains
0,2023-01-01,93.5,,7.282609,18.608696,11.714706,5.363636,4.330303,52.36,,,
1,2023-01-02,2549.248,,4.986957,12.369565,8.447059,4.651515,3.918182,38.52,,,
2,2023-01-03,4404.093,,6.228261,16.152174,6.338235,1.469697,1.424242,14.36,,,
3,2023-01-04,6634.145,,10.956522,19.521739,11.044118,8.351515,11.018182,33.64,,,
4,2023-01-05,7113.57,,6.521739,14.217391,10.144118,1.621212,0.818182,36.24,,,


In [61]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=cong_weather["dates"], y=cong_weather["mov_cong"], name="Congestions"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=cong_weather["dates"], y=cong_weather["mov_rains"], name="Rain amount"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Rain amount vs Congestion severity"
)

# Set x-axis title
fig.update_xaxes(title_text="Dates")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Congestion severity (km*min)</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Avg Rain amount(mm/h)</b>", secondary_y=True)

fig.show()

In [62]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=cong_weather["dates"], y=cong_weather["mov_cong"], name="Congestions"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_wind"], name="Wind speed"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Wind speed vs Congestion severity"
)

# Set x-axis title
fig.update_xaxes(title_text="Dates")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Congestion severity (km*min)</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Avg Wind Speed(m/s)</b>", secondary_y=True)

fig.show()

In [64]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=cong_weather["dates"], y=cong_weather["mov_cong"], name="Congestions"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=OV_weather["dates"], y=OV_weather["mov_temps"], name="Temperature"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Temperature vs Congestion severity"
)

# Set x-axis title
fig.update_xaxes(title_text="Dates")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Congestion severity (km*min)</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Avg Temperature(C)</b>", secondary_y=True)

fig.show()