In [287]:
# Import dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

In [288]:
###### READ DATA ANEW ######
# Load the cleaned attendance log from the semicolon-separated CSV file.
attendance_csv_path = '../data/boulders/AttendanceHistorys_clean.csv'
df = pd.read_csv(attendance_csv_path, sep=";")

In [289]:
# Parse the "Timestamp" column into pandas datetime values so the frame can
# later be sorted and resampled on time.
parsed_timestamps = pd.to_datetime(df['Timestamp'])
df['Timestamp'] = parsed_timestamps

In [290]:
# Preview the first 50 rows whose "Aarhus City Indgang" flag equals 1
df.loc[df["Aarhus City Indgang"] == 1].head(50)

Unnamed: 0,Timestamp,Aarhus City Indgang,Aarhus Nord Indgang,Hvidovre Indgang,København Indgang,Odense Indgang,Valby Indgang
81020,2021-10-22 14:44:48,1,0,0,0,0,0
81022,2021-10-22 14:45:01,1,0,0,0,0,0
81023,2021-10-22 14:45:08,1,0,0,0,0,0
81024,2021-10-22 14:45:21,1,0,0,0,0,0
81031,2021-10-22 14:54:34,1,0,0,0,0,0
81037,2021-10-22 14:59:53,1,0,0,0,0,0
81218,2021-10-22 18:14:49,1,0,0,0,0,0
81219,2021-10-22 18:14:51,1,0,0,0,0,0
81220,2021-10-22 18:15:10,1,0,0,0,0,0
81229,2021-10-22 18:24:41,1,0,0,0,0,0


In [291]:
# Order the log chronologically and make "Timestamp" the index, which the
# time-based resampling in the next cell relies on.
# Written as a chained expression instead of `inplace=True` mutations, and
# guarded so the cell can be re-run: once "Timestamp" has become the index it
# is no longer a column, and sorting/indexing by it again would raise KeyError.
if "Timestamp" in df.columns:
    df = df.sort_values("Timestamp").set_index("Timestamp")
else:
    # Re-run of this cell: the index has already been set, just keep it ordered.
    df = df.sort_index()

In [292]:
# Sum every column within consecutive 15-minute bins of the timestamp index.
# The result keeps the name `df_hourly` even though the bins are 15 minutes
# wide, because the following cells refer to it by that name.
quarter_hour_bins = df.resample("15Min")
df_hourly = quarter_hour_bins.sum()

In [293]:
# Preview the first 50 rows of the 15-minute counts
df_hourly.head(50)

Unnamed: 0_level_0,Aarhus City Indgang,Aarhus Nord Indgang,Hvidovre Indgang,København Indgang,Odense Indgang,Valby Indgang
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-05-06 08:00:00,0,0,0,4,0,0
2021-05-06 08:15:00,0,0,0,4,0,0
2021-05-06 08:30:00,0,0,0,3,0,0
2021-05-06 08:45:00,0,0,0,1,0,0
2021-05-06 09:00:00,0,0,0,6,0,0
2021-05-06 09:15:00,0,0,0,9,0,0
2021-05-06 09:30:00,0,0,0,5,0,0
2021-05-06 09:45:00,0,2,0,2,0,0
2021-05-06 10:00:00,0,7,0,12,2,0
2021-05-06 10:15:00,0,1,0,3,0,0


In [294]:
# Start time: midnight of the first day present in the data.
# Derived from the index instead of a hard-coded date so that a refreshed
# input file with earlier rows is not silently truncated by the reindex below.
# For the data shown in this notebook (first bin 2021-05-06 08:00) this is
# 2021-05-06 00:00:00, the value that was previously hard-coded.
start_time = df_hourly.index[0].normalize()

# Build the complete 15-minute grid from the start time up to the last bin
idx = pd.date_range(start=start_time, end=df_hourly.index[-1], freq="15Min")

# Align the frame to that grid; 15-minute slots that had no row get 0 in every column
df_hourly = df_hourly.reindex(idx, fill_value=0)

In [295]:
# Window size, expressed in 15-minute bins
window_size = 8 # 2 hours

# Width of one bin in minutes; must match the 15-minute grid df_hourly is built on
bin_minutes = 15

# Use a time-based window ("120min") instead of a fixed number of rows.
# On the gap-free 15-minute grid both cover the same 8 bins (the current bin
# plus the 7 before it), but the time-based window stays a true 2-hour window
# if rows are missing, e.g. when this cell is re-run after the night hours
# have been filtered out. A row-count window would then reach back into the
# previous evening. Time-based windows also sum over partial windows at the
# start of the series instead of producing NaN.
# Requires a sorted DatetimeIndex, which the resample/reindex cells provide.
rolling_window = f"{bin_minutes * window_size}min"

# Calculate the rolling activity in each gym: for every "<gym> Indgang" column
# add a "<gym> Activity" column holding the number of entries in the last 2 hours.
entrance_suffix = " Indgang"
entrance_columns = [col for col in df_hourly.columns if str(col).endswith(entrance_suffix)]
for entrance_column in entrance_columns:
    activity_column = entrance_column[: -len(entrance_suffix)] + " Activity"
    df_hourly[activity_column] = df_hourly[entrance_column].rolling(rolling_window).sum()

# Rolling sums come back as floats; replace any remaining NaN with 0 and make
# all columns integers again (round first so the cast never truncates).
df_hourly = df_hourly.fillna(0).round().astype(int)

In [299]:
# Preview the first 50 rows, now including the rolling "Activity" columns.
# NOTE(review): this cell's execution count (299) is higher than that of the
# opening-hours filter cell below (297), and the stored output starts at 08:00,
# so it appears to have been run after that filter - confirm the intended order.
df_hourly.head(50)

Unnamed: 0,Aarhus City Indgang,Aarhus Nord Indgang,Hvidovre Indgang,København Indgang,Odense Indgang,Valby Indgang,Aarhus City Activity,Aarhus Nord Activity,Hvidovre Activity,København Activity,Odense Activity,Valby Activity
2021-05-06 08:00:00,0,0,0,4,0,0,0,0,0,4,0,0
2021-05-06 08:15:00,0,0,0,4,0,0,0,0,0,8,0,0
2021-05-06 08:30:00,0,0,0,3,0,0,0,0,0,11,0,0
2021-05-06 08:45:00,0,0,0,1,0,0,0,0,0,12,0,0
2021-05-06 09:00:00,0,0,0,6,0,0,0,0,0,18,0,0
2021-05-06 09:15:00,0,0,0,9,0,0,0,0,0,27,0,0
2021-05-06 09:30:00,0,0,0,5,0,0,0,0,0,32,0,0
2021-05-06 09:45:00,0,2,0,2,0,0,0,2,0,34,0,0
2021-05-06 10:00:00,0,7,0,12,2,0,0,9,0,42,2,0
2021-05-06 10:15:00,0,1,0,3,0,0,0,10,0,41,2,0


In [297]:
# Gyms are open from 08:00 to 23:00 at the latest, so drop the night-time rows:
# keep only the slots whose time of day lies between 08:00 and 22:45.
first_slot, last_slot = "08:00", "22:45"
df_hourly = df_hourly.between_time(first_slot, last_slot)