In [None]:
import pandas as pd
from pathlib import Path
import json
import gzip
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
#%matplotlib widget

In [None]:
# Folder layout of the input data. The sub-folder names are joined onto
# DATA_FOLDER when paths are built (e.g. Path(DATA_FOLDER) / STATIONS_FOLDER).
DATA_FOLDER = "data"
CHARGE_FOLDER = "charge"      # parquet dataset with the charging-status records
STATIONS_FOLDER = "stations"  # gzipped JSON snapshots named stations_*.json.gz

In [None]:
# Read the whole charge dataset folder into a single frame.
# The location is built from the config constants (same pattern as the stations
# path) instead of a hard-coded "data/charge/" literal, so changing
# DATA_FOLDER / CHARGE_FOLDER actually takes effect here.
charge_path = Path(DATA_FOLDER) / CHARGE_FOLDER
df_charge = (
    pd.read_parquet(charge_path, engine="pyarrow")
    .drop(columns="DATE")  # only used for partitioning
)
df_charge.columns

In [None]:
# Load the newest stations snapshot in the stations folder
# ("newest" = most recent file modification time).

stations_path = Path(DATA_FOLDER) / STATIONS_FOLDER
static_path = stations_path  # same folder; both names are kept for compatibility
files = glob.glob(str(static_path / "stations_*.json.gz"))
if not files:
    # Without this guard max() fails with an unhelpful "empty sequence" ValueError.
    raise FileNotFoundError(f"No stations_*.json.gz files found in {static_path}")
latest_file = max(files, key=lambda x: Path(x).stat().st_mtime)

# The snapshot is gzip-compressed JSON; the station data sits under "EVSEData".
with gzip.open(latest_file, "rt", encoding="utf-8") as file:
    stations = json.load(file)["EVSEData"]

In [None]:
# Lookup table: operator ID (index) -> operator name (value)
ser_operators = pd.Series(
    dict((entry['OperatorID'], entry['OperatorName']) for entry in stations)
)

In [None]:
# One row per station record, tagged with the ID of the operator it belongs to.
# Each record is copied ({**station, ...}) so that adding the 'Operator' key
# does not modify the dicts stored inside `stations`.
rows = [
    {**station, 'Operator': operator['OperatorID']}
    for operator in stations
    for station in operator['EVSEDataRecord']
]
df_stations = pd.DataFrame(rows)

In [None]:
len(df_stations) # number of stations (one row per EVSEDataRecord entry)

In [None]:
# Fields available for each station record (plus the added 'Operator' column)
df_stations.columns

In [None]:
# df_charge.groupby('STATION_ID').size().sort_values(ascending=False).head(50) # Shows the 50 stations that change status most often

In [None]:
# Columns of the charge frame; TIME, STATION_ID and STATUS are the ones used below
df_charge.columns

In [None]:
# Status history of one station, ordered by time.
# Only one station is selected: the earlier version filtered the full frame for
# "+41*029*1785*2" and then immediately overwrote the result.
# Other station IDs that were inspected: "+41*029*169*1", "+41*029*1785*2"
station_id = "CH*AVI*E10048"
df_stat = df_charge[df_charge["STATION_ID"] == station_id].sort_values("TIME")

In [None]:
# Step plot of the selected station's status over time; where='post' holds each
# status until the next recorded row.
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot()
ax.step(df_stat["TIME"], df_stat["STATUS"], where='post');

In [None]:
# Put the selected station on a regular 30-second grid, forward-filling the
# values between recorded rows (displayed only, not stored).
(
    df_stat
    .set_index('TIME')
    .resample('30s')
    .ffill()
)

In [None]:
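# Pivot to one column per station (might be heavy on memory)
# NOTE: `df_data` is not defined in this notebook -- presumably `df_charge` is meant; confirm before enabling.
#df_data_pvt = df_data.pivot(index='TIME', columns='STATION_ID', values='STATUS')
#df_data_pvt.head()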

In [None]:
# Put every station on a 30-second grid (forward-filling between recorded rows),
# then turn the resulting group/time index back into ordinary columns.
# Both steps live in one cell on purpose: a separate
# `df_charge_resampled = df_charge_resampled.reset_index()` cell is not
# idempotent -- running it a second time adds a spurious 'index' column.
df_charge_resampled = (
    df_charge
    .set_index('TIME')
    .groupby('STATION_ID')
    .resample('30s', include_groups=False)
    .ffill()
    .reset_index()
)

In [None]:
# Inspect the resampled frame
df_charge_resampled

In [None]:
# For every timestamp, count how many rows carry each STATUS value.
# The result is looked up later as df_counts[:, '<status>'], i.e. by TIME and STATUS.
status_by_time = df_charge_resampled.groupby('TIME')[['STATUS']]
df_counts = status_by_time.value_counts()

In [None]:
# Inspect the per-timestamp status counts
df_counts

In [None]:
# Number of 'Occupied' rows at each timestamp, plotted over time.
ax = plt.figure(figsize=(15, 5)).add_subplot()
occupied = df_counts[:, 'Occupied']  # all timestamps, STATUS == 'Occupied'
ax.plot(occupied, label='Occupied')
# Tick labels as day-month hour:minute
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))
ax.grid()
#ax.plot(df_counts[:, 'Available'], label='Available')
ax.legend();