In [None]:
import pandas as pd
from pathlib import Path
import json
import gzip
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
# Uncomment for interactive figures (requires the ipympl package):
# %matplotlib widget

In [None]:
# Input data layout; all paths are relative to the current working directory.
DATA_FOLDER = "data"  # root folder holding the input data
CHARGE_FOLDER = "charge"  # sub-folder of DATA_FOLDER with the charge-status parquet dataset
STATIONS_FOLDER = "stations"  # sub-folder of DATA_FOLDER with the stations_*.json.gz dumps

In [None]:
# Read the charge-status parquet dataset from <DATA_FOLDER>/<CHARGE_FOLDER>.
# The path is built from the config constants so that changing them in the
# configuration cell actually takes effect here.
charge_path = Path(DATA_FOLDER) / CHARGE_FOLDER
df_charge = (
    pd.read_parquet(charge_path, engine="pyarrow")
    .drop(columns="DATE")  # only used for partitioning
    # Chronological order (ties broken by station) is what the later
    # per-station diff / cumulative-sum cells rely on.
    .sort_values(by=["TIME", "STATION_ID"])
)
df_charge.head(5)

In [None]:
# Load the newest stations dump in the stations folder, where "newest" means
# the stations_*.json.gz file with the most recent modification time.

stations_path = Path(DATA_FOLDER) / STATIONS_FOLDER
static_path = stations_path  # same folder; both names are kept in case other cells use them
files = glob.glob(str(static_path / "stations_*.json.gz"))
if not files:
    # max() on an empty list would only raise an opaque ValueError;
    # fail with a message that names the folder that was searched instead.
    raise FileNotFoundError(f"No stations_*.json.gz file found in {static_path}")
latest_file = max(files, key=lambda x: Path(x).stat().st_mtime)

# The dump is gzip-compressed JSON; the operator list lives under "EVSEData".
with gzip.open(latest_file, "rt", encoding="utf-8") as file:
    stations = json.load(file)["EVSEData"]

In [None]:
# Lookup Series: index is the operator ID, value is the operator name.
# If an ID occurs more than once in `stations`, the last name seen wins.
operator_names = {}
for entry in stations:
    operator_names[entry['OperatorID']] = entry['OperatorName']

ser_operators = pd.Series(operator_names)
ser_operators.head(5)

In [None]:
# Number of entries in the operator ID -> name Series.
ser_operators.shape[0]

In [None]:
# Stations static info: one row per EVSE data record, tagged with the ID of
# the operator it belongs to (column 'Operator').
# Each record is copied into a new dict so that the 'Operator' key is NOT
# written back into the loaded `stations` structure.
rows = [
    {**station, 'Operator': operator['OperatorID']}
    for operator in stations
    for station in operator['EVSEDataRecord']
]
df_stations = pd.DataFrame(rows)

df_stations.head(5) # 17512 stations

In [None]:
# Show the column labels of the stations table (the per-station attributes).
df_stations.columns

In [None]:
# Number of rows in the stations table.
df_stations.shape[0]

In [None]:
# 1 where the reported status is "Occupied", 0 for every other status.
is_occupied = df_charge["STATUS"].eq("Occupied").astype(int)

# Change of the occupied flag between consecutive rows of the same station:
# +1 when a station becomes occupied, -1 when it stops being occupied, 0 otherwise.
# Row order within a station is whatever order df_charge currently has.
step_per_station = is_occupied.groupby(df_charge["STATION_ID"]).diff()

# The first row of each station has no predecessor (diff is NaN); use the
# flag itself there so an initially occupied station counts as +1.
df_charge['delta'] = step_per_station.fillna(is_occupied)

# Net change per timestamp, accumulated over time, gives the number of
# occupied charging points at each timestamp.
delta_per_time = df_charge.groupby("TIME")['delta'].sum()
occupied_count = delta_per_time.cumsum()

df_charge.tail(5)

In [None]:
# Plot the overall occupied-count series against time.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(occupied_count, label='Occupied')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))  # day-month hour:minute ticks
ax.grid()
ax.set_title("Total occupied charging points over time")
ax.legend();  # trailing ';' suppresses the Legend repr in the cell output

In [None]:
# One occupied-count-over-time figure per operator.
for operator_id, operator_name in ser_operators.items():
    # EVSE IDs of the charging points that belong to this operator.
    points = df_stations.loc[df_stations['Operator'] == operator_id, 'EvseID']

    # Stored under its own name on purpose: reusing `occupied_count` here would
    # overwrite the all-operators series, so re-running the total plot after
    # this cell would silently show the last operator instead.
    operator_occupied_count = (
        df_charge[df_charge['STATION_ID'].isin(points)]
        .groupby("TIME")['delta']
        .sum()
        .cumsum()
    )

    op_fig, op_ax = plt.subplots(figsize=(15, 5))
    op_ax.plot(operator_occupied_count)
    op_ax.set_title(operator_name)
    op_ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))
    op_ax.grid()

    # Render the figure now and release it, so the loop does not keep one open
    # figure per operator (matplotlib warns once more than 20 are open).
    plt.show()
    plt.close(op_fig)

In [None]:
# Swisscharge status always unknown?
# Count how often each STATUS value occurs for the Swisscharge charging points.
is_swisscharge = df_stations['Operator'] == "CH*SWISSCHARGE"
points_sc = df_stations.loc[is_swisscharge, 'EvseID']

reported_by_sc = df_charge['STATION_ID'].isin(points_sc)
df_charge_sc = df_charge[reported_by_sc]
df_charge_sc['STATUS'].value_counts()