In [26]:
import datetime
import os
import pandas as pd
from typing import Sequence

In [27]:
def read_partition(path: str) -> pd.DataFrame:
    """Read every ``.xlsx`` file at or below ``path`` into one DataFrame.

    Directories are walked recursively. Any directory entry whose name
    contains ``=`` is treated as a ``key=value`` partition: each workbook read
    below it gets a constant column named ``key`` holding ``value`` (a string).

    Args:
        path: A directory to walk, or the path of a single ``.xlsx`` file.

    Returns:
        The concatenation of all workbooks found (each workbook's own row
        index is kept), or an empty DataFrame when no workbook is found.

    Raises:
        KeyError: If a workbook is reached without a ``Device=...`` partition
            on the way to it.
        ValueError: If a workbook's path does not contain the text
            ``device<value of its Device partition>``.
    """
    def _read_partition(path: str, partition_cols: Sequence[Sequence[str]]) -> list[pd.DataFrame]:
        """Return one DataFrame per workbook at or below ``path``."""
        if path.lower().endswith(".xlsx"):
            partitions = dict(partition_cols)
            # Single quotes inside the f-string keep this parseable before Python 3.12.
            if f"device{partitions['Device']}" not in path:
                raise ValueError(f"File {path} in wrong Device directory")
            return [pd.read_excel(path).assign(**partitions)]
        if os.path.isdir(path):
            frames = []
            # Sorted so the row order does not depend on filesystem listing order.
            for entry in sorted(os.listdir(path)):
                child_cols = list(partition_cols)
                if "=" in entry:
                    # Split on the first "=" only, so a value may itself contain "=".
                    child_cols.append(entry.split("=", 1))
                frames.extend(_read_partition(os.path.join(path, entry), child_cols))
            return frames
        # Neither a workbook nor a directory: nothing to read.
        return []

    dfs = _read_partition(path, [])
    return pd.concat(dfs) if dfs else pd.DataFrame()


def parse_tamtop_datetime(input: str) -> datetime.datetime:
    """Convert a timestamp string into a ``datetime`` via ``fromisoformat``.

    Before parsing, every space is turned into ``T`` and every ``(`` / ``)``
    is removed; the cleaned text is then handed to
    ``datetime.datetime.fromisoformat``.

    Args:
        input: The raw timestamp text.

    Returns:
        The parsed datetime (timezone-aware when the text carries an offset).

    Raises:
        ValueError: If the cleaned text is not accepted by ``fromisoformat``.
    """
    # One-pass character mapping: " " -> "T", parentheses dropped.
    cleanup = {ord(" "): "T", ord("("): None, ord(")"): None}
    iso_text = input.translate(cleanup)
    return datetime.datetime.fromisoformat(iso_text)

In [28]:
# Load all workbooks under "data", parse the "Time" strings into datetimes,
# move the row index into a regular column (reset_index) and discard the
# "NO." column.
df = (
    read_partition("data")
    .assign(Time=lambda frame: frame["Time"].apply(parse_tamtop_datetime))
    .reset_index()
    .drop(columns=["NO."])
)

# Per-device overview: time span and row count, plus the number of distinct
# floors and locations seen for each device.
display(
    df.groupby("Device").agg(
        {
            "Time": ["min", "max", "count"],
            "Floor": ["nunique"],
            "Location": ["nunique"],
        }
    )
)

Unnamed: 0_level_0,Time,Time,Time,Floor,Location
Unnamed: 0_level_1,min,max,count,nunique,nunique
Device,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,2024-08-18 17:00:00+08:00,2024-08-24 14:00:00+08:00,135,1,2
2,2024-08-18 17:00:00+08:00,2024-08-24 14:00:00+08:00,136,1,3
3,2024-08-17 20:00:00+08:00,2024-09-07 14:00:00+08:00,489,1,3
