In [1]:
import datetime
import os
import pandas as pd
from typing import Sequence

In [2]:
def read_partition(path: str) -> pd.DataFrame:
    def _read_partition(path: str, partition_cols: Sequence[Sequence[str]]) -> pd.DataFrame:
        if path.lower().endswith(".xlsx"):
            df = pd.read_excel(path)
            return [ df.assign(**dict(partition_cols)) ]
        elif os.path.isdir(path):
            return [
                df
                for subpath in os.listdir(path)
                for df in _read_partition(f"{path}/{subpath}", partition_cols + ([subpath.split("=")] if "=" in subpath else []))
            ]
        else:
            return []
    
    dfs = _read_partition(path, [])
    return pd.concat(dfs) if dfs else pd.DataFrame()


def parse_tamtop_datetime(input: str) -> datetime.datetime:
    return datetime.datetime.fromisoformat(
        input.replace(" ", "T").replace("(", "").replace(")", "")
    )

In [3]:
df = read_partition("data")
df["Time"] = df["Time"].apply(parse_tamtop_datetime)
df = df.set_index(["Device", "Time"]).drop(["NO."], axis=1)
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,PM2.5(ug/m³),AQI,CO₂(ppm),TVOC,Temperature(℉),Humidity(%RH),Floor,Location
Device,Time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3,2024-08-17 20:00:00+08:00,47.8,131,580,28,77.7,57,2,West Hallway
3,2024-08-17 21:00:00+08:00,61.1,155,493,67,81.9,51,2,West Hallway
3,2024-08-17 22:00:00+08:00,35.8,102,514,29,81.7,51,2,West Hallway
3,2024-08-17 23:00:00+08:00,35.5,101,513,18,80.1,53,2,West Hallway
3,2024-08-18 00:00:00+08:00,19.3,70,504,22,79.7,53,2,West Hallway
3,...,...,...,...,...,...,...,...,...
3,2024-08-20 08:00:00+08:00,3.8,21,674,40,78.8,60,2,Unit 210
3,2024-08-20 09:00:00+08:00,4.1,23,667,41,78.8,61,2,Unit 210
3,2024-08-20 10:00:00+08:00,10.9,54,597,54,79.0,61,2,Unit 210
3,2024-08-20 11:00:00+08:00,9.1,51,610,57,76.5,55,2,Unit 210
