In [None]:
from pathlib import Path
import pandas as pd
from IPython.display import display

# New Jupyter cell (index 0) — preview a parquet dataset to inspect structure and sample rows

# Preview a parquet dataset as one notebook cell: load it, print its
# structure, then show sample rows and per-column summaries.

p = Path("data/processed/gps_match_points.parquet")
if not p.exists():
    raise FileNotFoundError(f"Parquet file not found: {p}")

# Load with pyarrow explicitly; on any failure retry with the engine pandas
# picks by default. The first failure is reported instead of being swallowed,
# so it is visible why the fallback ran.
try:
    df = pd.read_parquet(p, engine="pyarrow")
except Exception as exc:
    print(
        f"pyarrow read failed ({type(exc).__name__}: {exc}); "
        "retrying with the pandas default engine"
    )
    df = pd.read_parquet(p)

# Basic overview
print("path:", p)
print("shape:", df.shape)
print("columns:", list(df.columns))
print("\ndtypes:")
print(df.dtypes)

# Show top rows and a few summaries
print("\nFirst 10 rows:")
display(df.head(10))

print("\nMissing values per column:")
print(df.isna().sum())

print("\nMemory usage (deep):")
print(df.memory_usage(deep=True))

print("\nDescriptive summary (numeric + object):")
# The summary is best-effort: describe() can raise for object columns holding
# unhashable values (e.g. nested lists) or for a frame without columns. Report
# the problem and carry on so the geometry inspection below still runs.
try:
    summary = df.describe(include="all")
except (TypeError, ValueError) as exc:
    print(f"describe(include='all') failed ({type(exc).__name__}: {exc}); skipping summary")
else:
    display(summary.transpose())

# If there is a geometry/point column, show a few examples and types.
# "geom" also matches names containing "geometry", so two substrings suffice.
GEOM_HINTS = ("geom", "point")
geom_cols = [
    c for c in df.columns
    if any(hint in str(c).lower() for hint in GEOM_HINTS)
]
if geom_cols:
    print("\nPossible geometry columns:", geom_cols)
    for c in geom_cols:
        print(f"Sample values for {c}:")
        display(df[c].head(5))
        # Distinct Python types of the non-null cells (capped at 10) reveal
        # how the values were decoded, e.g. bytes vs. str vs. objects.
        print("Sample types:", df[c].dropna().map(type).unique()[:10])

: 