In [3]:
# ╔══════════════════════════════════════════════════╗
# ║ 📦 Cell 1 – Imports & Path Setup                  ║
# ╚══════════════════════════════════════════════════╝
import os
from pathlib import Path
import pandas as pd

# Resolve the project root from the PROJECT_ROOT environment variable.
# NOTE: this cell does not load a .env file itself; the variable must already
# be present in the kernel's environment before the cell runs.
project_root_env = os.getenv("PROJECT_ROOT")
if not project_root_env:
    # Fail early with an actionable message. Without this guard an unset
    # variable surfaces as an opaque TypeError from Path(None), and an empty
    # one silently resolves to the current working directory.
    raise RuntimeError(
        "PROJECT_ROOT environment variable is not set (or is empty); "
        "set it to the data lake root before running this notebook."
    )
root_dir = Path(project_root_env)

# Define data zones
staging_dir = root_dir / "staging"
curated_dir = root_dir / "curated"

# Ensure curated directory exists (no error if it is already there)
curated_dir.mkdir(parents=True, exist_ok=True)


In [4]:
# ╔══════════════════════════════════════════════════╗
# ║ 📊 Cell 2 – Load Staged Daily Streams CSVs       ║
# ╚══════════════════════════════════════════════════╝
# DistroKid: locate the staged daily-streams CSV and load it
distrokid_path = staging_dir.joinpath("daily_streams_distrokid.csv")
distrokid_df = pd.read_csv(filepath_or_buffer=distrokid_path)

# TooLost: locate the staged daily-streams CSV and load it
toolost_path = staging_dir.joinpath("daily_streams_toolost.csv")
toolost_df = pd.read_csv(filepath_or_buffer=toolost_path)


In [5]:
# ╔══════════════════════════════════════════════════╗
# ║ 🔀 Cell 3 – Combine & Save to Curated Zone        ║
# ╚══════════════════════════════════════════════════╝
# Destination file inside the curated zone
output_path = curated_dir.joinpath("tidy_daily_streams.csv")

# Label every row with the distributor it came from.
# These assignments add/overwrite the "source" column on the loaded frames.
distrokid_df["source"] = "distrokid"
toolost_df["source"] = "toolost"

# Stack both sources into a single table with a fresh 0..n-1 index;
# sort=False leaves the column order as pandas builds it (no alphabetical sort).
tidy_streams = pd.concat(
    objs=[distrokid_df, toolost_df],
    ignore_index=True,
    sort=False,
)

# Optional follow-up (not applied here): parse a "date" column with
# pd.to_datetime and sort the rows by it, if the inputs have such a column.

# Persist the combined table without writing the index column
tidy_streams.to_csv(path_or_buf=output_path, index=False)

print(f"Tidy daily streams saved to {output_path}")


Tidy daily streams saved to C:\Users\Earth\BEDROT PRODUCTIONS\BEDROT DATA LAKE\data_lake\curated\tidy_daily_streams.csv
