# Example Notebook to Load Data

In [5]:
import pandas as pd
import os
from huggingface_hub import HfApi
import dotenv
import pyarrow
# Load variables from a local .env file into the process environment
# (presumably a Hugging Face access token for huggingface_hub — TODO confirm).
# The `True` shown as this cell's output is this call's return value.
dotenv.load_dotenv()

True

In [6]:
# Shared Hugging Face Hub client, reused by every download/listing call below.
api = HfApi()
# Hub repository holding the daily parquet shards; the cells below access it
# with repo_type="dataset".
repo_id = "hblim/top_reddit_posts_daily"

In [13]:
# ——— Fetch a single day's shard and preview its first record ———
# `date_str`, `today_path` and `df_today` are kept as notebook-level names
# because the following cell reads them.
date_str = "2025-04-18"

# Shards are stored in the dataset repo as data_raw/<date>.parquet.
today_path = api.hf_hub_download(
    filename=f"data_raw/{date_str}.parquet",
    repo_type="dataset",
    repo_id=repo_id,
)
df_today = pd.read_parquet(today_path)

# Widen the column display so the `text` field is less likely to be truncated.
pd.set_option('display.max_colwidth', 100)

print(f"Records for {date_str}:")
# Show the row labelled 0 transposed: one field per line instead of one wide row.
df_today.loc[[0]].transpose()

Records for 2025-04-18:


Unnamed: 0,0
subreddit,apple
created_at,2025-04-17 19:59:44-05:00
retrieved_at,2025-04-18 12:46:10.631577-05:00
type,post
text,Apple wanted people to vibe code Vision Pro apps with Siri\n\n
score,427
post_id,1k1sn9w
parent_id,


In [6]:
# ——— Download all historical data ———
# Builds `df_all`: one DataFrame containing the rows of every parquet shard
# found under data_raw/ in the dataset repo.

# 1. List all parquet files in the dataset repo.
#    Sorted so shards are processed in filename order (chronological if every
#    shard follows the data_raw/<YYYY-MM-DD>.parquet naming used above).
all_files = api.list_repo_files(repo_id, repo_type="dataset")
parquet_files = sorted(
    f for f in all_files if f.startswith("data_raw/") and f.endswith(".parquet")
)

# Fail early with an actionable message: pd.concat() on an empty list raises
# an opaque "No objects to concatenate" ValueError.
if not parquet_files:
    raise ValueError(
        f"No parquet shards found under data_raw/ in dataset repo {repo_id!r}"
    )

# 2. Download each shard and load with pandas
dfs = []
for shard in parquet_files:
    local_path = api.hf_hub_download(repo_id=repo_id, filename=shard, repo_type="dataset")
    dfs.append(pd.read_parquet(local_path))

# 3. Concatenate into one DataFrame (ignore_index gives a fresh 0..N-1 index
#    instead of repeating each shard's own row labels)
df_all = pd.concat(dfs, ignore_index=True)
print(f"Total records across {len(dfs)} days: {len(df_all)}")

# NOTE(review): the lines below re-read the single-day shard and rely on
# `today_path` / `date_str` defined in the previous cell, so this cell raises
# NameError on a fresh kernel unless that cell ran first. Consider previewing
# `df_all.head()` here instead and re-running the notebook to refresh outputs.
df_today = pd.read_parquet(today_path)
print(f"Records for {date_str}:")
df_today.head()

Total records across 5 days: 1443
Records for 2025-04-18:


Unnamed: 0,subreddit,created_at,retrieved_at,type,text,score,post_id,parent_id
0,apple,2025-04-17 19:59:44-05:00,2025-04-18 12:46:10.631577-05:00,post,Apple wanted people to vibe code Vision Pro ap...,427,1k1sn9w,
1,apple,2025-04-17 20:17:24-05:00,2025-04-18 12:46:10.631577-05:00,comment,"Using Siri? You want me to build, test and rel...",793,mnor2mf,t3_1k1sn9w
2,apple,2025-04-17 20:02:06-05:00,2025-04-18 12:46:10.631577-05:00,comment,Wtf is vibe coding?? \n \nWe're reaching incre...,216,mnoom31,t3_1k1sn9w
3,apple,2025-04-17 20:05:37-05:00,2025-04-18 12:46:10.631577-05:00,comment,What.,154,mnop6rz,t3_1k1sn9w
4,apple,2025-04-17 20:37:08-05:00,2025-04-18 12:46:10.631577-05:00,comment,Here's something I found on the web about vibe...,132,mnou85s,t3_1k1sn9w
