In [1]:
import os
import time
import requests
import pandas as pd
from tqdm import tqdm

CSV_PATH       = './track_genre_balanced_url.csv'
DOWNLOAD_DIR   = 'audio_previews'
SLEEP_INTERVAL = 60   # seconds to wait between checks

# Track ids we have already attempted during this run (in-memory only).
processed = set()


def load_tracks(csv_path):
    """Read the track CSV, returning None when it is not usable yet.

    The CSV may be missing, empty, or caught mid-write by whatever produces
    it; none of these should stop the watcher, so each case is reported and
    signalled with None so the caller can simply try again later.
    """
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"{csv_path} not found, waiting...")
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        print(f"{csv_path} not readable yet ({e}), waiting...")
    return None


def save_atomically(data, out_path):
    """Write bytes to out_path so that the file is never left half-written.

    The data goes to a sibling '.part' file first and is moved into place
    with os.replace. Because an existing out_path is treated as "already
    downloaded", a truncated file there would otherwise be skipped forever.
    """
    tmp_path = f"{out_path}.part"
    try:
        with open(tmp_path, 'wb') as f:
            f.write(data)
        os.replace(tmp_path, out_path)
    except BaseException:
        # Also covers KeyboardInterrupt: drop the partial file, then re-raise.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


def download_preview(track_id, preview_url, out_path, timeout=30):
    """Download one preview to out_path on a best-effort basis.

    Nothing is raised for ordinary failures: an already existing file, a
    missing URL, a non-200 response, or a download/write error are reported
    (where relevant) and the function returns.
    """
    # skip if file already exists
    if os.path.exists(out_path):
        return

    # A missing CSV cell arrives as NaN (a float), not as a string.
    if not isinstance(preview_url, str) or not preview_url.strip():
        print(f"No preview URL for {track_id}")
        return

    try:
        resp = requests.get(preview_url, timeout=timeout)
        if resp.status_code == 200:
            save_atomically(resp.content, out_path)
        else:
            print(f"HTTP {resp.status_code} for {track_id}")
    except Exception as e:
        # Per-track boundary: one bad track must not stop the watcher.
        print(f"Error downloading {track_id}: {e}")


def main():
    """Poll CSV_PATH forever and download previews for unseen track ids."""
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    while True:
        # 1) Load current CSV (None means "not ready, try again later")
        df = load_tracks(CSV_PATH)

        if df is not None:
            # 2) Identify new tracks we haven't attempted yet
            df_new = df[~df['track_id'].isin(processed)]

            # 3) Download each new preview with a progress bar
            if not df_new.empty:
                pairs = zip(df_new['track_id'], df_new['preview'])
                for track_id, preview_url in tqdm(pairs,
                                                  total=len(df_new),
                                                  desc="Downloading previews",
                                                  unit="track"):
                    # Mark as attempted up front so a failing track is not
                    # retried on every poll. The set lives in memory only;
                    # across restarts it is the file-exists check that
                    # prevents re-downloading.
                    processed.add(track_id)
                    out_path = os.path.join(DOWNLOAD_DIR, f"{track_id}.mp3")
                    download_preview(track_id, preview_url, out_path)

        # 4) Pause before next check
        time.sleep(SLEEP_INTERVAL)


if __name__ == "__main__":
    main()


Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7814.41track/s]
Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8865.11track/s]
Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8410.16track/s]
Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 10286.20track/s]
Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6751.92track/s]
Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7016.85track/s]
Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 9608.71track/s]
Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 569.98track/s]
Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 8934.23track/s]
Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3487.43track/s]
Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 8381.08track/s]
Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3057.72track/s]
Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6150.47track/s]
Downloading previews: 100

KeyboardInterrupt: 