In [1]:
import os
import requests
from tqdm import tqdm
import time

In [2]:
# Remote directory the event files are fetched from, and the local root
# directory the downloaded copies are stored under.
BASE_URL = "https://www.astrouw.edu.pl/ogle/ogle4/ews/2025/"
OUTPUT_DIR = "data/raw/ogle4/2025"

# One sub-folder per downloaded file type. exist_ok=True makes this cell safe
# to re-run: directories that already exist are left untouched.
for subdir in ("photometry", "params"):
    os.makedirs(f"{OUTPUT_DIR}/{subdir}", exist_ok=True)

print("Directories ready:", OUTPUT_DIR)

Directories ready: data/raw/ogle4/2025


In [3]:
def download_event_files(event_id, base_url=BASE_URL, output_dir=OUTPUT_DIR, delay=0.25,
                         timeout=10, retries=3):
    """Download phot.dat and params.dat for a given OGLE event (e.g., 'blg-0001').

    Files that already exist on disk are skipped, so the function can be
    re-run after a partial or failed pass without re-downloading everything.

    Parameters
    ----------
    event_id : str
        Event identifier. Used as the remote sub-directory name and as the
        prefix of the local file names.
    base_url : str
        URL prefix the event directory is appended to; it is concatenated
        directly, so it must end with a slash.
    output_dir : str
        Local root directory. Files are written to
        ``<output_dir>/photometry`` and ``<output_dir>/params``.
    delay : float
        Seconds to sleep after each attempted file download. Also used as the
        base of the exponential back-off between retries.
    timeout : float
        Timeout in seconds passed to ``requests.get``.
    retries : int
        Maximum number of attempts per file when the request fails with a
        connection error or a timeout. Values below 1 are treated as 1.

    Returns
    -------
    dict
        Maps each file name to one of ``"ok"``, ``"exists"``,
        ``"missing (<status code>)"`` or ``"error (<exception>)"``.
    """
    url_prefix = f"{base_url}{event_id}/"
    files = ["phot.dat", "params.dat"]
    attempts = max(1, retries)
    results = {}

    for fname in files:
        dest_dir = "photometry" if "phot" in fname else "params"
        dest_path = f"{output_dir}/{dest_dir}/{event_id}_{fname}"

        if os.path.exists(dest_path):
            results[fname] = "exists"
            continue

        file_url = url_prefix + fname
        for attempt in range(1, attempts + 1):
            try:
                r = requests.get(file_url, timeout=timeout)
                # Responses of 100 bytes or fewer are reported as missing even
                # on HTTP 200 (presumably to reject stub/placeholder bodies).
                if r.status_code == 200 and len(r.content) > 100:
                    # Make sure the target folder exists even when a
                    # non-default output_dir is passed.
                    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
                    # Write to a temporary name and rename afterwards so an
                    # interrupted write never leaves a truncated file that a
                    # later run would report as "exists".
                    tmp_path = dest_path + ".part"
                    with open(tmp_path, "wb") as f:
                        f.write(r.content)
                    os.replace(tmp_path, dest_path)
                    results[fname] = "ok"
                else:
                    results[fname] = f"missing ({r.status_code})"
                break
            except (requests.ConnectionError, requests.Timeout) as e:
                # Transient network failure: record it, then retry with
                # exponential back-off unless this was the last attempt.
                results[fname] = f"error ({e})"
                if attempt < attempts:
                    time.sleep(delay * 2 ** attempt)
            except Exception as e:
                # Best-effort by design: any other failure is reported in the
                # result dict instead of aborting the whole download run.
                results[fname] = f"error ({e})"
                break

        # Pause between files to avoid hammering the server.
        time.sleep(delay)
    return results

In [4]:
# success_log collects ids of events whose files are all on disk;
# fail_log collects (event id, per-file status dict) pairs for the rest.
success_log = []
fail_log = []

for i in tqdm(range(1, 1497), desc="Downloading OGLE 2025 events"):
    # Event ids are zero-padded to four digits, e.g. "blg-0001".
    event_id = f"blg-{i:04d}"
    res = download_event_files(event_id)
    # An event fails as soon as one of its files is neither freshly
    # downloaded ("ok") nor already present ("exists").
    if any(v not in ("ok", "exists") for v in res.values()):
        fail_log.append((event_id, res))
    else:
        success_log.append(event_id)

print(f"\n✅ Completed downloads: {len(success_log)} successful, {len(fail_log)} failed.")


Downloading OGLE 2025 events: 100%|██████████| 1496/1496 [36:46<00:00,  1.48s/it]


✅ Completed downloads: 1495 successful, 1 failed.





In [5]:
import pandas as pd

# Flatten the failure log into one record per file whose status is neither
# "ok" nor "exists".
missing_records = [
    {"event": eid, "file": fname, "status": status}
    for eid, res in fail_log
    for fname, status in res.items()
    if status not in ("ok", "exists")
]

if missing_records:
    df_missing = pd.DataFrame(missing_records)
    # Build the report path from OUTPUT_DIR instead of repeating the literal
    # directory, so the CSV stays next to the downloads if OUTPUT_DIR changes.
    df_missing.to_csv(f"{OUTPUT_DIR}/missing_files.csv", index=False)
    display(df_missing.head())
    print(f"⚠️  {len(df_missing)} files missing or errored (see missing_files.csv).")
else:
    print("🎉 All files downloaded successfully!")


Unnamed: 0,event,file,status
0,blg-1224,params.dat,error (HTTPSConnectionPool(host='www.astrouw.e...


⚠️  1 files missing or errored (see missing_files.csv).
