In [1]:
import requests
import json

from pathlib import Path
from urllib.parse import urljoin
from datetime import date, timedelta
from itertools import product


In [2]:
def http(method:str, path:str, BASE="https://rata.digitraffic.fi/", **kwargs):
    """Send an HTTP request to the API and return the decoded JSON body.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. ``"GET"``).
        path: Path resolved against ``BASE``. Leading slashes are stripped
            first so the path is always joined relative to ``BASE``.
        BASE: Base URL the path is joined onto.
        **kwargs: Forwarded unchanged to ``requests.request``. If no
            ``timeout`` is given, a 30 second timeout is applied.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        requests.HTTPError: Propagated from ``response.raise_for_status()``.
    """
    # requests applies no timeout by default, so a stalled connection would
    # block the notebook indefinitely; callers can still pass their own value.
    kwargs.setdefault("timeout", 30)

    url = urljoin(BASE, path.lstrip("/"))
    response = requests.request(method, url, **kwargs)
    response.raise_for_status()
    return response.json()

In [3]:
# The project root is two directory levels above the current working directory.
project_dir = Path.cwd().parent.parent
print(project_dir)

/workspaces/VR-data


In [4]:
# Staging directory for the downloaded JSON files; created (with any missing
# parents) if it does not exist yet.
vr_data_path = project_dir.joinpath("data", "datalake", "staging", "vr_data")
vr_data_path.mkdir(parents=True, exist_ok=True)



In [5]:
START_DATE = "2023-11-01"
END_DATE = "2023-11-30"

# Parse the configured bounds; an empty END_DATE falls back to yesterday.
start_date = date.fromisoformat(START_DATE)
custom_end_date = None if not END_DATE else date.fromisoformat(END_DATE)
end_date = custom_end_date if custom_end_date else date.today() - timedelta(days=1)

# Day offsets covering start_date through end_date, both ends included.
daterange = range((end_date - start_date).days + 1)
iso_dates = [
    (start_date + timedelta(days=offset)).isoformat()
    for offset in daterange
]
print(f"Fetching from {iso_dates[0]} to {iso_dates[-1]} ({len(iso_dates)} days)")

Fetching from 2023-11-01 to 2023-11-30 (30 days)


In [6]:
# Download one day of train data per iteration and store it as
# <vr_data_path>/<YYYY-MM-DD>.json.
# The loop variable is deliberately not called `date`: that name is the
# imported datetime.date class, and rebinding it to a string would break
# `date.fromisoformat(...)` when the date-range cell is re-run.
for iso_date in iso_dates:
    data = http("GET", f"/api/v1/trains/{iso_date}")
    result_json = json.dumps(data)

    json_path = vr_data_path / f"{iso_date}.json"
    json_path.write_text(result_json, encoding='utf-8')
    print(json_path)

/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-01.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-02.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-03.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-04.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-05.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-06.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-07.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-08.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-09.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-10.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-11.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-12.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-13.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-14.json
/workspaces/VR-data/data/datalake/staging/vr_data/2023-11-15.json
/workspace