In [1]:
import ast
import os
os.chdir('/home/denisalpino/dev/FinABYSS')

import polars as pl
import pandas as pd
from alpha_vantage.timeseries import TimeSeries

from utils.api_key_manager import APIKeyManager

In [2]:
# Read the Alpha Vantage key material from the environment and build the API client.
raw_api_keys = os.getenv("AV_API_KEY")
if raw_api_keys is None:
    # Fail with an explicit message instead of the opaque TypeError that eval(None) raised.
    raise RuntimeError("Environment variable AV_API_KEY is not set")

# ast.literal_eval only accepts Python literals, so the variable's content can never be
# executed as code (unlike eval).
# NOTE(review): presumably AV_API_KEY holds a literal collection of key strings — confirm
# against what APIKeyManager expects.
akm = APIKeyManager(ast.literal_eval(raw_api_keys))

# The client returns pandas frames; with integer indexing the timestamps arrive as a
# regular column rather than as the index.
ts = TimeSeries(key=akm.get_current_key(), output_format='pandas', indexing_type='integer')

In [3]:
# Download 1-minute NVDA bars (extended hours included) one calendar month at a time
# and combine them into a single Polars frame with UTC timestamps.

# Months to request, formatted as "YYYY-MM" (2023-09 through 2025-03, both inclusive).
dates = pd.date_range("2023-09-01", "2025-03-01", freq="MS", inclusive="both").strftime("%Y-%m").values

# Request arguments shared by the first attempt and the retry.
intraday_params = dict(
    symbol="NVDA", interval="1min",
    outputsize="full",
    extended_hours="true",
)


def _normalize_column(name: str) -> str:
    """Drop a leading "N. " prefix from a column name and map "index" to "datetime"."""
    stripped = name.split(". ")[-1]
    return "datetime" if stripped == "index" else stripped


# Per-month frames are collected here and concatenated once after the loop, so the
# accumulated data is not re-copied on every iteration.
monthly_frames = []

for date in dates:
    # Fetch OHLCV data for the month
    try:
        data, meta_data = ts.get_intraday(month=date, **intraday_params)  # type: ignore
    except ValueError:
        # Presumably the current key was rejected (e.g. quota reached) — TODO confirm.
        # Switch to the next key and retry once; a second failure propagates.
        ts = TimeSeries(key=akm.get_next_key(), output_format='pandas', indexing_type='integer')
        data, meta_data = ts.get_intraday(month=date, **intraday_params)  # type: ignore

    # Rename columns without mutating the frame returned by the client
    data = data.rename(columns=_normalize_column)  # type: ignore

    # Parse the timestamps, attach the exchange time zone reported in the response
    # metadata, then convert to UTC
    data["datetime"] = (
        pd.to_datetime(data["datetime"])
        .dt.tz_localize(meta_data["6. Time Zone"])
        .dt.tz_convert("UTC")
    )

    monthly_frames.append(pl.from_pandas(data))

# Polars' sort returns a new frame, so the result must be assigned back; otherwise
# ohlcv_df (and anything written from it later) stays in download order.
ohlcv_df = pl.concat(monthly_frames).sort("datetime")
ohlcv_df

datetime,open,high,low,close,volume
"datetime[ns, UTC]",f64,f64,f64,f64,f64
2023-09-01 08:00:00 UTC,49.1308,49.3267,49.1308,49.2127,25030.0
2023-09-01 08:01:00 UTC,49.2517,49.2897,49.1847,49.1847,13230.0
2023-09-01 08:02:00 UTC,49.2337,49.2337,49.2067,49.2317,4090.0
2023-09-01 08:03:00 UTC,49.2067,49.2327,49.1917,49.2037,5210.0
2023-09-01 08:04:00 UTC,49.2037,49.2277,49.1758,49.1917,5950.0
…,…,…,…,…,…
2025-03-19 23:55:00 UTC,118.74,118.75,118.72,118.74,21543.0
2025-03-19 23:56:00 UTC,118.73,118.75,118.7,118.7316,31322.0
2025-03-19 23:57:00 UTC,118.74,118.77,118.73,118.7696,34832.0
2025-03-19 23:58:00 UTC,118.769,118.8,118.76,118.7801,31223.0


In [4]:
# Estimated in-memory size of the combined frame, reported in megabytes
ohlcv_df.estimated_size(unit="mb")

16.960968017578125

In [5]:
# Persist the combined frame as parquet; the path is relative to the current working directory
ohlcv_df.write_parquet(file="data/ohlcv.parquet")