In [None]:
import ast
import os
# Work from the project root: the notebook writes to the relative path "data/..." and
# imports `utils.*` right after this call (presumably resolved against the working directory).
# Set the FINABYSS_ROOT environment variable to point at your own checkout; the original
# author's location is kept as the fallback so existing setups behave as before.
os.chdir(os.environ.get("FINABYSS_ROOT", '/home/denisalpino/dev/FinABYSS'))

import polars as pl
import pandas as pd
from alpha_vantage.timeseries import TimeSeries

from utils.api_key_manager import APIKeyManager

##### **Initialization**

In [None]:
# AV_API_KEY must contain a Python literal describing the Alpha Vantage key(s)
# (presumably a list of key strings — confirm against APIKeyManager).
raw_api_keys = os.getenv("AV_API_KEY")
if raw_api_keys is None:
    raise RuntimeError("Environment variable AV_API_KEY is not set")

# ast.literal_eval only parses literals, so the environment variable can no longer
# execute arbitrary code the way eval() would.
akm = APIKeyManager(ast.literal_eval(raw_api_keys))

# Alpha Vantage client returning pandas frames with an integer index
ts = TimeSeries(key=akm.get_current_key(), output_format='pandas', indexing_type='integer')

# First day of every month in the period, formatted as "YYYY-MM" strings
dates = pd.date_range("2023-09-01", "2025-03-01", freq="MS", inclusive="both").strftime("%Y-%m").values

In [None]:
# Start with an empty Polars DataFrame so `ohlcv_df` is defined before the monthly data is collected
ohlcv_df = pl.DataFrame()

##### **Collecting 1-min OHLCV data for each month of the period**

In [None]:
def fetch_intraday_month(client, month):
    """Request one month of 1-minute NVDA bars (extended hours included).

    Args:
        client: Alpha Vantage ``TimeSeries`` client used for the request.
        month: Month to download, formatted as "YYYY-MM".

    Returns:
        The ``(data, meta_data)`` pair returned by ``client.get_intraday``.
    """
    return client.get_intraday(  # type: ignore
        symbol="NVDA", interval="1min",
        outputsize="full", month=month,
        extended_hours="true",
    )


# Collect the monthly frames in a list and concatenate once at the end: this avoids
# re-copying the accumulated data on every iteration and makes the cell safe to re-run
# (the result is rebuilt from scratch instead of being appended to).
monthly_frames = []

for date in dates:
    # Fetch OHLCV-data for month; on ValueError switch to the next API key and retry once
    # (presumably the client signals a rejected / exhausted key this way — confirm).
    try:
        data, meta_data = fetch_intraday_month(ts, date)
    except ValueError:
        ts = TimeSeries(key=akm.get_next_key(), output_format='pandas', indexing_type='integer')
        data, meta_data = fetch_intraday_month(ts, date)

    # Rename columns: drop the "N. " prefix and call the former index column "datetime"
    data = (
        data
        .rename(columns=lambda x: x.split(". ")[-1] if ". " in x else x)  # type: ignore
        .rename(columns={"index": "datetime"})
    )

    # Convert string datetime to type datetime, localize to the time zone reported
    # in the response metadata and convert into UTC timezone
    data["datetime"] = (
        pd.to_datetime(data["datetime"])
        .dt.tz_localize(meta_data["6. Time Zone"])
        .dt.tz_convert("UTC")
    )

    monthly_frames.append(pl.from_pandas(data))

# Merge all months into the general DataFrame
ohlcv_df = pl.concat(monthly_frames)

In [4]:
# Estimated size of the collected frame, in megabytes
ohlcv_df.estimated_size('mb')

16.960968017578125

##### **Save collected data**

In [5]:
# Create the target directory first so the write does not fail on a fresh checkout
# where "data/" does not exist yet.
output_path = "data/ohlcv.parquet"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ohlcv_df.write_parquet(output_path)