In [2]:
from vectorbtpro import *
import tzlocal
import pytz

# Data Fetching

In this notebook we fetch the data necessary for the ML strategies, and technical analysis (TA) strategies. We use third-party data providers, with which you will have to sign up and register keys (for free).

* [Dune Analytics](https://docs.dune.com/api-reference/overview/authentication)
* [Federal Reserve Bank of St. Louis (FRED)](https://fred.stlouisfed.org/docs/api/api_key.html)

Then create a file in the current directory named `.env` with the following:
```
DUNE_API_KEY=<key>
FRED_API_KEY=<key>
```
Finally, install dependencies: `uv pip install dune_client fedfred`

In [4]:
from dune_client.client import DuneClient
import fedfred as fd

In [5]:
tzlocal.get_localzone = lambda: pytz.timezone('UTC')

In [6]:
# retrieve API keys from environment
api_keys = dict()
with open(".env") as f:
    for line in f.readlines():
        elems = line.split("=")
        api_keys[elems[0].strip()] = elems[1].strip()
api_keys

{'DUNE_API_KEY': 'hafQP5vPw8oF8qovHAiwbLTbxpdN95gW',
 'FRED_API_KEY': '3a535693a7c1bb047269acdb2e4d7a0a'}

In [7]:
dune = DuneClient(api_key=api_keys['DUNE_API_KEY'])
fred = fd.FredAPI(api_key=api_keys['FRED_API_KEY'])

In [8]:
START_DATE = "2024-04-01"
END_DATE = "2025-12-18"
res_df = pd.DataFrame(index=pd.date_range(START_DATE, END_DATE, tz='UTC'))

### Target Price-series

In [5]:
data = vbt.YFData.pull(
    "SOL-USD",
    start=START_DATE,
    end=END_DATE,
    timeframe="1 day",
    tz="UTC"
)

data = data.rename_symbols({'SOL-USD':'BTCUSD', 
                            'X:ETHUSD' : 'ETHUSD'})

df = data.get()
df.index = pd.to_datetime(df.index, utc=True).normalize()
res_df = res_df.join(df, lsuffix="_SOLUSD")

### Features

In [6]:
for tix in ["SPY", "NDX", "^VIX", "QQQ", "BTC-USD"]:
    data = vbt.YFData.pull(
        tix,
        start=START_DATE,
        end=END_DATE,
        timeframe="1 day",
        tz="UTC"
    )
    df = data.get()
    df.index = pd.to_datetime(df.index, utc=True).normalize()
    res_df = res_df.join(df, lsuffix=f"_{tix}")

In [7]:
for tix in ['DGS10','GDP','FEDFUNDS','WALCL','M2V','M2SL', 'DTWEXBGS']:
    b10 = fred.get_series_observations(tix)
    ser = b10['value'][START_DATE:]
    ser.index = pd.to_datetime(ser.index, utc=True).normalize()
    res_df[tix] = ser
    
res_df = res_df.ffill() # forward-fill infrequently-observed timeseries

In [8]:
query_result = dune.get_latest_result(3930956)
metrics_df = pd.DataFrame(query_result.result.rows)
metrics_df.set_index('time_period', inplace=True)
metrics_df.index = pd.to_datetime(metrics_df.index, utc=True).normalize()
res_df = res_df.join(metrics_df)

In [9]:
res_df.to_csv("data/dataset1.csv", index_label="timestamp")

In [11]:
res_df.columns

Index(['Open_SPY', 'High_SPY', 'Low_SPY', 'Close_SPY', 'Volume_SPY',
       'Trade count', 'VWAP', 'Open_NDX', 'High_NDX', 'Low_NDX', 'Close_NDX',
       'Volume_NDX', 'Dividends_NDX', 'Stock Splits_NDX', 'Capital Gains_QQQ',
       'Open_^VIX', 'High_^VIX', 'Low_^VIX', 'Close_^VIX', 'Volume_^VIX',
       'Dividends_^VIX', 'Stock Splits_^VIX', 'Open_QQQ', 'High_QQQ',
       'Low_QQQ', 'Close_QQQ', 'Volume_QQQ', 'Dividends_QQQ',
       'Stock Splits_QQQ', 'Open_BTC-USD', 'High_BTC-USD', 'Low_BTC-USD',
       'Close_BTC-USD', 'Volume_BTC-USD', 'Dividends_BTC-USD',
       'Stock Splits_BTC-USD', 'Capital Gains', 'Open', 'High', 'Low', 'Close',
       'Volume', 'Dividends', 'Stock Splits', 'DGS10', 'GDP', 'FEDFUNDS',
       'WALCL', 'M2V', 'M2SL', 'DTWEXBGS', 'market_capitalization', 'mvrv',
       'nupl', 'price', 'realized_capitalization', 'realized_price', 'supply'],
      dtype='object')

In [12]:
res_df

Unnamed: 0,Open_SPY,High_SPY,Low_SPY,Close_SPY,Volume_SPY,Trade count,VWAP,Open_NDX,High_NDX,Low_NDX,...,M2V,M2SL,DTWEXBGS,market_capitalization,mvrv,nupl,price,realized_capitalization,realized_price,supply
2024-04-01 00:00:00+00:00,202.41,204.47,186.91,192.35,1.794294e+06,251217.0,194.4521,512.780997,513.319384,509.981277,...,1.389,20929.1,121.8884,,,,,,,
2024-04-02 00:00:00+00:00,192.37,192.39,175.89,181.54,2.202533e+06,320267.0,182.7262,507.308921,508.033303,505.586035,...,1.389,20929.1,121.6881,,,,,,,
2024-04-03 00:00:00+00:00,181.58,192.10,177.00,185.11,1.352704e+06,196709.0,186.9931,506.799898,509.961810,506.750965,...,1.389,20929.1,121.4129,,,,,,,
2024-04-04 00:00:00+00:00,185.12,190.22,180.19,184.15,1.252898e+06,185822.0,185.2732,512.477610,512.820204,501.944557,...,1.389,20929.1,121.1646,,,,,,,
2024-04-05 00:00:00+00:00,184.11,185.23,168.00,174.46,1.948604e+06,236453.0,175.1279,503.608731,509.462578,503.168211,...,1.389,20929.1,121.3671,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-14 00:00:00+00:00,133.15,133.72,127.80,129.41,7.651389e+05,174922.0,131.0006,686.142492,686.850422,677.169008,...,1.406,22411.0,120.5383,1.724289e+09,0.585737,-0.707250,0.002727,2.943791e+09,0.004656,6.322758e+11
2025-12-15 00:00:00+00:00,129.39,135.43,123.58,127.80,2.124395e+06,316704.0,128.5095,683.719676,683.739636,677.248806,...,1.406,22411.0,120.3501,1.671754e+09,0.569896,-0.754707,0.002644,2.933438e+09,0.004639,6.322760e+11
2025-12-16 00:00:00+00:00,127.79,130.22,124.87,129.11,1.633202e+06,326017.0,127.7210,677.228859,679.073445,672.991380,...,1.406,22411.0,120.2830,1.529147e+09,0.526156,-0.900578,0.002418,2.906264e+09,0.004597,6.322752e+11
2025-12-17 00:00:00+00:00,129.13,134.21,121.26,123.21,2.263454e+06,301056.0,126.4252,677.886913,678.435280,669.222513,...,1.406,22411.0,120.5451,1.439961e+09,0.498376,-1.006517,0.002277,2.889306e+09,0.004569,6.323045e+11
