# 1. Backfill Yahoo Finance Data
Fetch historical OHLCV data for QQQ, XLK, and VIX from Yahoo Finance and upload it to the Hopsworks Feature Store.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
from utils.data_fetchers import fetch_yahoo_data, validate_ohlcv_data
from utils.hopsworks_helpers import get_feature_store, create_feature_group
from dotenv import load_dotenv
import yaml

# Load variables from a local .env file into the process environment
# (python-dotenv). NOTE(review): presumably this is where the Hopsworks
# credentials used further down come from -- confirm.
load_dotenv()

# Load config.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open('../config/config.yaml', 'r', encoding='utf-8') as f:
    # safe_load only builds plain Python objects (dicts, lists, scalars).
    config = yaml.safe_load(f)

## Fetch QQQ Data

In [None]:
# The backfill window is read from the `data` section of the loaded config;
# the same start/end values are reused by the XLK and VIX cells.
data_cfg = config['data']
start_date, end_date = data_cfg['start_date'], data_cfg['end_date']

# Download the QQQ history for that window and run it through the OHLCV
# validator.
# NOTE(review): the validator's return value is discarded here; presumably it
# raises on bad data -- confirm in utils.data_fetchers.
qqq_data = fetch_yahoo_data(
    'QQQ',
    start_date,
    end_date,
)
validate_ohlcv_data(qqq_data)

# Sanity check: report the frame's dimensions, then preview the first rows
# (the trailing expression is what the notebook displays).
print(f"QQQ data shape: {qqq_data.shape}")
qqq_data.head()

## Fetch XLK Data (Technology Sector ETF)

In [None]:
# Download XLK for the same start/end window used for QQQ, then run it
# through the same OHLCV validator.
xlk_data = fetch_yahoo_data(
    'XLK',
    start_date,
    end_date,
)
validate_ohlcv_data(xlk_data)

# Report the frame's dimensions, then preview the first rows
# (the trailing expression is what the notebook displays).
print(f"XLK data shape: {xlk_data.shape}")
xlk_data.head()

## Fetch VIX Data (Volatility Index)

In [None]:
# Download the VIX index ('^VIX' is the Yahoo Finance symbol passed to the
# fetcher) for the same start/end window as the two ETFs.
# NOTE(review): unlike the QQQ and XLK cells, validate_ohlcv_data is not
# called on this frame -- presumably intentional because VIX is an index
# rather than a traded ETF; confirm.
vix_data = fetch_yahoo_data(
    '^VIX',
    start_date,
    end_date,
)

# Report the frame's dimensions, then preview the first rows
# (the trailing expression is what the notebook displays).
print(f"VIX data shape: {vix_data.shape}")
vix_data.head()

## Upload to Hopsworks Feature Store

In [None]:
# Connect to Hopsworks
fs = get_feature_store()


def _prefix_columns(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return a copy of *df* whose non-'date' columns are named '<prefix>_<column>'.

    Each column is renamed in place (by position in the existing column list),
    so a value column keeps its own data no matter where 'date' sits in the
    frame. Assigning ``['date'] + [...]`` instead would silently shift every
    label whenever 'date' is not the first column.

    Args:
        df: Frame containing a 'date' column plus the value columns to prefix.
        prefix: Ticker prefix, e.g. 'qqq'.

    Returns:
        A new DataFrame; *df* itself is left untouched.

    Raises:
        ValueError: If *df* has no 'date' column. The feature groups below use
            'date' as their primary key, so fail here rather than at upload.
    """
    if 'date' not in df.columns:
        raise ValueError(
            f"expected a 'date' column before applying prefix {prefix!r}; "
            f"got columns {list(df.columns)}"
        )
    renamed = df.copy()
    renamed.columns = [
        col if col == 'date' else f'{prefix}_{col}' for col in df.columns
    ]
    return renamed


# Prepare data for upload (rename columns with ticker prefix)
qqq_data_fg = _prefix_columns(qqq_data, 'qqq')
xlk_data_fg = _prefix_columns(xlk_data, 'xlk')
vix_data_fg = _prefix_columns(vix_data, 'vix')

In [None]:
# Create feature groups
def _create_raw_feature_group(name, frame, description):
    """Create one feature group in `fs` for a prefixed frame, keyed on 'date'."""
    return create_feature_group(
        fs,
        name=name,
        df=frame,
        primary_key=['date'],
        description=description,
    )


# One feature group per instrument, created in the order QQQ, XLK, VIX.
qqq_fg = _create_raw_feature_group(
    'qqq_raw', qqq_data_fg, 'Raw OHLCV data for QQQ ETF'
)
xlk_fg = _create_raw_feature_group(
    'xlk_raw', xlk_data_fg, 'Raw OHLCV data for XLK sector ETF'
)
vix_fg = _create_raw_feature_group(
    'vix_raw', vix_data_fg, 'Raw data for VIX volatility index'
)

# Only reached when all three calls above returned without raising.
print("Yahoo Finance data uploaded to Hopsworks!")