In [5]:
from alpaca_trade_api.rest import REST, TimeFrame
import pandas as pd

In [6]:
# API key and secret
API_KEY = ''
API_SECRET = ''
BASE_URL = 'https://paper-api.alpaca.markets'

# API connection
api = REST(API_KEY, API_SECRET, base_url = BASE_URL, api_version = 'v2')

# storage path
path = "/Users/kaiwentay/Documents/GitHub/Pair_Trading_Fun/Data"

In [9]:
def fetch_data(symbol, start_date, end_date):
    try:
        data = api.get_bars(symbol, TimeFrame.Minute, start_date, end_date, limit=1000000).df
        return data
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return pd.DataFrame()

# Define the symbols and the date range for 3 months
symbols = ['SOXL', 'SOXX']
end_date = '2023-12-01'  # Current date
start_date = '2022-6-01'  # 6 months ago from the end date

# Fetch and store minute data for each symbol
for symbol in symbols:
    print(f"Fetching minute data for {symbol}...")
    minute_data = fetch_data(symbol, start_date, end_date)
    if not minute_data.empty:
        # Save to CSV
        csv_path = f"{path}/{symbol}_minute_data.csv"
        minute_data.to_csv(csv_path)
        print(f"Data saved to {csv_path}")
    else:
        print(f"No data fetched for {symbol}")  

Fetching minute data for SOXL...
Error fetching data for SOXL: 400 Client Error: Bad Request for url: https://data.alpaca.markets/v2/stocks/SOXL/bars?timeframe=1Min&adjustment=raw&start=2022-6-01&end=2023-12-01&limit=10000
No data fetched for SOXL
Fetching minute data for SOXX...
Error fetching data for SOXX: 400 Client Error: Bad Request for url: https://data.alpaca.markets/v2/stocks/SOXX/bars?timeframe=1Min&adjustment=raw&start=2022-6-01&end=2023-12-01&limit=10000
No data fetched for SOXX


In [4]:
# combine the data into one dataframe (taking only close, and noting that timestamps might be different)
df = pd.DataFrame()
for symbol in symbols:
    csv_path = f"{path}/{symbol}_minute_data.csv"
    df_temp = pd.read_csv(csv_path, index_col='timestamp', parse_dates=True)
    df_temp = df_temp[['close']]
    df_temp.columns = [symbol]

    # Forward fill missing values
    df_temp = df_temp.ffill()

    df = pd.concat([df, df_temp], axis=1)

# Forward fill any remaining missing values in the combined DataFrame
df = df.ffill()
df.dropna(inplace = True)

# save the combined data
df.to_csv(f"{path}/minute_data.csv")