#### Data Acquisition & Environment Setup

##### Environment Variables & Imports

In [14]:
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
from alpaca.data.historical import CryptoHistoricalDataClient
from alpaca.data.requests import CryptoBarsRequest
from alpaca.data.timeframe import TimeFrame
from dotenv import load_dotenv

# Load credentials from .env into the process environment, then read them back.
load_dotenv()
ALPACA_KEY = os.getenv('ALPACA_PAPER_KEY')
ALPACA_SECRET = os.getenv('ALPACA_PAPER_SECRET')

# Fail fast when a credential is absent or empty: os.getenv returns None for an
# unset variable, and without this check the client below would be built with
# None credentials while the cell still reported success.
missing_credentials = [
    name
    for name, value in (
        ('ALPACA_PAPER_KEY', ALPACA_KEY),
        ('ALPACA_PAPER_SECRET', ALPACA_SECRET),
    )
    if not value
]
if missing_credentials:
    raise RuntimeError(
        "Missing Alpaca credentials: "
        + ", ".join(missing_credentials)
        + ". Define them in the environment or in a .env file."
    )

# We use the official alpaca-py SDK
client = CryptoHistoricalDataClient(api_key=ALPACA_KEY, secret_key=ALPACA_SECRET)

# NOTE(review): this cell issues no data request, so the message below confirms
# that credentials were found and the client was constructed; the first real
# check against the API is presumably the bars request — confirm.
print("Setup Complete. Authentication successful.")

Setup Complete. Authentication successful.


#### Define Competitor Sectors

In [15]:
# Sector -> member symbols, used to group assets for 'Competitor Intelligence'.
# Alpaca expects crypto pairs written as 'SYMBOL/USD'.
sectors = {
    "Layer_1": [
        "BTC/USD",
        "ETH/USD",
        "SOL/USD",
        "ADA/USD",
    ],
    "DeFi": [
        "UNI/USD",
        "AAVE/USD",
        "MKR/USD",
    ],
    "AI_Tokens": [
        "FET/USD",
        "RNDR/USD",
        "NEAR/USD",
    ],
    "Meme": [
        "DOGE/USD",
        "SHIB/USD",
        "PEPE/USD",
    ],
}

# Collect every symbol into one flat list (sector order, then in-sector order)
# so a single API request can cover all of them.
all_symbols = []
for sector_symbols in sectors.values():
    all_symbols.extend(sector_symbols)

print(f"Tracking {len(all_symbols)} assets across {len(sectors)} sectors.")

Tracking 13 assets across 4 sectors.


#### Fetch Historical Data

In [16]:
# Define timeframe: the trailing 365 days, anchored to the current time in UTC.
# A timezone-aware value is used because a naive datetime.now() is local
# wall-clock time, whereas the returned bar timestamps are UTC (+00:00) and
# alpaca-py documents naive start/end inputs as being interpreted as UTC, so a
# local naive value would shift the window by the machine's UTC offset.
end_date = datetime.now(timezone.utc)
start_date = end_date - timedelta(days=365)

# Create the request: daily bars for every tracked symbol over that window.
request_params = CryptoBarsRequest(
    symbol_or_symbols=all_symbols,
    timeframe=TimeFrame.Day,
    start=start_date,
    end=end_date
)

# Execute request and convert to DataFrame
# .df returns a MultiIndex DataFrame (symbol, timestamp)
bars = client.get_crypto_bars(request_params)
df = bars.df

# Reset index to make 'symbol' and 'timestamp' regular columns for easier BI manipulation
df = df.reset_index()

# Surface symbols that came back with no bars at all. Without this check they
# simply vanish from the dataset and every later per-sector aggregate is
# computed over fewer assets than the sector definition implies.
returned_symbols = set(df['symbol']) if 'symbol' in df.columns else set()
missing_symbols = [symbol for symbol in all_symbols if symbol not in returned_symbols]
if missing_symbols:
    print(f"Warning: no bars returned for {len(missing_symbols)} symbol(s): {missing_symbols}")

print("Data successfully retrieved.")
df.head()

Data successfully retrieved.


Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
0,BTC/USD,2025-01-28 00:00:00+00:00,102120.345,103703.135,100259.877,101343.35,3.303055,350.0,102723.462154
1,BTC/USD,2025-01-29 00:00:00+00:00,101336.85,104769.0065,101319.85,103737.25,1.703058,331.0,102911.488957
2,BTC/USD,2025-01-30 00:00:00+00:00,103754.0245,106398.52224,103274.793,104805.7615,1.22292,811.0,105326.680892
3,BTC/USD,2025-01-31 00:00:00+00:00,104786.493,106027.53,101544.669,102433.53,2.318476,297.0,103266.345077
4,BTC/USD,2025-02-01 00:00:00+00:00,102374.098,102828.21,100301.25,100668.6635,1.267548,173.0,101971.700572


#### Data Cleaning & Export

In [18]:
# 1. Invert the sector table into a symbol -> sector lookup and tag each row,
#    so the BI layer can group by sector.
symbol_to_sector = {}
for sector_name, sector_symbols in sectors.items():
    symbol_to_sector.update(dict.fromkeys(sector_symbols, sector_name))

sector_labels = df['symbol'].map(symbol_to_sector)
df['sector'] = sector_labels

# 2. Basic cleaning: make sure timestamps are datetimes, then order rows by
#    symbol and chronologically within each symbol.
parsed_timestamps = pd.to_datetime(df['timestamp'])
df['timestamp'] = parsed_timestamps
df = df.sort_values(by=['symbol', 'timestamp'])

# 3. Persist to CSV (no index column) for the next notebook
df.to_csv('processed_crypto_data.csv', index=False)

print(f"Saved {len(df)} rows to 'processed_crypto_data.csv'.")

Saved 3140 rows to 'processed_crypto_data.csv'.
