In [9]:
from alpaca.data.historical import CryptoHistoricalDataClient
from alpaca.data.requests import CryptoBarsRequest
from alpaca.data.timeframe import TimeFrame
from datetime import datetime, timedelta
import pandas as pd

# no keys required for crypto data
client = CryptoHistoricalDataClient()

years_back = 3 #Fetch 3 years of data
start_date = datetime.today() - timedelta(days=365 * years_back)
end_date = datetime.today()


request_params = CryptoBarsRequest(
                        symbol_or_symbols=["BTC/USD", "ETH/USD"],
                        timeframe=TimeFrame.Day, # Use TimeFrame.Minute for high-frequency trading
                        start=start_date,
                        end=end_date
                 )

bars = client.get_crypto_bars(request_params)

# Covert to DataFrame
df = bars.df

pd.set_option('display.width', 200)  # Set a large width
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.expand_frame_repr', False)  # Prevent line wrapping

# convert to dataframe
print(df)

# access bars as list - important to note that you must access by symbol key
# even for a single symbol request - models are agnostic to number of symbols
# bars["BTC/USD"]

                                        open          high         low       close       volume  trade_count          vwap
symbol  timestamp                                                                                                         
BTC/USD 2022-03-03 06:00:00+00:00  43343.070  44103.620000  41071.0000  41381.6400   987.526989      45631.0  42387.191245
        2022-03-04 06:00:00+00:00  41379.200  41907.210000  38568.1800  39020.9600  1069.260188      42510.0  40173.915678
        2022-03-05 06:00:00+00:00  39019.550  39720.380000  38816.7400  39505.8000   400.495882      27821.0  39299.400637
        2022-03-06 06:00:00+00:00  39506.490  39619.980000  37577.6200  38028.5200   661.143065      36339.0  38553.880344
        2022-03-07 06:00:00+00:00  38025.810  39552.310000  37166.0100  38363.4100  1052.591573      43036.0  38373.199810
...                                      ...           ...         ...         ...          ...          ...           ...
ETH/USD 2025-02-

## Format Timestamp

In [10]:

# df = df.reset_index()  # This will make timestamp a normal column
# df['timestamp'] = pd.to_datetime(df['timestamp'])  # Convert column to datetime
# df.set_index('timestamp', inplace=True)  # Set it back as index

# # Check for missing dates
# missing_dates = pd.date_range(start=start_date, end=end_date).difference(df.index)
# print("Missing Dates:", missing_dates)

# # Fill missing data if required
# df = df.asfreq('D').ffill()


# print(df)


# Reset index if needed
df = df.reset_index()

# Convert 'timestamp' to datetime if it's not already
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Remove duplicate timestamps (keep the first occurrence)
df = df.drop_duplicates(subset=['timestamp'], keep='first')

# Set 'timestamp' as index
df.set_index('timestamp', inplace=True)

# Ensure the index is sorted
df = df.sort_index()

# Check for missing dates
start_date, end_date = df.index.min(), df.index.max()
missing_dates = pd.date_range(start=start_date, end=end_date).difference(df.index)
print("Missing Dates:", missing_dates)

# Apply asfreq after removing duplicates
df = df.asfreq('D').ffill()

print(df)




Missing Dates: DatetimeIndex(['2022-03-14 06:00:00+00:00', '2022-03-15 06:00:00+00:00', '2022-03-16 06:00:00+00:00', '2022-03-17 06:00:00+00:00', '2022-03-18 06:00:00+00:00', '2022-03-19 06:00:00+00:00',
               '2022-03-20 06:00:00+00:00', '2022-03-21 06:00:00+00:00', '2022-03-22 06:00:00+00:00', '2022-03-23 06:00:00+00:00',
               ...
               '2024-10-25 06:00:00+00:00', '2024-10-26 06:00:00+00:00', '2024-10-27 06:00:00+00:00', '2024-10-28 06:00:00+00:00', '2024-10-29 06:00:00+00:00', '2024-10-30 06:00:00+00:00',
               '2024-10-31 06:00:00+00:00', '2024-11-01 06:00:00+00:00', '2024-11-02 06:00:00+00:00', '2024-11-03 06:00:00+00:00'],
              dtype='datetime64[ns, UTC]', length=714, freq=None)
                            symbol       open        high           low       close       volume  trade_count          vwap
timestamp                                                                                                                  
2022-03-03 