Downloading Historical Financial Data

In [51]:
import yfinance as yf # for retrieving historical financial data from Yahoo Finance.
import pandas as pd # for data manipulation and analysis

assets = {
    'Equity_Index': "^GSPC", # S&P 500       
    'Stock': "AAPL", # Apple              
    'Currency_Pair': "GBPUSD=X", # GBPUSD   
    'Commodity': "ZC=F", # Corn futures       
    'Crypto': "ETH-USD"  # Ethereum          
}

start_date = "2020-05-01"
end_date = "2025-05-01"

data = yf.download(list(assets.values()), start=start_date, end=end_date)["Close"]


[*********************100%***********************]  5 of 5 completed


Renaming the DataFrame columns from technical ticker symbols to more readable asset names and saving the cleaned DataFrame to a CSV file

In [52]:
data = data.rename(columns={'^GSPC':'S&P 500',
                     'AAPL':'Apple',
                     'GBPUSD=X':'GBPUSD',
                     'ZC=F':'Corn futures',
                     "ETH-USD": 'Ethereum'})

data = data.round(2)

data.to_csv('data.csv')

In [53]:
data.head()

Ticker,Apple,Ethereum,GBPUSD,Corn futures,S&P 500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-01,70.06,214.22,1.26,311.5,2830.71
2020-05-02,,215.33,,,
2020-05-03,,210.93,,,
2020-05-04,71.05,208.17,1.25,310.75,2842.74
2020-05-05,72.12,206.77,1.24,313.0,2868.44


In [54]:
data.tail()

Ticker,Apple,Ethereum,GBPUSD,Corn futures,S&P 500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-04-26,,1821.88,,,
2025-04-27,,1792.86,,,
2025-04-28,209.86,1798.85,1.33,475.5,5528.75
2025-04-29,210.93,1799.18,1.34,460.5,5560.83
2025-04-30,212.22,1793.78,1.34,467.25,5569.06


Verify that the DataFrame index is correctly parsed as datetime values

In [55]:
data.index

DatetimeIndex(['2020-05-01', '2020-05-02', '2020-05-03', '2020-05-04',
               '2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08',
               '2020-05-09', '2020-05-10',
               ...
               '2025-04-21', '2025-04-22', '2025-04-23', '2025-04-24',
               '2025-04-25', '2025-04-26', '2025-04-27', '2025-04-28',
               '2025-04-29', '2025-04-30'],
              dtype='datetime64[ns]', name='Date', length=1826, freq='D')

The in-sample period is starting on 2020-05-01

In [56]:
data.iloc[:-365]

Ticker,Apple,Ethereum,GBPUSD,Corn futures,S&P 500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-01,70.06,214.22,1.26,311.50,2830.71
2020-05-02,,215.33,,,
2020-05-03,,210.93,,,
2020-05-04,71.05,208.17,1.25,310.75,2842.74
2020-05-05,72.12,206.77,1.24,313.00,2868.44
...,...,...,...,...,...
2024-04-26,168.28,3130.16,1.25,440.00,5099.96
2024-04-27,,3252.17,,,
2024-04-28,,3262.77,,,
2024-04-29,172.46,3215.43,1.25,439.25,5116.17


The out-of-sample period is lasting for 365 days and is starting on 2024-05-01

In [57]:
data.iloc[-365:]

Ticker,Apple,Ethereum,GBPUSD,Corn futures,S&P 500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-05-01,168.28,2969.78,1.25,443.25,5018.39
2024-05-02,171.99,2988.17,1.25,452.00,5064.20
2024-05-03,182.28,3103.54,1.25,447.00,5127.79
2024-05-04,,3117.58,,,
2024-05-05,,3137.25,,,
...,...,...,...,...,...
2025-04-26,,1821.88,,,
2025-04-27,,1792.86,,,
2025-04-28,209.86,1798.85,1.33,475.50,5528.75
2025-04-29,210.93,1799.18,1.34,460.50,5560.83
