In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import datetime
import yfinance as yf
from tqdm import tqdm

In [4]:
# Ticker symbols (as passed to yfinance) mapped to a human-readable commodity name
tickers_names = {
    "CL=F": "Crude Oil",
    "HO=F": "Heating Oil",
    "NG=F": "Natural Gas",
    "RB=F": "RBOB Gasoline",
    "BZ=F": "Brent Crude Oil",
}

In [5]:
# 1. Function to fetch data for a given ticker symbol and commodity name
def fetch_data(ticker_symbol, commodity_name, start="1900-01-01", end=None):
    """Download the price history of one ticker and return it as a tidy frame.

    Parameters
    ----------
    ticker_symbol : str
        Symbol handed to ``yf.Ticker`` (e.g. ``"CL=F"``); also stored in the
        ``ticker`` column of the result.
    commodity_name : str
        Label stored in the ``commodity`` column of the result.
    start : str, optional
        Start date forwarded to ``Ticker.history``. The default,
        ``"1900-01-01"``, keeps the original "as far back as possible" request.
    end : str, optional
        End date forwarded to ``Ticker.history``. When omitted, today's date
        (``YYYY-MM-DD``) is computed at call time.

    Returns
    -------
    pandas.DataFrame
        The history with its index turned into a column, the ``Dividends``,
        ``Stock Splits`` and ``Adj Close`` columns removed when present, all
        column names lower-cased, and ``ticker`` / ``commodity`` placed first.
        A datetime-typed ``Date`` column is converted to ``YYYY-MM-DD`` strings.

    Raises
    ------
    KeyError
        If the history has no ``Date`` column after ``reset_index``.
    """
    if end is None:
        # Resolved per call (not as a default argument) so a long-lived kernel
        # never reuses a stale date.
        end = datetime.date.today().strftime('%Y-%m-%d')

    # reset_index() returns a new frame with the history index as a regular column
    data = yf.Ticker(ticker_symbol).history(start=start, end=end).reset_index()

    # Store dates as plain strings, but only when they actually arrive as datetimes
    if pd.api.types.is_datetime64_any_dtype(data['Date']):
        data['Date'] = data['Date'].dt.strftime('%Y-%m-%d')

    # Drop the unnecessary columns; errors='ignore' tolerates columns that are absent
    data = data.drop(columns=['Dividends', 'Stock Splits', 'Adj Close'], errors='ignore')

    data['ticker'] = ticker_symbol
    data['commodity'] = commodity_name

    # Convert column names to lowercase
    data.columns = [col.lower() for col in data.columns]

    # Reorder columns to have 'ticker' and 'commodity' at the beginning
    leading = ['ticker', 'commodity']
    return data[leading + [col for col in data.columns if col not in leading]]


# 2. Fetch every configured ticker, collecting one DataFrame per symbol
all_data = []
for symbol, name in tqdm(
    tickers_names.items(),
    desc="Fetching data",  # label shown next to the progress bar
):
    all_data.append(fetch_data(symbol, name))

# Stack the per-ticker frames into a single frame with a fresh 0..n-1 index
master_data = pd.concat(all_data, ignore_index=True)

Fetching data:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching data: 100%|██████████| 5/5 [00:01<00:00,  3.77it/s]


In [6]:
# Print a summary of the combined frame: row count, columns, non-null counts and dtypes
master_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28789 entries, 0 to 28788
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ticker     28789 non-null  object 
 1   commodity  28789 non-null  object 
 2   date       28789 non-null  object 
 3   open       28789 non-null  float64
 4   high       28789 non-null  float64
 5   low        28789 non-null  float64
 6   close      28789 non-null  float64
 7   volume     28789 non-null  int64  
dtypes: float64(4), int64(1), object(3)
memory usage: 1.8+ MB
