## 00- ETL : Get NDX, SP500, Gold, BTCUSD and ETHUSD candlesticks 

Note: BTCUSD and ETHUSD candlesticks are obtained from cryptocompare.com

This section focuses on retrieving Bitcoin and Ether's OHLCV time series from cryptocompare.com using the ETL process. 

The main function responsible for this task is 'get_crypto_OHLCV_cc()'. 

**get_crypto_OHLCV_cc()**
> - The function checks for an existing .csv file in the 'Resources' folder and then compares the latest information extracted to complete the .csv with the last available information. 
> - The resulting merged dataset is then saved in the 'Resources' folder. 
> - It can be used by any crypto pair available on cryptocompare.com

*To-do:* 
> - IMPROVEMENT: instead of using a .csv file as a repository, it would be better to use a sqlite or mongodb database.

*Sources:* 
> - https://towardsdatascience.com/obtaining-historical-and-real-time-crypto-data-with-very-simple-web-programming-7b481f153630

In [24]:
# Import Dependencies

# Libraries for data handling and time manipulation
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import time
import pytz

# Libraries for handling system environment files and variables
import os
from pathlib import Path
from dotenv import load_dotenv

# Libraries for API calls
import requests  # HTTP library
import json      # JSON handling library

# Custom function from the functions_libraryfolder
from functions_library.functions import DFinfo


In [25]:

load_dotenv()
ccompare_api_key = os.getenv('CCOMPARE_API_KEY')


In [26]:
# Function to get crypto OHLCV data from CryptoCompare

def get_crypto_OHLCV_cc(crypto='ETH', fiat='USD', limit =2000, toTS=-1,api_key = ccompare_api_key):
    url = f"https://min-api.cryptocompare.com/data/v2/histohour?fsym={crypto}&tsym={fiat}&limit={limit}&toTs={toTS}&api_key={api_key}"

    r = requests.get(url, headers={"User-Agent": "XY"})

    if r.status_code == 200:
        with open(f"resources/{crypto}{fiat}_ccompare.json", "wb") as file:
            file.write(r.content)
        print(f"{crypto}-{fiat} JSON data downloaded successfully.")
    else:
        print(f"Failed to download {crypto}-{fiat} data.")
        print(r)

    # # Load the JSON data from the file
    pair_json = Path(f"resources/{crypto}{fiat}_ccompare.json")
    list_pair_df = pd.read_json(pair_json)
    ohlcv_list = list_pair_df['Data']['Data']

    # Create a DataFrame
    pair_1H_df = pd.DataFrame(ohlcv_list)

    # Convert 'time' column to datetime format
    pair_1H_df['timestamp'] = pd.to_datetime(pair_1H_df['time'], unit='s')
    pair_1H_df.drop(columns=['time'], inplace=True)

    # Set 'time' as the index
    pair_1H_df.set_index('timestamp', inplace=True)
    pair_1H_df.index = pair_1H_df.index.tz_localize('UTC')

    # Rename columns to match OHLCV format
    pair_1H_df.rename(columns={'volumefrom': 'volume'}, inplace=True)

    # Reorder columns
    pair_1H_df = pair_1H_df[['open', 'high', 'low', 'close', 'volume']]

    return pair_1H_df


In [27]:
test = get_crypto_OHLCV_cc(crypto='ETH', fiat='USD', limit =2000, toTS=-1,api_key = ccompare_api_key)
DFinfo(test)


ETH-USD JSON data downloaded successfully.


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-09-10 12:00:00+00:00,2351.26,2364.47,2350.1,2350.14,11780.76
2024-09-10 13:00:00+00:00,2350.14,2350.46,2329.86,2331.79,26065.33
2024-09-10 14:00:00+00:00,2331.79,2347.04,2321.2,2336.78,31628.87


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-02 18:00:00+00:00,3566.34,3634.29,3564.82,3623.67,55429.94
2024-12-02 19:00:00+00:00,3623.67,3623.79,3584.89,3611.95,43504.23
2024-12-02 20:00:00+00:00,3611.95,3631.49,3606.36,3623.72,25964.03


In [28]:
# Function to download and merge crypto OHLCV data from CryptoCompare

def download_and_merge_crypto_OHLCV_cc(crypto='ETH', fiat='USD'):
    # initialize variables and list to store dataframes
    limit=2000

    # Convert the current time to UTC
    now_utc = datetime.now(pytz.timezone('UTC'))

    # Check if the CSV file exists
    csv_path = f'Resources/{crypto}{fiat}_1H_ccompare.csv'

    if Path(csv_path).is_file():

        # Load the CSV file into a DataFrame
        result_df = pd.read_csv(csv_path, parse_dates=['timestamp'], index_col='timestamp')

        # Convert the latest timestamp to UTC
        latest_timestamp = result_df.index.max().tz_convert('UTC')
        first_timestamp = result_df.index.min().tz_convert('UTC')

        # Calculate the number of hours between now (in UTC) and the latest entry in the CSV file (in UTC)
        hours_since_latest = (now_utc - latest_timestamp).total_seconds() / 3600
        total_batches = int(hours_since_latest / limit) + 1
        hours_to_download = int(hours_since_latest)

        # Print information about the CSV file
        print(f"Detecting OHLCV data that was previously downloaded:")
        print(f" > latest timestamp available (UTC): {latest_timestamp}")
        print(f" > first timestamp available (UTC): {first_timestamp}")
        print("")

    else:
        # Initialize an empty DataFrame if the CSV file does not exist
        result_df = pd.DataFrame()

        # Calculate the number of batches needed from January 2017 until now
        start_date = datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
        hours_since_start = (now_utc - start_date).total_seconds() / 3600
        hours_since_start = (now_utc - start_date).total_seconds() / 3600
        total_batches = int(hours_since_start / limit) + 1
        hours_to_download = int(hours_since_start)

    if hours_to_download == 0:
        print(f"Already up to date! No new {crypto}-{fiat} data to download.")

    else:
        print(f"Downloading {total_batches} batches of {limit} hours each from CryptoCompare.")
        print(f"for completing the {hours_to_download} missing hours of {crypto}-{fiat}")

        # We will start downloading from the latest timestamp available in the API (-1)
        batch_timestamp = -1

        for batch in range(total_batches):
            # Download data for the current batch
            df_batch = get_crypto_OHLCV_cc(crypto=crypto, fiat=fiat, limit=limit, toTS=batch_timestamp, api_key=ccompare_api_key)

            # Append the new batch to the DataFrame
            result_df = pd.concat([result_df, df_batch])

            # Check if the batch was downloaded successfully
            if not df_batch.empty:
                # Update the timestamp for the next batch
                earliest_timestamp = df_batch.index.min()
                latest_timestamp = df_batch.index.max()

                # Convert the datetime object to Unix time
                earliest_timestamp_unix = int(time.mktime(earliest_timestamp.timetuple()))
                batch_timestamp = earliest_timestamp_unix
                print(f"Batch {batch + 1}/{total_batches} downloaded successfully.")

            else:
                print(f"Failed to download batch {batch + 1}.")

    print(f"Latest {crypto}-{fiat} OHLCV dataset updated.")

    # Remove rows with missing values
    result_df.dropna(inplace=True)

    # Sort by timestamp and remove duplicates
    result_df = result_df.sort_index().drop_duplicates()

    # Checking if there is any misisng time steps
    ### Create a complete datetime index with 1-hour frequency
    complete_index = pd.date_range(start=result_df.index.min(), end=result_df.index.max(), freq='H')

    ### Find the difference between the complete index and the existing index
    missing_timestamps = complete_index.difference(result_df.index)

    if missing_timestamps.empty:
        print("No missing timestamps found.")

        # Save the resulting dataframe as both .csv and .json
        # When saving to CSV, reset the index
        result_df.reset_index().to_csv(csv_path, header=True, index=False)
        result_df.to_json(f'Resources/{crypto}{fiat}_1H_ccompare.json', orient='records', date_format='iso')

        print(f"All the following {crypto}-{fiat} OHLCV info has been saved to disk and is available now =)")
        DFinfo(result_df)

    else:
        print(f"{len(missing_timestamps)} missing timestamps found. Aborting - Please try again.")

    return result_df


In [29]:
# Download and merge ETH-USD data from CryptoCompare
ETHUSD_dataset = download_and_merge_crypto_OHLCV_cc(crypto='ETH', fiat='USD')

Detecting OHLCV data that was previously downloaded:
 > latest timestamp available (UTC): 2024-12-02 20:00:00+00:00
 > first timestamp available (UTC): 2016-11-01 05:00:00+00:00

Already up to date! No new ETH-USD data to download.
Latest ETH-USD OHLCV dataset updated.
No missing timestamps found.


  complete_index = pd.date_range(start=result_df.index.min(), end=result_df.index.max(), freq='H')


All the following ETH-USD OHLCV info has been saved to disk and is available now =)


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-01 05:00:00+00:00,10.65,10.75,10.61,10.74,12279.18
2016-11-01 06:00:00+00:00,10.74,10.79,10.65,10.67,19010.58
2016-11-01 07:00:00+00:00,10.67,10.82,10.61,10.62,49508.65


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-02 18:00:00+00:00,3566.34,3634.29,3564.82,3623.67,55429.94
2024-12-02 19:00:00+00:00,3623.67,3623.79,3584.89,3611.95,43504.23
2024-12-02 20:00:00+00:00,3611.95,3629.39,3606.36,3625.52,16434.65


In [30]:
ETHUSD_dataset

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-01 05:00:00+00:00,10.65,10.75,10.61,10.74,12279.18
2016-11-01 06:00:00+00:00,10.74,10.79,10.65,10.67,19010.58
2016-11-01 07:00:00+00:00,10.67,10.82,10.61,10.62,49508.65
2016-11-01 08:00:00+00:00,10.62,10.66,10.47,10.50,101202.05
2016-11-01 09:00:00+00:00,10.50,10.75,10.45,10.51,76259.74
...,...,...,...,...,...
2024-12-02 16:00:00+00:00,3674.66,3681.36,3665.06,3679.95,8086.81
2024-12-02 17:00:00+00:00,3627.28,3646.34,3556.46,3566.34,94869.95
2024-12-02 18:00:00+00:00,3566.34,3634.29,3564.82,3623.67,55429.94
2024-12-02 19:00:00+00:00,3623.67,3623.79,3584.89,3611.95,43504.23


In [31]:
# Download and merge ETH-USD data from CryptoCompare
BTCUSD_dataset = download_and_merge_crypto_OHLCV_cc(crypto='BTC', fiat='USD')


Detecting OHLCV data that was previously downloaded:
 > latest timestamp available (UTC): 2024-12-02 20:00:00+00:00
 > first timestamp available (UTC): 2016-10-24 07:00:00+00:00

Already up to date! No new BTC-USD data to download.
Latest BTC-USD OHLCV dataset updated.
No missing timestamps found.


  complete_index = pd.date_range(start=result_df.index.min(), end=result_df.index.max(), freq='H')


All the following BTC-USD OHLCV info has been saved to disk and is available now =)


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-10-24 07:00:00+00:00,651.41,651.91,647.62,648.31,892.71
2016-10-24 08:00:00+00:00,648.31,651.51,648.01,650.41,1730.28
2016-10-24 09:00:00+00:00,650.41,649.6,646.81,647.14,1803.12


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-02 18:00:00+00:00,94630.67,95882.87,94507.43,95675.9,3910.71
2024-12-02 19:00:00+00:00,95675.9,95868.96,95180.39,95827.81,2925.75
2024-12-02 20:00:00+00:00,95827.81,96353.41,95792.12,96193.82,1198.69


In [32]:
# display the resulting dataframe
BTCUSD_dataset

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-10-24 07:00:00+00:00,651.41,651.91,647.62,648.31,892.71
2016-10-24 08:00:00+00:00,648.31,651.51,648.01,650.41,1730.28
2016-10-24 09:00:00+00:00,650.41,649.60,646.81,647.14,1803.12
2016-10-24 10:00:00+00:00,647.14,649.30,646.13,648.09,1557.74
2016-10-24 11:00:00+00:00,648.09,652.81,648.74,651.84,1320.71
...,...,...,...,...,...
2024-12-02 16:00:00+00:00,97132.03,97432.99,96991.38,97395.22,608.40
2024-12-02 17:00:00+00:00,96229.32,96629.05,94419.96,94630.67,5968.73
2024-12-02 18:00:00+00:00,94630.67,95882.87,94507.43,95675.90,3910.71
2024-12-02 19:00:00+00:00,95675.90,95868.96,95180.39,95827.81,2925.75


In [33]:
# Import Dependencies
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import time
import pytz
import os
from pathlib import Path
import yfinance as yf

def get_market_OHLCV_yf(symbol='^GSPC', start='2016-10-24'):
    """
    Get daily market data from Yahoo Finance
    symbol: '^GSPC' for S&P 500 or '^NDX' for NASDAQ 100
    start: start date in 'YYYY-MM-DD' format
    """
    try:
        # Download daily data
        ticker = yf.Ticker(symbol)
        df = ticker.history(start=start, interval='1d')
        
        if df.empty:
            print(f"No data retrieved for {symbol}")
            return None
            
        # Keep only OHLCV columns
        df = df[['Open', 'High', 'Low', 'Close', 'Volume']]
        
        # Rename columns to match our format
        df.columns = df.columns.str.lower()
        
        # Make sure index is UTC
        if df.index.tz is None:
            df.index = df.index.tz_localize('UTC')
        elif df.index.tz != pytz.UTC:
            df.index = df.index.tz_convert('UTC')
            
        print(f"{symbol} data downloaded successfully.")
        return df
        
    except Exception as e:
        print(f"Error downloading {symbol} data: {str(e)}")
        return None

def download_and_merge_market_OHLCV_yf(symbol='^GSPC', start='2016-10-24'):
    """
    Download and merge daily market OHLCV data from Yahoo Finance
    Handles existing CSV files and updates with new data
    """
    # Convert symbol to a filename-friendly format
    filename = symbol.replace('^', '')
    csv_path = f'Resources/{filename}_1D_yfinance.csv'
    
    if Path(csv_path).is_file():
        # Load existing data
        result_df = pd.read_csv(csv_path, parse_dates=['timestamp'], index_col='timestamp')
        result_df.index = result_df.index.tz_localize('UTC')
        
        latest_timestamp = result_df.index.max()
        first_timestamp = result_df.index.min()
        
        print(f"Detecting OHLCV data that was previously downloaded:")
        print(f" > latest timestamp available (UTC): {latest_timestamp}")
        print(f" > first timestamp available (UTC): {first_timestamp}")
        print("")
        
        # Get new data from the last available date
        new_data = get_market_OHLCV_yf(symbol=symbol, start=latest_timestamp.strftime('%Y-%m-%d'))
        
        if new_data is not None:
            # Combine existing and new data
            result_df = pd.concat([result_df, new_data])
            # Remove duplicates and sort
            result_df = result_df[~result_df.index.duplicated(keep='last')].sort_index()
    else:
        # Get all available data if no CSV exists
        result_df = get_market_OHLCV_yf(symbol=symbol, start=start)
        
        if result_df is None:
            return None
    
    # Remove any NaN values
    result_df.dropna(inplace=True)
    
    # Check for missing timestamps (business days only)
    business_days = pd.date_range(start=result_df.index.min(), 
                                end=result_df.index.max(), 
                                freq='B',  # Business days
                                tz='UTC')
    
    missing_days = business_days.difference(result_df.index)
    
    if len(missing_days) == 0:
        print("No missing business days found.")
        
        # Save individual file
        result_df.reset_index().to_csv(csv_path, index=False)
        result_df.to_json(f'Resources/{filename}_1D_yfinance.json', 
                         orient='records', 
                         date_format='iso')
        
        # Create merged file with crypto data if available
        try:
            # Load crypto data and resample to daily
            btc_data = pd.read_csv('Resources/BTCUSD_1H_ccompare.csv', 
                                 parse_dates=['timestamp'])
            eth_data = pd.read_csv('Resources/ETHUSD_1H_ccompare.csv', 
                                 parse_dates=['timestamp'])
            
            # Resample crypto data to daily (using last hour of each day)
            btc_daily = btc_data.set_index('timestamp').resample('D').last()
            eth_daily = eth_data.set_index('timestamp').resample('D').last()
            
            # Add suffix to columns
            btc_daily = btc_daily.add_suffix('_btc')
            eth_daily = eth_daily.add_suffix('_eth')
            result_df = result_df.add_suffix(f'_{filename.lower()}')
            
            # Merge all datasets
            merged_df = pd.concat([btc_daily, eth_daily, result_df], axis=1)
            
            # Save merged file
            merged_df.reset_index().to_csv('Resources/merged_market_data_1D.csv', index=False)
            print(f"\nMerged data file created successfully with shape: {merged_df.shape}")
            
        except FileNotFoundError:
            print("\nCrypto data files not found. Only saving individual market data.")
        
        print(f"\nAll {symbol} OHLCV info has been saved to disk and is available now =)")
        print("\nFirst few rows:")
        print(result_df.head())
        print("\nLast few rows:")
        print(result_df.tail())
        print("\nDataset Info:")
        print(result_df.info())
        
    else:
        print(f"{len(missing_days)} missing business days found.")
        print("Note: Some of these might be holidays when markets were closed.")
    
    return result_df

# Example usage:
if __name__ == "__main__":
    # Download S&P 500 data
    sp500_data = download_and_merge_market_OHLCV_yf(symbol='^GSPC', start='2016-10-24')
    
    # Download NASDAQ 100 data
    nasdaq_data = download_and_merge_market_OHLCV_yf(symbol='^NDX', start='2016-10-24')

^GSPC data downloaded successfully.
781 missing business days found.
Note: Some of these might be holidays when markets were closed.
^NDX data downloaded successfully.
781 missing business days found.
Note: Some of these might be holidays when markets were closed.


In [34]:
sp500_data =download_and_merge_market_OHLCV_yf(symbol='^GSPC', start='2016-10-24')

^GSPC data downloaded successfully.
781 missing business days found.
Note: Some of these might be holidays when markets were closed.


In [35]:
sp500_data

Unnamed: 0_level_0,open,high,low,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-10-24 04:00:00+00:00,2148.500000,2154.790039,2146.909912,2151.330078,3359950000
2016-10-25 04:00:00+00:00,2149.719971,2151.439941,2141.929932,2143.159912,3756200000
2016-10-26 04:00:00+00:00,2136.969971,2145.729980,2131.590088,2139.429932,3778120000
2016-10-27 04:00:00+00:00,2144.060059,2147.129883,2132.520020,2133.040039,4209400000
2016-10-28 04:00:00+00:00,2132.229980,2140.719971,2119.360107,2126.409912,4028270000
...,...,...,...,...,...
2024-11-25 05:00:00+00:00,5992.279785,6020.750000,5963.910156,5987.370117,5633150000
2024-11-26 05:00:00+00:00,6000.029785,6025.419922,5992.270020,6021.629883,3835170000
2024-11-27 05:00:00+00:00,6014.109863,6020.160156,5984.870117,5998.740234,3363340000
2024-11-29 05:00:00+00:00,6003.979980,6044.169922,6003.979980,6032.379883,2444420000


In [36]:
ndx100=download_and_merge_market_OHLCV_yf(symbol='^NDX', start='2016-10-24')

^NDX data downloaded successfully.
781 missing business days found.
Note: Some of these might be holidays when markets were closed.


In [37]:
def download_and_merge_gold_OHLCV_yf(symbol='GC=F', start='2016-10-24'):
    """
    Download and merge daily gold OHLCV data from Yahoo Finance
    Handles existing CSV files and updates with new data
    """
    # Convert symbol to a filename-friendly format
    filename = 'GOLD'  # Using GOLD instead of GC=F for filename
    csv_path = f'Resources/{filename}_1D_yfinance.csv'
    
    if Path(csv_path).is_file():
        # Load existing data
        result_df = pd.read_csv(csv_path, parse_dates=['timestamp'], index_col='timestamp')
        # Handle timezone - convert if tz-aware, localize if naive
        if result_df.index.tz is None:
            result_df.index = result_df.index.tz_localize('UTC')
        elif result_df.index.tz != pytz.UTC:
            result_df.index = result_df.index.tz_convert('UTC')
        
        latest_timestamp = result_df.index.max()
        first_timestamp = result_df.index.min()
        
        print(f"Detecting Gold OHLCV data that was previously downloaded:")
        print(f" > latest timestamp available (UTC): {latest_timestamp}")
        print(f" > first timestamp available (UTC): {first_timestamp}")
        print("")
        
        # Get new data from the last available date
        ticker = yf.Ticker(symbol)
        new_data = ticker.history(start=latest_timestamp.strftime('%Y-%m-%d'), interval='1d')
        
        if not new_data.empty:
            # Keep only OHLCV columns and rename
            new_data = new_data[['Open', 'High', 'Low', 'Close', 'Volume']]
            new_data.columns = new_data.columns.str.lower()
            
            # Handle timezone for new data
            if new_data.index.tz is None:
                new_data.index = new_data.index.tz_localize('UTC')
            elif new_data.index.tz != pytz.UTC:
                new_data.index = new_data.index.tz_convert('UTC')
            
            # Combine existing and new data
            result_df = pd.concat([result_df, new_data])
            # Remove duplicates and sort
            result_df = result_df[~result_df.index.duplicated(keep='last')].sort_index()
    else:
        # Get all available data if no CSV exists
        ticker = yf.Ticker(symbol)
        result_df = ticker.history(start=start, interval='1d')
        
        if result_df.empty:
            print(f"No data retrieved for Gold")
            return None
            
        # Keep only OHLCV columns and rename
        result_df = result_df[['Open', 'High', 'Low', 'Close', 'Volume']]
        result_df.columns = result_df.columns.str.lower()
        
        # Handle timezone for new data
        if result_df.index.tz is None:
            result_df.index = result_df.index.tz_localize('UTC')
        elif result_df.index.tz != pytz.UTC:
            result_df.index = result_df.index.tz_convert('UTC')
    
    # Remove any NaN values
    result_df.dropna(inplace=True)
    
    # Create business day index in UTC
    start_date = result_df.index.min().tz_localize(None)  # Remove timezone temporarily
    end_date = result_df.index.max().tz_localize(None)    # Remove timezone temporarily
    
    business_days = pd.date_range(
        start=start_date,
        end=end_date,
        freq='B'
    ).tz_localize('UTC')  # Add timezone back
    
    # Convert result_df index to match business_days timezone
    if result_df.index.tz != pytz.UTC:
        result_df.index = result_df.index.tz_convert('UTC')
    
    missing_days = business_days.difference(result_df.index)
    
    if len(missing_days) == 0:
        print("No missing business days found.")
    else:
        print(f"{len(missing_days)} missing business days found.")
        print("Note: Some of these might be holidays when markets were closed.")
    
    # Save the data
    result_df.reset_index().to_csv(csv_path, index=False)
    result_df.to_json(f'Resources/{filename}_1D_yfinance.json', 
                     orient='records', 
                     date_format='iso')
    
    print(f"\nGold OHLCV data has been saved to disk and is available now =)")
    print("\nFirst few rows:")
    print(result_df.head())
    print("\nLast few rows:")
    print(result_df.tail())
    
    return result_df


In [38]:
gold_data = download_and_merge_gold_OHLCV_yf(symbol='GC=F', start='2016-10-24')

ValueError: Missing column provided to 'parse_dates': 'timestamp'

In [23]:
gold_data

Unnamed: 0_level_0,open,high,low,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-10-24 04:00:00+00:00,1269.199951,1269.199951,1261.800049,1262.000000,42
2016-10-25 04:00:00+00:00,1264.099976,1274.099976,1264.099976,1271.900024,61
2016-10-26 04:00:00+00:00,1274.000000,1274.000000,1264.800049,1264.800049,279
2016-10-27 04:00:00+00:00,1269.400024,1269.400024,1267.900024,1267.900024,338
2016-10-28 04:00:00+00:00,1270.300049,1281.500000,1262.000000,1275.500000,560
...,...,...,...,...,...
2024-11-26 05:00:00+00:00,2625.600098,2625.600098,2620.300049,2620.300049,177858
2024-11-27 05:00:00+00:00,2633.500000,2657.899902,2627.199951,2639.899902,61653
2024-11-28 05:00:00+00:00,2636.399902,2648.600098,2620.699951,2639.699951,61653
2024-11-29 05:00:00+00:00,2636.399902,2664.300049,2620.699951,2657.000000,61653
