# Loading Dataset

In [None]:
import yfinance as yf
import pandas as pd

## Bitcoin data

In [28]:
# Date window reused by every download further down the notebook
start_date = "2014-09-17"
end_date = "2024-06-14"

# Pull the BTC-USD bars, expose the date index as a regular 'Date' column,
# and discard the 'Adj Close' column
btc_raw = yf.download("BTC-USD", start=start_date, end=end_date)
btc_data = btc_raw.reset_index().drop(columns=['Adj Close'])

# Preview the first rows
print("Btc Data:", btc_data.head(), sep="\n")

# Persist to CSV without the positional index
btc_csv_path = "btc_data.csv"
btc_data.to_csv(btc_csv_path, index=False)


[*********************100%%**********************]  1 of 1 completed

Btc Data:
        Date        Open        High         Low       Close    Volume
0 2014-09-17  465.864014  468.174011  452.421997  457.334015  21056800
1 2014-09-18  456.859985  456.859985  413.104004  424.440002  34483200
2 2014-09-19  424.102997  427.834991  384.532013  394.795990  37919700
3 2014-09-20  394.673004  423.295990  389.882996  408.903992  36863600
4 2014-09-21  408.084991  412.425995  393.181000  398.821014  26580100





## Dollar index data

In [30]:
# Function to get historical DXY data
def get_historical_dxy_data(start_date, end_date):
    """Return Date/Close rows for the "DX-Y.NYB" ticker over a date window.

    Args:
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        DataFrame with two columns: 'Date' (strings formatted DD/MM/YYYY)
        and 'Close'.
    """
    history = yf.Ticker("DX-Y.NYB").history(start=start_date, end=end_date)
    history = history.reset_index()
    # Dates leave this function as day-first text, not datetime values
    date_text = pd.to_datetime(history['Date']).dt.strftime('%d/%m/%Y')
    history['Date'] = date_text
    return history[['Date', 'Close']]

# Download the dollar-index series for the notebook's date window
dxy_data = get_historical_dxy_data(start_date=start_date, end_date=end_date)

# Preview the first rows
print("DXY Data:", dxy_data.head(), sep="\n")

# Write the series to disk, omitting the positional index
dxy_csv_path = 'dxy_data.csv'
dxy_data.to_csv(dxy_csv_path, index=False)


DXY Data:
         Date      Close
0  17/09/2014  84.699997
1  18/09/2014  84.320000
2  19/09/2014  84.800003
3  22/09/2014  84.669998
4  23/09/2014  84.660004


## Gold data

In [32]:
# Function to get historical gold data
def get_historical_gold_data(start_date, end_date):
    """Return Date/Close rows for the "GC=F" ticker over a date window.

    Args:
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        DataFrame with two columns: 'Date' (strings formatted DD/MM/YYYY)
        and 'Close'.
    """
    history = yf.Ticker("GC=F").history(start=start_date, end=end_date)
    history = history.reset_index()
    # Dates leave this function as day-first text, not datetime values
    date_text = pd.to_datetime(history['Date']).dt.strftime('%d/%m/%Y')
    history['Date'] = date_text
    return history[['Date', 'Close']]

# Download the gold series for the notebook's date window
gold_data = get_historical_gold_data(start_date=start_date, end_date=end_date)

# Preview the first rows
print("Gold Data:", gold_data.head(), sep="\n")

# Write the series to disk, omitting the positional index
gold_csv_path = 'gold_data.csv'
gold_data.to_csv(gold_csv_path, index=False)

Gold Data:
         Date        Close
0  17/09/2014  1234.400024
1  18/09/2014  1225.699951
2  19/09/2014  1215.300049
3  22/09/2014  1216.800049
4  23/09/2014  1221.000000


## Crude oil data

In [34]:
# Function to get historical crude oil price data
def get_historical_crude_oil_data(start_date, end_date):
    """Fetch the history of the "CL=F" ticker between two dates.

    Args:
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        Whatever ``Ticker.history`` returns, unmodified.
    """
    return yf.Ticker("CL=F").history(start=start_date, end=end_date)

# Fetch historical crude oil price data
oil_history = get_historical_crude_oil_data(start_date, end_date)

# Move the date index into a 'Date' column, keep only Date/Close, and give the
# price column a descriptive name. Chaining non-inplace calls produces a fresh
# frame instead of mutating a column slice of the downloaded history.
oil_data = (
    oil_history
    .reset_index()[['Date', 'Close']]
    .rename(columns={'Close': 'Crude_Oil_Price'})
)

# Remove the timezone and truncate each timestamp to midnight.
# NOTE(review): tz_convert(None) shifts to UTC before dropping the zone;
# tz_localize(None) would keep the local calendar date -- confirm which is intended.
oil_data['Date'] = oil_data['Date'].dt.tz_convert(None).dt.normalize()

# Display the first few rows of the data.
# This must print `oil_data`: `crude_oil_data` is only created later, in the
# merge cell, so referencing it here raises NameError on a fresh kernel.
print("Crude oil Data:")
print(oil_data.head())

# Save the crude oil price data to a new CSV file
oil_data.to_csv('crude_oil_data.csv', index=False)

Crude oil Data:
        Date  Crude_Oil_Price
0 2014-09-17        94.419998
1 2014-09-18        93.070000
2 2014-09-19        92.410004
3 2014-09-22        91.519997
4 2014-09-23        91.559998


## Treasury yield data

In [36]:
# Function to get historical 10-year US Treasury yield data
def get_historical_treasury_yield_data(start_date, end_date):
    """Fetch the history of the "^TNX" ticker between two dates.

    Args:
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        Whatever ``Ticker.history`` returns, unmodified.
    """
    return yf.Ticker("^TNX").history(start=start_date, end=end_date)

# Fetch historical 10-year US Treasury yield data
tnx_history = get_historical_treasury_yield_data(start_date, end_date)

# Move the date index into a 'Date' column, keep only Date/Close, and give the
# yield column a descriptive name. Chaining non-inplace calls produces a fresh
# frame instead of mutating a column slice of the downloaded history.
tnx_data = (
    tnx_history
    .reset_index()[['Date', 'Close']]
    .rename(columns={'Close': 'Treasury_Yield'})
)

# Remove the timezone and truncate each timestamp to midnight.
# NOTE(review): tz_convert(None) shifts to UTC before dropping the zone;
# tz_localize(None) would keep the local calendar date -- confirm which is intended.
tnx_data['Date'] = tnx_data['Date'].dt.tz_convert(None).dt.normalize()

# Display the first few rows of the data.
# This must print `tnx_data`: `treasury_yield_data` is only created later, in
# the merge cell, so referencing it here raises NameError on a fresh kernel.
print("treasury_yield_data:")
print(tnx_data.head())

# Save the 10-year US Treasury yield data to a new CSV file
tnx_data.to_csv('treasury_yield_data.csv', index=False)

treasury_yield_data:
        Date  Treasury_Yield
0 2014-09-17           2.600
1 2014-09-18           2.629
2 2014-09-19           2.587
3 2014-09-22           2.566
4 2014-09-23           2.535


## S&P 500 data

In [38]:
# Function to get historical S&P 500 index data
def get_historical_sp500_data(start_date, end_date):
    """Fetch the history of the "^GSPC" ticker between two dates.

    Args:
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        Whatever ``Ticker.history`` returns, unmodified.
    """
    return yf.Ticker("^GSPC").history(start=start_date, end=end_date)

# Download the S&P 500 history and expose its date index as a 'Date' column
sp500_history = get_historical_sp500_data(start_date, end_date).reset_index()

# Keep Date/Close only, labelling the close as 'SP500'
sp500_data = sp500_history[['Date', 'Close']].rename(columns={'Close': 'SP500'})

# Strip the timezone, then snap every timestamp to midnight
naive_dates = pd.to_datetime(sp500_data['Date'].dt.tz_convert(None))
sp500_data['Date'] = naive_dates.dt.normalize()

# Preview the first rows
print("sp500_data:", sp500_data.head(), sep="\n")

# Write the index series to disk, omitting the positional index
sp500_csv_path = 'sp500_data.csv'
sp500_data.to_csv(sp500_csv_path, index=False)

sp500_data:
        Date        SP500
0 2014-09-17  2001.569946
1 2014-09-18  2011.359985
2 2014-09-19  2010.400024
3 2014-09-22  1994.290039
4 2014-09-23  1982.770020


## Merged data

In [41]:
# Load the data files
btc_data = pd.read_csv('btc_data.csv')
dxy_data = pd.read_csv('dxy_data.csv')
gold_data = pd.read_csv('gold_data.csv')
crude_oil_data = pd.read_csv('crude_oil_data.csv')
sp500_data = pd.read_csv('sp500_data.csv')
treasury_yield_data = pd.read_csv('treasury_yield_data.csv')

# Rename the 'Close' column in dxy_data to something more descriptive
dxy_data.rename(columns={'Close': 'DXY_Close'}, inplace=True)
gold_data.rename(columns={'Close': 'Gold_Close'}, inplace=True)

# Convert Date columns to datetime format if needed
btc_data['Date'] = pd.to_datetime(btc_data['Date'])
dxy_data['Date'] = pd.to_datetime(dxy_data['Date'])
gold_data['Date'] = pd.to_datetime(gold_data['Date'])
crude_oil_data['Date'] = pd.to_datetime(crude_oil_data['Date'])
sp500_data['Date'] = pd.to_datetime(sp500_data['Date'])
treasury_yield_data['Date'] = pd.to_datetime(treasury_yield_data['Date'])


# Merge the data on the Date column
merged_data = pd.merge(btc_data, dxy_data, on='Date', how='inner')
merged_data = pd.merge(merged_data, gold_data, on='Date', how='inner')
merged_data = pd.merge(merged_data, crude_oil_data, on='Date', how='inner')
merged_data = pd.merge(merged_data, sp500_data, on='Date', how='inner')
merged_data = pd.merge(merged_data, treasury_yield_data, on='Date', how='inner')

merged_data['Date'] = pd.to_datetime(merged_data['Date'])


# Display the first few rows of the merged data
print("Merged Data:")
print(merged_data.head())

# Save the merged data to a new CSV file
merged_data.to_csv('merged_data.csv', index=False)

Merged Data:
        Date        Open        High         Low       Close    Volume  \
0 2014-09-17  465.864014  468.174011  452.421997  457.334015  21056800   
1 2014-09-18  456.859985  456.859985  413.104004  424.440002  34483200   
2 2014-09-19  424.102997  427.834991  384.532013  394.795990  37919700   
3 2014-09-22  399.100006  406.915985  397.130005  402.152008  24127600   
4 2014-09-23  402.092010  441.557007  396.196991  435.790985  45099500   

   DXY_Close   Gold_Close  Crude_Oil_Price        SP500  Treasury_Yield  
0  84.699997  1234.400024        94.419998  2001.569946           2.600  
1  84.320000  1225.699951        93.070000  2011.359985           2.629  
2  84.800003  1215.300049        92.410004  2010.400024           2.587  
3  84.669998  1216.800049        91.519997  1994.290039           2.566  
4  84.660004  1221.000000        91.559998  1982.770020           2.535  


  dxy_data['Date'] = pd.to_datetime(dxy_data['Date'])
  gold_data['Date'] = pd.to_datetime(gold_data['Date'])
