In [3]:
import yfinance as yf
import numpy as np
import pandas as pd
from datetime import datetime


# Create dependent variables


### 1. Create list of tickers

In [4]:
tickers = ["BTC-USD", "SPY", "GLD", "DX-Y.NYB"]


###  2. Create data frame to consist of weekly return for each ticker


[Retrieve financial data from yahoo finance](https://pypi.org/project/yfinance/)

[Deal with multi-level columns](https://stackoverflow.com/questions/63107594/how-to-deal-with-multi-level-column-names-downloaded-with-yfinance/63107801#63107801)

[Resample data using pandas](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html)

[Function pct_change()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pct_change.html)

In [5]:
# Retrieve daily data from yahoo finance from March 7, 2017 to September 25, 2022 (5 years)
# Resample into weekly data bases on average of that week
# Calculate percent change from last week and multiply by 100 to get (%) unit 
data = yf.download(tickers, 
                   start='2017-7-3', 
                   end='2022-9-25').resample('W').mean().pct_change(periods=1)*100 

# Take only the closing price
data = data['Close']   

#print(data)



[*********************100%***********************]  4 of 4 completed


## 3. Calculate rolling 12 weeks covariance between tickers

[Accessing the index in 'for' loops](https://stackoverflow.com/questions/522563/accessing-the-index-in-for-loops)

[Calculate rolling covariance: pandas.DataFrame.rolling](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html)

[Format string literal](https://stackoverflow.com/questions/57150426/what-is-printf)

[Selecting multiple columns in a Pandas dataframe](https://stackoverflow.com/questions/11285613/selecting-multiple-columns-in-a-pandas-dataframe/11287278#11287278)

In [6]:
for i, ticker1 in enumerate(tickers[:-1]):      # function enumerate() iterates through both index and value 
                                                # of the element in the list
    for ticker2 in tickers[i+1:]:       # Loop through the list starting from the next ticker to ticker 1
        data[f"{ticker1}-{ticker2}"] = data[ticker1].rolling(12).cov(data[ticker2])
 #name the column using formatted string literal 
                                       # Calculate rolling covariance between two tickers using 12-week window
                                       # Rolling function uses up to 11 weeks prior to calculate
        data = data.iloc[ : , :7]      # Update data frame to include only first 7 columns

#print(data)
        
        


# Create independent variable

2 steps:
- Step 1: Collect data from web or csv files
- Step 2: Combine data frames

[Import csv columns into data frame](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)

[Combine 2 data frame using concatnate](https://pandas.pydata.org/docs/reference/api/pandas.concat.html)


### 1. Collect CBOE VIX data from yahoo finance

In [7]:
# Download data from yahoo finance
data2 = yf.download('^VIX',
                     start='2017-7-3', 
                     end='2022-9-25').resample('W').mean()
data2 = data2['Close']

#print(data4.head(15))

[*********************100%***********************]  1 of 1 completed


In [10]:
# Combine VIX into existing data frame
data3 = pd.concat([data, data2], axis=1)

data3.index = data3.index.date              # Extract the date part from datetime index
data3.index = pd.to_datetime(data3.index)   # Convert the date index back into datetime format
data3 = data3.groupby(data3.index).first()  # Group by date and only take the first row
 
#print(data3)

              BTC-USD  DX-Y.NYB       GLD       SPY  BTC-USD-SPY  BTC-USD-GLD  \
2017-07-09        NaN       NaN       NaN       NaN          NaN          NaN   
2017-07-16 -13.097695 -0.412155 -0.127486  0.744076          NaN          NaN   
2017-07-23  14.188202 -1.174701  1.944076  1.116904          NaN          NaN   
2017-07-30   5.483454 -0.841793  1.287690  0.293791          NaN          NaN   
2017-08-06   8.746409 -0.799882  0.576355  0.009713          NaN          NaN   
...               ...       ...       ...       ...          ...          ...   
2022-08-28  -7.780902  1.559962 -1.088650 -3.327494    27.900086     2.215694   
2022-09-04  -3.964038  0.351345 -1.770849 -3.851876    28.321611     2.376680   
2022-09-11   1.457133  0.535260 -0.304528  0.489753     7.141350     0.772949   
2022-09-18  -0.121056 -0.207856 -1.057030 -1.005316     7.260000     0.818234   
2022-09-25  -5.772964  1.430632 -1.572493 -4.160863     9.666308     1.208858   

            BTC-USD-DX-Y.NY

### 2. Collect Bitcoin google trends and GARCH(1,1) volatility from csv files



In [11]:
# Import specific columns from csv file
df_csv = pd.read_csv('/Users/quanghuy/Documents/ECO 590 - R & Python/Nguyen_Huy_Data.csv', on_bad_lines= 'skip', 
                     delimiter = ';', usecols = ['time','gg_trend_wrld','btc_garch'], index_col = 'time')

# Reformat the date column to match the data frame in python
df_csv.index = pd.to_datetime(df_csv.index, format='%d/%m/%Y', utc = True).strftime('%Y-%m-%d')
        
# Convert the date column to datetime format
df_csv.index = pd.to_datetime(df_csv.index)

#print(df_csv.head(15))



In [13]:
# Combine data frames
data4 = pd.concat([data3, df_csv], axis=1)

#print(data4.tail(15))

              BTC-USD  DX-Y.NYB       GLD       SPY  BTC-USD-SPY  BTC-USD-GLD  \
2022-06-19 -28.672252  1.830297 -1.281946 -8.345362    27.646276     2.914254   
2022-06-26  -1.047279 -0.448395 -0.043074  1.957455    25.492382     3.605043   
2022-07-03  -5.084540  0.316250 -0.829264  0.583551    25.652939     3.579654   
2022-07-10   5.823095  1.822731 -3.726103  1.075302    27.668607     1.224225   
2022-07-17  -2.662651  1.448594 -1.482914 -1.122972    27.293117     1.004926   
2022-07-24  12.022927 -1.087576 -0.563451  2.933511    33.076292     1.701378   
2022-07-31   0.070299 -0.448793  1.504363  2.174878    33.850103     2.366860   
2022-08-07   0.638456 -0.349381  2.014376  2.760326    27.430155     0.713266   
2022-08-14   4.236408 -0.335527  1.076284  1.479920    28.012077     1.396735   
2022-08-21  -5.879156  1.236928 -1.528361  2.095229    27.414293     1.960803   
2022-08-28  -7.780902  1.559962 -1.088650 -3.327494    27.900086     2.215694   
2022-09-04  -3.964038  0.351

### 3. Create time dummy variable for covid period

In [28]:
data4['covid'] = 0

start = '2020-01-01'
end = '2020-08-31'

requirement = (data4.index >= start) & (data4.index <= end)
data4.loc[requirement,'covid'] = 1

#data4[requirement]


Unnamed: 0,BTC-USD,DX-Y.NYB,GLD,SPY,BTC-USD-SPY,BTC-USD-GLD,BTC-USD-DX-Y.NYB,Close,gg_trend_wrld,btc_garch,covid
2020-01-05,-0.749502,-0.769588,1.662748,0.152918,0.292432,1.600733,0.044458,13.7725,8,4718108452,1
2020-01-12,10.718787,0.462233,2.222687,0.645165,0.423232,3.398179,0.461262,13.238,10,5443610198,1
2020-01-19,8.514034,0.230567,-0.574025,1.434223,0.951162,2.713043,0.755357,12.31,11,5557660575,1
2020-01-26,-1.931549,0.281382,0.663185,0.452633,0.488666,2.56526,0.722874,13.325,9,4632184177,1
2020-02-02,8.815808,0.200716,0.968015,-1.662268,-1.263132,2.812867,0.936796,17.046,10,4972852125,1
2020-02-09,3.702942,0.404718,-0.850714,1.586305,-0.921944,1.456173,1.172343,15.92,11,4958661689,1
2020-02-16,4.653879,0.728814,0.564767,1.803635,-0.692038,1.226946,1.015538,14.358,12,4950585484,1
2020-02-23,-3.382916,0.595706,2.904151,-0.008619,-0.161904,-0.651476,0.886938,15.4625,11,5510225174,1
2020-03-01,-8.656893,-0.785053,0.456209,-8.415311,8.059391,-0.275398,1.594175,31.942,11,5628900339,1
2020-03-08,-1.28018,-1.865016,0.855382,-1.18351,8.820636,-0.639645,1.976614,36.757999,10,5745644844,1


# Export data frame into csv file

[Rename column names](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html#pandas.DataFrame.rename)

[Python to csv file](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html)



In [35]:
# Rename columns
data4 = data4.rename(columns ={'BTC-USD': 'r_btc(%)',
                           'DX-Y.NYB': 'r_usd(%)', 
                           'GLD': 'r_gld(%)',
                           'SPY': 'r_spy(%)',
                           'BTC-USD-SPY': 'cov_bspy',
                           'BTC-USD-GLD': 'cov_bgld',
                           'BTC-USD-DX-Y.NYB': 'cov_busd',
                           'Close': 'VIX'})

print(data4.describe())

data4


         r_btc(%)    r_usd(%)    r_gld(%)    r_spy(%)    cov_bspy    cov_bgld  \
count  272.000000  272.000000  272.000000  272.000000  261.000000  261.000000   
mean     1.183338    0.055570    0.118943    0.187165    6.193432    1.793014   
std      9.511635    0.694634    1.574905    2.098647   12.209527    5.949698   
min    -28.672252   -1.865016   -7.030331  -11.257790   -7.962287  -13.810481   
25%     -4.666970   -0.412407   -0.689357   -0.496998   -0.080055   -1.140337   
50%      0.502583    0.071516    0.199807    0.483088    2.328346    1.226946   
75%      5.937269    0.475241    0.853988    1.359456    7.141350    3.838309   
max     40.335599    4.208329    7.448337    6.890572   57.548421   23.094400   

         cov_busd         VIX       covid  
count  261.000000  273.000000  273.000000  
mean    -0.886112   20.064511    0.128205  
std      1.995727    8.515929    0.334932  
min     -6.134117    9.340000    0.000000  
25%     -1.996622   13.766000    0.000000  
50%   

Unnamed: 0,r_btc(%),r_usd(%),r_gld(%),r_spy(%),cov_bspy,cov_bgld,cov_busd,VIX,gg_trend_wrld,btc_garch,covid
2017-07-09,,,,,,,,11.5050,.,5362767006,0
2017-07-16,-13.097695,-0.412155,-0.127486,0.744076,,,,10.3420,.,7376204382,0
2017-07-23,14.188202,-1.174701,1.944076,1.116904,,,,9.6880,.,1286168412,0
2017-07-30,5.483454,-0.841793,1.287690,0.293791,,,,9.7720,.,9760370461,0
2017-08-06,8.746409,-0.799882,0.576355,0.009713,,,,10.2200,.,8227699884,0
...,...,...,...,...,...,...,...,...,...,...,...
2022-08-28,-7.780902,1.559962,-1.088650,-3.327494,27.900086,2.215694,-6.080199,23.6140,17,5624453807,0
2022-09-04,-3.964038,0.351345,-1.770849,-3.851876,28.321611,2.376680,-6.049444,25.8640,18,4531278637,0
2022-09-11,1.457133,0.535260,-0.304528,0.489753,7.141350,0.772949,-2.574915,24.4875,17,5557379792,0
2022-09-18,-0.121056,-0.207856,-1.057030,-1.005316,7.260000,0.818234,-2.645339,25.9740,17,733579912,0


In [36]:

# Export to csv file
data4.to_csv('Nguyen_Huy_Dataa.csv', index=True, index_label = 'Date')  
