### Data merging and cleaning
Note: because the stock market is closed on weekends, the weekend prices of assets such as gold are carried forward from the preceding Friday.

In [2]:
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt

__Bitcoin data__

In [3]:
# Build the bitcoin price frame, indexed by date.
# header=1 makes pandas take the column names from the second line of the file.
btc = (
    pd.read_csv('Coinbase_BTCUSD_d.csv', header=1)
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'], format='%Y-%m-%d'))
    .set_index('Date')
    # Columns not listed here (e.g. the close price) are kept.
    .drop(columns=['Symbol', 'Open', 'High', 'Low', 'Volume USD'])
    .rename(columns={'Close': 'BTC price [USD]'})
    # Reverse the row order (presumably the file is newest-first -- confirm).
    .iloc[::-1, :]
)

__Gold data__

In [4]:
# Build the gold price frame, indexed by date (dates look like "May 01, 2020").
gold = (
    pd.read_csv('Gold Futures Historical Data20102020maj.csv')
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'], format='%b %d, %Y'))
    .set_index('Date')
)
# Keep only the first remaining column; every other column label is dropped.
gold = gold.drop(columns=gold.columns[1:])
# Rename the price column and reverse the row order
# (presumably the file is newest-first -- confirm).
gold = gold.rename(columns={'Price': 'Gold price[USD]'}).iloc[::-1, :]

# Prices are read as strings with "," separators; strip them and cast to float.
gold['Gold price[USD]'] = gold['Gold price[USD]'].str.replace(",", "").astype(float)

__Oil WTI data__

In [5]:
# Build the WTI oil price frame, indexed by date.
oil = (
    pd.read_csv('wti-daily_csv.csv')
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'], format='%Y-%m-%d'))
    .set_index('Date')
    .rename(columns={'Price': 'Oil WTI price[USD]'})
)

__M2 money supply in the USA data__

In [6]:
# Build a daily M2 money-supply frame from the monthly series.
M2 = pd.read_csv('FRB_H6.csv')
# Drop the first five rows (presumably series metadata -- confirm against the
# file) and the columns at positions 1-3.
M2.drop(index=[0, 1, 2, 3, 4], columns=M2.columns[1:4], inplace=True)
# Placeholder row for May 2020: it only serves as an end point so that the
# forward fill below covers every day of the last real month; it is removed
# again after resampling.
may20 = {'Series Description': '2020-05', 'M2; Not seasonally adjusted': 0}
# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# produces the same result and also works on older pandas versions.
M2 = pd.concat([M2, pd.DataFrame([may20])], ignore_index=True)
M2['Series Description'] = pd.to_datetime(M2['Series Description'], format='%Y-%m')
M2.rename(
    columns={
        'Series Description': 'Date',
        'M2; Not seasonally adjusted': 'M2(Not seasonally adjusted)[1e+09 USD]',
    },
    inplace=True,
)
M2.set_index('Date', drop=True, inplace=True)
# Upsample to daily frequency, repeating each monthly value on every day.
M2 = M2.resample('D').ffill()
# Remove the placeholder row (the last index entry).
M2.drop(index=[M2.index[-1]], inplace=True)
# Cast the values to float once the placeholder row is gone.
M2['M2(Not seasonally adjusted)[1e+09 USD]'] = M2['M2(Not seasonally adjusted)[1e+09 USD]'].astype(float)

__SP500 index data__

In [7]:
# Build the S&P 500 frame, indexed by date, keeping a single column.
SP500 = (
    pd.read_csv('GSPC.csv')
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'], format='%Y-%m-%d'))
    .set_index('Date')
    # First discard the three leading columns ...
    .pipe(lambda frame: frame.drop(columns=frame.columns[:3]))
    # ... then everything after the new first column.
    .pipe(lambda frame: frame.drop(columns=frame.columns[1:]))
    .rename(columns={'Close': 'SP500 close index'})
)

__Number of bitcoin transactions data__

In [8]:
# Build a daily frame with the number of bitcoin transactions.
BTC_tran = (
    pd.read_csv('n-transactions.csv')
    .assign(Timestamp=lambda raw: pd.to_datetime(raw['Timestamp'], format="%Y-%m-%d"))
    .rename(columns={'Timestamp': 'Date', 'n-transactions': 'BTC n-transactions'})
    .set_index('Date')
    # Reverse the row order before resampling.
    .iloc[::-1, :]
    # Upsample to one row per day; days without an observation repeat the
    # most recent earlier value.
    .resample('D')
    .ffill()
)

__Bitcoin Google search trends__

In [9]:
# Build a daily frame with the Google search interest for "bitcoin".
# skiprows=1 ignores the first line of the export.
search_interest = (
    pd.read_csv("multiTimeline.csv", skiprows=1)
    .assign(Month=lambda raw: pd.to_datetime(raw['Month'], format='%Y-%m-%d'))
    .set_index('Month')
    # The index is called 'Date' so it lines up with the other frames.
    .rename_axis('Date')
    .rename(columns={'bitcoin: (Worldwide)': 'btc search interest'})
    # Upsample to one row per day, repeating the latest known value.
    .resample('D')
    .ffill()
)

__Merging datasets__

In [10]:
def merg(df1, df2, how='left', on='Date'):
    """Merge ``df2`` onto ``df1`` with ``pd.merge``.

    Defaults to a left join on ``'Date'``, so every row of ``df1`` is kept.
    The two-argument call form makes the function usable with
    ``functools.reduce`` over a list of frames.
    """
    return pd.merge(left=df1, right=df2, on=on, how=how)

# Left-join every series onto the bitcoin frame, one after another, so the
# result keeps exactly the dates present in `btc`.
dfs = [btc, gold, oil, SP500, BTC_tran, M2, search_interest]
BTC_reg = reduce(merg, dfs)

# Assign the prices of assets on weekends: dates missing from a series are
# filled with the last earlier value.
# DataFrame.ffill replaces fillna(method='ffill'), whose `method` argument is
# deprecated since pandas 2.1; the result is the same.
BTC_reg.ffill(inplace=True)


In [11]:
# Round every numeric column to two decimals before saving.
BTC_reg = BTC_reg.round(2)

# Two datasets are kept while the project is in progress: the seven-source
# merge goes to its own file so the older dataset is not overwritten.
_out_name = 'BTC_regression_data3.csv' if len(dfs) == 7 else 'BTC_regression_data.csv'
BTC_reg.to_csv(_out_name, index=True)
BTC_reg.head()

Unnamed: 0_level_0,BTC price [USD],Volume BTC,Gold price[USD],Oil WTI price[USD],SP500 close index,BTC n-transactions,M2(Not seasonally adjusted)[1e+09 USD],btc search interest
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-12-01,370.0,0.06,1332.5,68.98,2053.44,79098.0,11747.9,3
2014-12-02,378.0,15.01,1314.7,66.99,2066.55,99419.0,11747.9,3
2014-12-03,378.0,0.55,1325.8,67.3,2074.33,99419.0,11747.9,3
2014-12-04,377.1,0.01,1324.2,66.73,2071.92,99419.0,11747.9,3
2014-12-05,377.1,0.0,1310.8,65.89,2075.37,90900.0,11747.9,3
