In [232]:
import numpy as np
import pandas as pd
import yfinance as yf


In [233]:
def clean(ticker):
    ydf = yf.download(ticker)
    ydf.to_csv(f'y{ticker}.csv')

    btc = pd.read_csv(f'y{ticker}.csv')
    btc = btc.drop([0,1])
    btc['Date'] = pd.to_datetime(btc['Price']).dt.date
    btc= btc.set_index('Date')
    btc = btc.drop(['Price'], axis = 1)
    btc = btc.reset_index()
    btc['Ticker'] = ticker
    return btc



In [234]:
btc = clean('BTC-USD')
eth = clean('ETH-USD')
dc = clean('DOGE-USD')
usdc = clean('USDC-USD')
bnb = clean('BNB-USD')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [235]:
#combine data
data = pd.concat([btc,eth,dc,usdc,bnb])
data['Date'] = pd.to_datetime(data.Date)
data = data.sort_values('Date')
data = data.query("Date >=  '2018-10-08'")

In [236]:
#rename and fix dtype
data = data.rename(columns = {'Date': 'date',
                              'Adj Close': 'adj_prc',
                              'Close': 'prc',
                              'High': 'high',
                              'Low': 'low',
                              'Open': 'open',
                              'Volume': 'vol',
                              "Ticker": 'tick'
                            })

data = data.apply(lambda x: x.astype(float) if x.dtype == 'object' and x.name != 'tick' else x)
data.set_index('date',inplace=True)
data.reset_index(inplace = True)
data.dtypes


date       datetime64[ns]
adj_prc           float64
prc               float64
high              float64
low               float64
open              float64
vol               float64
tick               object
dtype: object

In [237]:
#returns
data['ret'] = (data.adj_prc - data.open)/data.open

In [238]:
#momentum, one month rolling window
data['logret'] = np.log(1 + data['ret'])
data['mom'] = data.groupby('tick')['logret'].rolling(30,30).sum().reset_index(drop=True)
data['mom'] = data.groupby('tick')['mom'].shift(2)
data = data.dropna(subset='mom')

In [239]:
#dispersion
data['disp'] = (data.high - data.low)/data.open

In [240]:
data.to_csv('proj_data.csv')

In [241]:
data

Unnamed: 0,date,adj_prc,prc,high,low,open,vol,tick,ret,logret,mom,disp
35,2018-10-15,6596.540039,6596.540039,6965.060059,6258.680176,6292.640137,7.370770e+09,BTC-USD,0.048294,0.047165,-0.059923,0.112255
40,2018-10-16,10.010900,10.010900,10.326700,9.916390,10.321300,3.071920e+07,BNB-USD,-0.030074,-0.030535,-0.058137,0.039754
41,2018-10-16,1.021430,1.021430,1.035510,1.018990,1.027230,2.124080e+06,USDC-USD,-0.005646,-0.005662,-0.064215,0.016082
42,2018-10-16,0.005017,0.005017,0.005222,0.004966,0.005222,1.179460e+07,DOGE-USD,-0.039257,-0.040048,0.039826,0.049023
43,2018-10-16,6596.109863,6596.109863,6673.589844,6571.370117,6601.410156,4.074800e+09,BTC-USD,-0.000803,-0.000803,-0.055999,0.015485
...,...,...,...,...,...,...,...,...,...,...,...,...
11110,2024-11-07,76010.335938,76010.335938,76850.507812,74494.718750,75634.906250,6.345395e+10,BTC-USD,0.004964,0.004951,0.000137,0.031147
11111,2024-11-07,1.000015,1.000015,1.000164,0.999286,0.999857,1.148730e+10,USDC-USD,0.000158,0.000158,-0.000133,0.000879
11112,2024-11-07,2893.454590,2893.454590,2917.944580,2704.206787,2724.422119,3.532107e+10,ETH-USD,0.062043,0.060195,-0.000112,0.078453
11113,2024-11-07,0.193678,0.193678,0.201521,0.186615,0.196642,4.364935e+09,DOGE-USD,-0.015069,-0.015184,0.000054,0.075803
