In [244]:
import numpy as np
import pandas as pd
import yfinance as yf


In [245]:
def clean(ticker):
    ydf = yf.download(ticker)
    ydf.to_csv(f'y{ticker}.csv')

    btc = pd.read_csv(f'y{ticker}.csv')
    btc = btc.drop([0,1])
    btc['Date'] = pd.to_datetime(btc['Price']).dt.date
    btc= btc.set_index('Date')
    btc = btc.drop(['Price'], axis = 1)
    btc = btc.reset_index()
    btc['Ticker'] = ticker
    return btc



In [246]:
btc = clean('BTC-USD')
eth = clean('ETH-USD')
dc = clean('DOGE-USD')
usdc = clean('USDC-USD')
bnb = clean('BNB-USD')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [247]:
#combine data
data = pd.concat([btc,eth,dc,usdc,bnb])
data['Date'] = pd.to_datetime(data.Date)
data = data.sort_values('Date')
data = data.query("Date >=  '2018-10-08'")

In [248]:
#rename and fix dtype
data = data.rename(columns = {'Date': 'date',
                              'Adj Close': 'adj_prc',
                              'Close': 'prc',
                              'High': 'high',
                              'Low': 'low',
                              'Open': 'open',
                              'Volume': 'vol',
                              "Ticker": 'tick'
                            })

data = data.apply(lambda x: x.astype(float) if x.dtype == 'object' and x.name != 'tick' else x)
data.set_index('date',inplace=True)
data.reset_index(inplace = True)
data.dtypes


date       datetime64[ns]
adj_prc           float64
prc               float64
high              float64
low               float64
open              float64
vol               float64
tick               object
dtype: object

In [249]:
#returns
data['ret'] = (data.adj_prc - data.open)/data.open

In [250]:
#momentum, one month rolling window
data['logret'] = np.log(1 + data['ret'])
data['mom'] = data.groupby('tick')['logret'].rolling(30,30).sum().reset_index(drop=True)
data['mom'] = data.groupby('tick')['mom'].shift(2)
data = data.dropna(subset='mom')

In [251]:
#dispersion
data['disp'] = (data.high - data.low)/data.open

In [259]:
#lag returns
data['lagret'] = data.groupby('tick')['ret'].shift(1)
data = data.dropna(subset='lagret')

In [260]:
data.to_csv('proj_data.csv')

In [261]:
data

Unnamed: 0,date,adj_prc,prc,high,low,open,vol,tick,ret,logret,mom,disp,lagret
43,2018-10-16,6596.109863,6596.109863,6673.589844,6571.370117,6601.410156,4.074800e+09,BTC-USD,-0.000803,-0.000803,-0.055999,0.015485,0.048294
45,2018-10-17,0.004599,0.004599,0.005025,0.004493,0.005010,2.414460e+07,DOGE-USD,-0.082036,-0.085597,-0.222149,0.106188,-0.039257
46,2018-10-17,9.904470,9.904470,10.028100,9.818820,10.025600,3.532540e+07,BNB-USD,-0.012082,-0.012156,-0.023371,0.020875,-0.030074
47,2018-10-17,207.082993,207.082993,211.125000,205.927994,210.216995,1.444130e+09,ETH-USD,-0.014908,-0.015021,-0.197132,0.024722,0.002347
48,2018-10-17,1.012190,1.012190,1.026530,1.010580,1.024040,1.102740e+06,USDC-USD,-0.011572,-0.011639,-0.199982,0.015576,-0.005646
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11110,2024-11-07,0.193190,0.193190,0.201521,0.186615,0.196642,4.338343e+09,DOGE-USD,-0.017553,-0.017709,0.000054,0.075803,0.155534
11111,2024-11-08,76048.757812,76048.757812,76850.507812,74494.718750,75634.906250,6.345985e+10,BTC-USD,0.005472,0.005457,0.000137,0.031147,0.090552
11112,2024-11-08,0.999986,0.999986,1.000164,0.999286,0.999857,1.146853e+10,USDC-USD,0.000129,0.000129,-0.000133,0.000879,-0.000062
11113,2024-11-08,2907.714355,2907.714355,2917.944580,2704.206787,2724.422119,3.533010e+10,ETH-USD,0.067277,0.065111,-0.000112,0.078453,0.124510
