In [1]:
import pandas as pd
import numpy as np
import talib

In [2]:
# Input CSVs, one per asset. Later cells take the first three characters of
# each entry as the asset's label in the 'stock' column.
filename = [
    'ETH-USD.csv',
    'BTC-USD.csv',
    'GME.csv',
    'AMC.csv',
]

# Read every file from the shared data directory, in list order.
eth, btc, gme, amc = (pd.read_csv(f'../../data/csv/{name}') for name in filename)

In [3]:
def get_indicators(df):
    """Compute six TA-Lib indicators for one price frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``High``, ``Low``, ``Close`` and ``Volume`` columns as
        attributes; each is passed to TA-Lib unchanged.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), 6)`` whose columns are, in order:
        MOM(10), RSI(14), NATR(14), CCI(14), OBV and ADX(14). Values are
        whatever TA-Lib returns, so rows for which an indicator is not yet
        defined are NaN.
    """
    high, low, close, volume = df.High, df.Low, df.Close, df.Volume

    # Order matters: callers assign these columns positionally to
    # ['mom', 'rsi', 'natr', 'cci', 'obv', 'adx'].
    # The previously computed AROON up/down series were never returned, so
    # that dead computation has been dropped.
    series = [
        talib.MOM(close, timeperiod=10),
        talib.RSI(close, timeperiod=14),
        talib.NATR(high, low, close, timeperiod=14),
        talib.CCI(high, low, close, timeperiod=14),
        talib.OBV(close, volume),
        talib.ADX(high, low, close, timeperiod=14),
    ]

    # Stack the 1-D results side by side into an (n, 6) matrix.
    return np.column_stack([s.to_numpy() for s in series])


In [4]:
# Column names for the indicator matrix, in the same order as the columns
# returned by get_indicators.
indicators = ['mom', 'rsi', 'natr', 'cci', 'obv', 'adx']

# Attach the indicator columns to each price frame in place.
for frame in (eth, btc, gme, amc):
    frame.loc[:, indicators] = get_indicators(frame)

In [6]:
# Start from a copy of ETH so the `eth` frame itself is left without the
# label columns.
df = eth.copy()
df['stock'] = filename[0][:3]
df['buy'] = (df.Close > df.Open).astype(int)

# The remaining frames are labelled in place, then everything is stacked in
# file order. Each frame keeps its own row index (no ignore_index).
labelled = [df]
for source_name, frame in zip(filename[1:], (btc, gme, amc)):
    frame['stock'] = source_name[:3]
    # buy = 1 when the bar closed above its open, else 0.
    frame['buy'] = (frame.Close > frame.Open).astype(int)
    labelled.append(frame)

df = pd.concat(labelled)

In [7]:
# Inspect the stacked frame. Rows keep the row index of the file they came
# from, so index values repeat across tickers.
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,mom,rsi,natr,cci,obv,adx,stock,buy
0,2021-01-01,737.708374,749.201843,719.792236,730.367554,730.367554,13652004358,,,,,1.365200e+10,,ETH,0
1,2021-01-02,730.402649,786.798462,718.109497,774.534973,774.534973,19740771179,,,,,3.339278e+10,,ETH,1
2,2021-01-03,774.511841,1006.565002,771.561646,975.507690,975.507690,45200463368,,,,,7.859324e+10,,ETH,1
3,2021-01-04,977.058838,1153.189209,912.305359,1040.233032,1040.233032,56945985763,,,,,1.355392e+11,,ETH,1
4,2021-01-05,1041.498779,1129.371460,986.811279,1100.006104,1100.006104,41535932781,,,,,1.770752e+11,,ETH,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,2022-08-25,9.780000,9.940000,9.310000,9.570000,9.570000,31596300,-15.889999,30.015541,24.252544,-119.251729,5.203619e+09,32.815069,AMC,0
415,2022-08-26,9.580000,9.670000,8.960000,9.170000,9.170000,36395500,-15.270001,29.369141,24.055608,-100.805922,5.167224e+09,32.277447,AMC,0
416,2022-08-29,9.040000,9.610000,8.910000,9.470000,9.470000,39602600,-14.739999,30.576695,22.157709,-83.111073,5.206826e+09,31.792303,AMC,1
417,2022-08-30,9.590000,9.640000,9.030000,9.270000,9.270000,25167500,-15.539999,30.205931,21.488947,-71.260182,5.181659e+09,31.326561,AMC,0


In [8]:
# Date coverage per ticker: smallest and largest 'Date' value in each 'stock'
# group. In the recorded output AMC and GME start on 2021-01-04 while BTC and
# ETH start on 2021-01-01, i.e. the tickers do not share the same set of dates.
df.groupby('stock', as_index=False)[['Date']].agg(['min','max'])

Unnamed: 0_level_0,Date,Date
Unnamed: 0_level_1,min,max
stock,Unnamed: 1_level_2,Unnamed: 2_level_2
AMC,2021-01-04,2022-09-01
BTC,2021-01-01,2022-09-01
ETH,2021-01-01,2022-09-01
GME,2021-01-04,2022-09-01


In [12]:
# Merge on full dates: build a (Date, stock) grid holding every date seen in
# any ticker for every ticker, then left-join the observations onto it so that
# dates a ticker has no row for appear as all-NaN rows.
unique_dates = df[['Date']].drop_duplicates()
date = pd.concat([unique_dates.assign(stock=name[:3]) for name in filename])

df_clean = (
    pd.merge(date, df, on=['Date', 'stock'], how='left')
    .sort_values(['stock', 'Date'])
)

# Back-fill gaps within each ticker only. A plain frame-wide back-fill on the
# stock-sorted frame would let a ticker's trailing gap be filled from the
# first row of the next ticker.
# NOTE(review): back-filling copies later observations onto earlier dates
# (missing dates and the leading NaN indicator rows take values from the next
# available row). Confirm this look-ahead is acceptable for downstream use.
fill_cols = [col for col in df_clean.columns if col not in ('Date', 'stock')]
df_clean[fill_cols] = df_clean.groupby('stock')[fill_cols].bfill()

In [13]:
# Inspect the date-aligned frame. In the recorded output the AMC rows for
# 2021-01-01 to 2021-01-03 repeat the 2021-01-04 prices: those dates had no
# AMC row and were back-filled.
df_clean

Unnamed: 0,Date,stock,Open,High,Low,Close,Adj Close,Volume,mom,rsi,natr,cci,obv,adx,buy
1827,2021-01-01,AMC,2.200000,2.200000,2.000000,2.010000,2.010000,29873800.0,1.050000,91.986063,9.276018,167.633382,2.987380e+07,37.330776,0.0
1828,2021-01-02,AMC,2.200000,2.200000,2.000000,2.010000,2.010000,29873800.0,1.050000,91.986063,9.276018,167.633382,2.987380e+07,37.330776,0.0
1829,2021-01-03,AMC,2.200000,2.200000,2.000000,2.010000,2.010000,29873800.0,1.050000,91.986063,9.276018,167.633382,2.987380e+07,37.330776,0.0
1830,2021-01-04,AMC,2.200000,2.200000,2.000000,2.010000,2.010000,29873800.0,1.050000,91.986063,9.276018,167.633382,2.987380e+07,37.330776,0.0
1831,2021-01-05,AMC,1.990000,2.030000,1.910000,1.980000,1.980000,28148300.0,1.050000,91.986063,9.276018,167.633382,1.725500e+06,37.330776,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1822,2022-08-28,GME,30.480000,32.750000,30.379999,31.549999,31.549999,4292700.0,-8.130001,38.182626,8.812576,-100.062650,5.019206e+09,20.595231,1.0
1823,2022-08-29,GME,30.480000,32.750000,30.379999,31.549999,31.549999,4292700.0,-8.130001,38.182626,8.812576,-100.062650,5.019206e+09,20.595231,1.0
1824,2022-08-30,GME,31.620001,31.870001,29.420000,29.840000,29.840000,5060200.0,-12.349999,34.796587,9.238506,-105.514969,5.014146e+09,19.891073,0.0
1825,2022-08-31,GME,28.000000,28.910000,26.950001,27.629999,27.629999,5293900.0,-12.890001,30.973629,10.011895,-129.729106,5.008852e+09,20.054094,0.0


In [14]:
# Write the date-aligned frame next to the input CSVs, without the row index.
output_csv = '../../data/csv/meme_stock_indicators.csv'
df_clean.to_csv(output_csv, index=False)