In [1]:
import pandas as pd
import numpy as np
import os

## Technical indicators for price data

In [2]:
# Input folder (processed data) and output folder (written to by later cells).
folder_src = "../../Data/Casper/Processed/"
folder_dest = "../../Data/Casper/Final/"

# Load the market data and expose the "current_price" column under the name "close".
df_org = pd.read_csv(f"{folder_src}Market.csv").rename(columns={"current_price": "close"})

# Independent working frame with just the date and the closing price.
df = df_org.loc[:, ["DateTime", "close"]].copy()
df_org.head()

Unnamed: 0,DateTime,close,market_cap,total_volume,BTC_price,BTC_market_caps,BTC_total_volumes,ETH_price,ETH_market_caps,ETH_total_volumes,LTC_price,LTC_market_caps,LTC_total_volumes,XRP_price,XRP_market_caps,XRP_total_volumes
0,2021-06-01,0.307023,0.0,17685480.0,37340.679266,699138600000.0,37950800000.0,2708.429866,314463200000.0,44445830000.0,187.466185,12513820000.0,4865806000.0,1.041413,48054550000.0,6691380000.0
1,2021-06-02,0.294818,0.0,18838090.0,36680.068202,688726400000.0,33536910000.0,2632.6566,306488800000.0,39898650000.0,183.14969,12225680000.0,4336082000.0,1.011334,46674110000.0,6148631000.0
2,2021-06-03,0.322123,0.0,55705880.0,37685.717982,704531300000.0,31240690000.0,2717.154037,315313300000.0,39148860000.0,188.047863,12546750000.0,4236639000.0,1.025065,47256850000.0,3994210000.0
3,2021-06-04,0.319187,0.0,23905190.0,39151.316184,733150700000.0,33396720000.0,2858.276702,333964600000.0,38104660000.0,194.229586,12965290000.0,4414448000.0,1.05072,48491810000.0,4088715000.0
4,2021-06-05,0.297641,175535400.0,18713410.0,36938.720311,691747500000.0,39345490000.0,2694.497667,312991100000.0,39996580000.0,179.20742,11962530000.0,4377618000.0,0.970685,44809100000.0,4190246000.0


### Rate of change (ROC)

The rate of change is the percentage change of the closing price over a look-back period. It is calculated for look-back periods of 3, 7 and 30 days.

In [3]:
# Rate of change in percent: close divided by the close `period` rows earlier,
# minus one, times 100. The first `period` rows have no reference value and are NaN.
for period in (3, 7, 30):
    df[f'ROC_{period}'] = df.close.div(df.close.shift(period)).sub(1).mul(100)


### Moving averages

Two types of moving average are computed: the simple moving average (SMA) and the exponential moving average (EMA). Each is calculated for three window sizes (spans, in the case of the EMA): 3, 7 and 30 days.

In [4]:
# Simple moving averages: plain mean of the close over a fixed-size rolling window.
for window in (3, 7, 30):
    df[f'SMA_{window}'] = df.close.rolling(window).mean()

# Exponential moving averages, parameterised by span; all other ewm options
# are left at the pandas defaults.
for span in (3, 7, 30):
    df[f'EMA_{span}'] = df.close.ewm(span=span).mean()

### Moving Average Convergence Divergence (MACD)

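The MACD consists of three parts: the MACD line (here the 7-day EMA minus the 30-day EMA), the signal line (a 9-day EMA of the MACD line) and the histogram (the MACD line minus the signal line).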

In [5]:
# MACD line: short-span EMA minus long-span EMA (both computed in the previous cell).
df["MACD"] = df["EMA_7"] - df["EMA_30"]

# Signal line: EMA of the MACD line itself, using the recursive (adjust=False) form.
signal_period = 9
df["MACD_signal"] = df["MACD"].ewm(span=signal_period, adjust=False).mean()

# Histogram: distance between the MACD line and its signal line.
df["MACD_hist"] = df["MACD"].sub(df["MACD_signal"])


### Relative strength index (RSI)

The relative strength index is calculated for look-back periods of 7 and 14 days.

In [6]:
def rsi(df: pd.DataFrame, look_back: int) -> pd.Series:
    """Compute the relative strength index (RSI) of the ``close`` column.

    The RSI is ``100 - 100 / (1 + RS)``, where RS is the ratio of the simple
    rolling mean of the gains to the simple rolling mean of the losses over
    ``look_back`` consecutive price changes.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a numeric ``close`` column. It is not modified.
    look_back : int
        Number of price changes averaged in each window.

    Returns
    -------
    pd.Series
        RSI values on the index of ``df``. The first ``look_back`` entries are
        NaN because a full window of price changes is not yet available. A
        window with gains but no losses gives 100; a window with no price
        movement at all gives NaN (0 / 0).
    """
    delta = df["close"].diff()

    # clip() keeps the leading NaN produced by diff() (and any NaN from missing
    # prices), so the undefined first change is not counted as a zero gain/loss
    # in the first rolling window.
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)

    avg_gain = gain.rolling(window=look_back).mean()
    avg_loss = loss.rolling(window=look_back).mean()

    # Division by a zero average loss yields inf, which maps to an RSI of 100.
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

In [7]:
# One RSI column per look-back period.
for look_back in (7, 14):
    df[f'RSI_{look_back}'] = rsi(df, look_back)

### Rolling standard deviation

Calculated for rolling windows of 3, 7 and 30 days.

In [8]:
# Standard deviation of the close over each rolling window size.
for window in (3, 7, 30):
    df[f'STD_{window}'] = df.close.rolling(window).std()

In [9]:
# The raw price is no longer needed once the indicators are computed.
df = df.drop(columns="close")
df

Unnamed: 0,DateTime,ROC_3,ROC_7,ROC_30,SMA_3,SMA_7,SMA_30,EMA_3,EMA_7,EMA_30,MACD,MACD_signal,MACD_hist,RSI_7,RSI_14,STD_3,STD_7,STD_30
0,2021-06-01,,,,,,,0.307023,0.307023,0.307023,0.000000,0.000000,0.000000,,,,,
1,2021-06-02,,,,,,,0.298886,0.300049,0.300717,-0.000668,-0.000134,-0.000535,,,,,
2,2021-06-03,,,,0.307988,,,0.312165,0.309594,0.308333,0.001261,0.000145,0.001116,,,0.013678,,
3,2021-06-04,3.961963,,,0.312043,,,0.315910,0.313103,0.311324,0.001779,0.000472,0.001307,,,0.014990,,
4,2021-06-05,0.957594,,,0.312984,,,0.306481,0.308035,0.308211,-0.000176,0.000342,-0.000519,,,0.013368,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,2023-05-28,8.320010,4.299057,-6.660722,0.050159,0.049385,0.052672,0.050583,0.049772,0.050360,-0.000588,-0.001188,0.000600,60.931380,55.274298,0.001671,0.001413,0.004872
727,2023-05-29,8.249716,6.596239,-4.344533,0.051492,0.049849,0.052593,0.051520,0.050443,0.050496,-0.000052,-0.000961,0.000909,66.277084,60.691839,0.001150,0.001820,0.004854
728,2023-05-30,-0.759752,3.595533,-14.384181,0.051364,0.050096,0.052313,0.050679,0.050292,0.050453,-0.000161,-0.000801,0.000640,57.528829,52.972098,0.001362,0.001654,0.004760
729,2023-05-31,-6.706123,-3.505160,-23.402136,0.050207,0.049845,0.051821,0.049502,0.049800,0.050316,-0.000516,-0.000744,0.000228,42.042021,49.692090,0.002090,0.001785,0.004353


In [10]:
# The longest look-back used above is 30 rows (ROC_30, SMA_30, STD_30), so rows
# with an index label below 30 contain NaN indicators and are discarded.
# .copy() detaches the result from the original frame so that later changes to
# `df` do not raise a SettingWithCopyWarning.
df = df.loc[30:].copy()

# Verify that no missing values remain in any column.
df.isnull().sum()

DateTime       0
ROC_3          0
ROC_7          0
ROC_30         0
SMA_3          0
SMA_7          0
SMA_30         0
EMA_3          0
EMA_7          0
EMA_30         0
MACD           0
MACD_signal    0
MACD_hist      0
RSI_7          0
RSI_14         0
STD_3          0
STD_7          0
STD_30         0
dtype: int64

## Align the other data sets to the same time interval

In [11]:
# Determine starting and end intervals and save TI data
START_DATE = min(df.DateTime)
END_DATE = max(df.DateTime)
print(START_DATE, END_DATE)

df.drop("DateTime", axis = 1, inplace=True)
df.to_csv(folder_dest + "TI.csv", index=False)

2021-07-01 2023-06-01


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("DateTime", axis = 1, inplace=True)


In [12]:
# Crop all other data sets so that they are on the same interval
for file in os.listdir(folder_src):
    df_temp = pd.read_csv(folder_src + file)
    df_temp = df_temp[(df_temp.DateTime >= START_DATE) & (df_temp.DateTime <= END_DATE)]
    df_temp.drop("DateTime", axis = 1, inplace=True)
    df_temp.to_csv(folder_dest + file, index=False)

In [13]:
# Sanity check: print the (rows, columns) shape of every file in the destination folder.
for file in os.listdir(folder_dest):
    df_temp = pd.read_csv(os.path.join(folder_dest, file))
    print(file, df_temp.shape)

TI.csv (701, 17)
Accounts.csv (701, 5)
Staking.csv (701, 4)
Market.csv (701, 15)
Transactions.csv (701, 7)
