# Silver Datasets

## Importando as bibliotecas

In [None]:
import os
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import seaborn           as sns

from datetime import datetime

## Definição de caminhos e diretórios

In [None]:
raw_path    = "data/raw"
bronze_path = "data/bronze"
silver_path = "data/silver"

## Helper Functions

## Criando os datasets em arquivos .csv

In [None]:
# Grava dataframe em arquivo .csv
def df_to_csv( df, path, mode ):
    with open( path, mode ) as csv_file:
        df.to_csv( csv_file, index = False )

## Calculando os indicadores

### SMA (Simple Moving Average)

In [None]:
# Simple Moving Average 
def SMA(data, ndays, _name): 
    SMA = pd.Series(data['Close'].rolling(ndays).mean(), name = _name) 
    data = data.join(SMA) 
    return data

### EWMA (Exponentially-weighted Moving Average)

In [None]:
# Calculando a EWMA
def EWMA(data, ndays,_name): 
    EMA = pd.Series(data['Close'].ewm(span = ndays, min_periods = ndays - 1).mean(), name = _name) 
    data = data.join(EMA) 
    return data

### Bollinger Bands

In [None]:
# Calculando as bandas
def BBANDS(data, window):
    MA = data.Close.rolling(window).mean()
    SD = data.Close.rolling(window).std()
    data['UpperBand'] = MA + (2 * SD) 
    data['LowerBand'] = MA - (2 * SD)
    return data

### RSI (Relative Strength Index)

In [None]:
# Calculando o RSI
def rsi(close, periods = 14):
    
    close_delta = close.diff()

    # Make two series: one for lower closes and one for higher closes
    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)
    
    ma_up = up.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
    ma_down = down.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()

    rsi = ma_up / ma_down
    rsi = 100 - (100/(1 + rsi))
    return rsi

### MFI (Money Flow Index)

In [None]:
def gain(x):
    return ((x > 0) * x).sum()


def loss(x):
    return ((x < 0) * x).sum()


# Calculate money flow index
def mfi(high, low, close, volume, n=14):
    typical_price = (high + low + close)/3
    money_flow = typical_price * volume
    mf_sign = np.where(typical_price > typical_price.shift(1), 1, -1)
    signed_mf = money_flow * mf_sign
    mf_avg_gain = signed_mf.rolling(n).apply(gain, raw=True)
    mf_avg_loss = signed_mf.rolling(n).apply(loss, raw=True)
    return (100 - (100 / (1 + (mf_avg_gain / abs(mf_avg_loss))))).to_numpy()


### ATR (Average True Range)

In [None]:
def atr(high, low, close, n=14):
    tr = np.amax(np.vstack(((high - low).to_numpy(), (abs(high - close)).to_numpy(), (abs(low - close)).to_numpy())).T, axis=1)
    return pd.Series(tr).rolling(n).mean().to_numpy()

### FI (Force Index)

In [None]:
def ForceIndex(data, ndays): 
    FI = pd.Series(data['Close'].diff(ndays) * data['Volume'], name = 'ForceIndex') 
    data = data.join(FI) 
    return data

### Ease of Movement

In [None]:
# Ease of Movement
def EMV(data, ndays): 
    dm = ((data['High'] + data['Low'])/2) - ((data['High'].shift(1) + data['Low'].shift(1))/2)
    br = (data['Volume'] / 100000000) / ((data['High'] - data['Low']))
    EMV = dm / br 
    EMV_MA = pd.Series(EMV.rolling(ndays).mean(), name = 'EMV') 
    data = data.join(EMV_MA) 
    return data 

## Carregando os dados para o dataframe df_silver

In [77]:
def silver_datasets_generation():  
    file_list = []
    df_silver = pd.DataFrame()
    
    if os.path.exists( bronze_path ):
        file_list = os.listdir( bronze_path )

        for file in file_list[0:1]:
            if '.csv' in file:
                print( f'Processando o arquivo {file}', os.path.getsize(f"{bronze_path}/{file}")/(1024**2), 'Mb')
                try:
                    # loadin the bronze dataset into df_silver
                    df_silver = pd.read_csv( f"{bronze_path}/{file}" )
                    # sorting the df_silver by the open time
                    df_silver.sort_values( "Open_time" )
                    # dropping the ignore column
                    df_silver.drop( "Ignore", axis = 1, inplace = True )
                    # reset index 
                    df_silver.reset_index( inplace = True, drop = True )
                    
                    # converting Open_time and Close_time columns to datetime
                    df_silver[ 'Open_time' ] = df_silver[ 'Open_time' ].apply( lambda x: datetime.fromtimestamp( np.round( x/1000, 0 ) ) )
                    df_silver[ 'Close_time' ] = df_silver[ 'Close_time' ].apply( lambda x: datetime.fromtimestamp( np.round( x/1000, 0 ) ) )
                    
                    # Adding the mean price column
                    df_silver[ "Mean"] = df_silver[["Open","High","Low","Close"]].sum( axis = 1 ) / 4
                    df_silver = df_silver [["Open_time","Open","High","Low","Close","Mean","Volume","Close_time","Quote_asset_volume","Number_of_trades","Taker_buy_base_asset_volume","Taker_buy_quote_asset_volume"]]
                    
                    ########################################################################################################
                    # Including the Technical Indicators
                    ########################################################################################################

                    # Long term SMA (Simple Moving Average)
                    df_silver = SMA(df_silver, 50, 'LT_SMA')

                    # Short term SMA (Simple Moving Average)
                    df_silver = SMA(df_silver, 10, 'ST_SMA')

                    # Long term EWMA (Exponentially Moving Average)
                    df_silver = EWMA(df_silver, 50, 'LT_EWMA')
                    
                    # Shor term EWMA (Exponentially Moving Average)
                    df_silver = EWMA(df_silver, 10, 'ST_EWMA')

                    # Boiler bands
                    df_silver = BBANDS(df_silver, 50)

                    # RSI (Relative Strength Index)
                    df_silver["RSI"] = rsi(df_silver["Close"])

                    # MFI (Money Flow index)
                    df_silver["MFI"] = mfi(df_silver["High"], df_silver["Low"], df_silver["Close"], df_silver["Volume"], 14)

                    # ATR (Average True Range)
                    df_silver['ATR'] = atr(df_silver['High'], df_silver['Low'], df_silver['Close'], 14)

                    # Force Index
                    df_silver = ForceIndex(df_silver, 1)

                    # EMV (Ease Movement)
                    df_silver = EMV(df_silver, 14)

                    break


                except Exception as ex:
                    print( f"ERROR: {ex}" )

                
                
    return df_silver
df_silver = silver_datasets_generation()     
df_silver.head(100)

Processando o arquivo ETCUSDT-5m.csv 18.37930393218994 Mb


Unnamed: 0,Open_time,Open,High,Low,Close,Mean,Volume,Close_time,Quote_asset_volume,Number_of_trades,...,ST_SMA,LT_EWMA,ST_EWMA,UpperBand,LowerBand,RSI,MFI,ATR,ForceIndex,EMV
0,2021-06-30 21:00:00,57.549,57.645,56.744,56.744,57.17050,33446.870,2021-06-30 21:05:00,1.908515e+06,2614,...,,,,,,,,,,
1,2021-06-30 21:05:00,56.741,56.769,56.239,56.259,56.50200,21475.548,2021-06-30 21:10:00,1.214538e+06,1941,...,,,,,,,,,-10415.640780,
2,2021-06-30 21:10:00,56.243,56.500,56.214,56.380,56.33425,11521.704,2021-06-30 21:15:00,6.494414e+05,1173,...,,,,,,,,,1394.126184,
3,2021-06-30 21:15:00,56.424,56.661,56.228,56.469,56.44550,12765.234,2021-06-30 21:20:00,7.209205e+05,1136,...,,,,,,,,,1136.105826,
4,2021-06-30 21:20:00,56.484,56.912,56.479,56.617,56.62300,17825.950,2021-06-30 21:25:00,1.011476e+06,1259,...,,,,,,,,,2638.240600,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021-07-01 04:55:00,53.841,54.013,53.420,53.500,53.69350,28608.451,2021-07-01 05:00:00,1.535650e+06,1829,...,54.2603,55.355253,54.229422,57.414572,53.644188,30.006617,21.769333,0.508857,-9612.439536,-138.384035
96,2021-07-01 05:00:00,53.529,53.990,53.220,53.966,53.67625,78120.101,2021-07-01 05:05:00,4.184693e+06,3739,...,54.2358,55.299625,54.181527,57.439740,53.575460,38.126973,42.076068,0.533429,36403.967066,-111.644270
97,2021-07-01 05:05:00,53.949,54.169,53.785,53.921,53.95600,24885.857,2021-07-01 05:10:00,1.343623e+06,1330,...,54.1897,55.244467,54.134159,57.463382,53.519098,37.672453,49.323417,0.506786,-1119.863565,-54.249093
98,2021-07-01 05:10:00,53.925,54.123,53.603,53.637,53.82200,10522.097,2021-07-01 05:15:00,5.669577e+05,1003,...,54.0854,55.180205,54.043766,57.500394,53.431686,34.848872,44.578122,0.502214,-2988.275548,-93.456299


## Incluindo os Indicadores

In [None]:
# Long term SMA (Simple Moving Average)
df_silver = SMA(df_silver, 50, 'LT_SMA')

# Short term SMA (Simple Moving Average)
df_silver = SMA(df_silver, 10, 'ST_SMA')

In [None]:
# Long term EWMA (Exponentially Moving Average)
df_silver = EWMA(df_silver, 50, 'LT_EWMA')

# Shor term EWMA (Exponentially Moving Average)
df_silver = EWMA(df_silver, 10, 'ST_EWMA')

In [62]:
# Boiler bands
df_silver = BBANDS(df_silver, 50)

In [63]:
# RSI (Relative Strength Index)
df_silver["RSI"] = rsi(df_silver["Close"])

In [64]:
# MFI (Money Flow index)
df_silver["MFI"] = mfi(df_silver["High"], df_silver["Low"], df_silver["Close"], df_silver["Volume"], 14)

In [65]:
# ATR (Average True Range)
df_silver['ATR'] = atr(df_silver['High'], df_silver['Low'], df_silver['Close'], 14)

In [66]:
# Force Index
df_silver = ForceIndex(df_silver, 1)

In [67]:
# EMV (Ease Movement)
df_silver = EMV(df_silver, 14)

In [68]:
df_silver.tail()

Unnamed: 0,Open_time,Open,High,Low,Close,Mean,Volume,Close_time,Quote_asset_volume,Number_of_trades,...,ST_SMA,LT_EWMA,ST_EWMA,UpperBand,LowerBand,RSI,MFI,ATR,ForceIndex,EMV
193046,2022-10-31 20:35:00,24.26,24.27,24.22,24.27,24.255,1385.5,2022-10-31 20:40:00,33585.9845,54,...,24.251,24.187811,24.251464,24.342031,23.988369,55.87717,47.43492,0.067143,27.71,-5.939723
193047,2022-10-31 20:40:00,24.26,24.3,24.22,24.27,24.2625,1456.7,2022-10-31 20:45:00,35348.0191,79,...,24.252,24.191034,24.254834,24.346936,23.989864,55.87717,45.771507,0.067143,0.0,-10.416417
193048,2022-10-31 20:45:00,24.27,24.34,24.26,24.3,24.2925,1450.22,2022-10-31 20:50:00,35262.3071,157,...,24.256,24.195307,24.263046,24.353793,23.989807,58.663674,55.898626,0.065,43.5066,14.086414
193049,2022-10-31 20:50:00,24.3,24.3,24.26,24.29,24.2875,1747.48,2022-10-31 20:55:00,42438.7295,93,...,24.261,24.199021,24.267947,24.359577,23.990023,57.363225,42.793909,0.060714,-17.4748,-3.582975
193050,2022-10-31 20:55:00,24.3,24.3,24.25,24.25,24.275,709.83,2022-10-31 21:00:00,17234.2807,51,...,24.266,24.20102,24.264684,24.362703,23.991297,52.362962,44.35757,0.06,-28.3932,-0.038208


## Gravando os silvers datasets

In [69]:
# Precisa revisar
try:
    if os.path.exists( silver_path ):
        # pass
        df_to_csv( df_silver, f"{silver_path}/{file}", 'a' )
    
    else:
        os.mkdir( silver_path )
        df_to_csv( df_silver, f"{df_silver}/{file}", 'a' )

except Exception as ex:
    print( f"ERROR: {ex}" )

ERROR: name 'file' is not defined
