# Feature engineering

In [38]:
!pip install talib-binary
import talib as tb
import matplotlib.pyplot as plt



In [2]:
import pandas as pd
import numpy as np
from google.colab import files

In [17]:
# Open the Colab upload dialog; the chosen file(s) are written to the current
# working directory (the recorded run uploaded btc_raw_1498_27.csv).
uploaded = files.upload()

Saving btc_raw_1498_27.csv to btc_raw_1498_27.csv


In [27]:
# Load the uploaded raw CSV into a DataFrame using pandas' default parsing options.
data_raw_1498_27 = pd.read_csv('btc_raw_1498_27.csv')

In [28]:
# Work on a copy so the frame loaded from disk stays untouched.
df_1498_27 = data_raw_1498_27.copy()

# Every column except the first and the last is treated as a raw feature.
# NOTE(review): presumably the first column is a date/index and the last one
# the target -- confirm against the CSV header.
raw_features_1498_27 = df_1498_27.columns[1:-1].tolist()

# Look-back windows (in rows) used for every rolling indicator.
smoothening_range_1498_27 = [7, 30, 90]

In [29]:
# Display the selected raw feature names for a visual check.
raw_features_1498_27

['transactions',
 'size',
 'sentbyaddress',
 'difficulty',
 'hashrate',
 'mining_profitability',
 'sentinusd',
 'transactionfees',
 'median_transaction_fee',
 'confirmationtime',
 'transactionvalue',
 'tweets',
 'google_trends',
 'mediantransactionvalue',
 'activeaddresses',
 'top100cap',
 'fee_to_reward',
 'avg_price',
 'fear_gear_index',
 'Open',
 'High',
 'Low',
 'Close',
 'miners_revenue',
 'coins_in_supply']

In [30]:
# Feature engineering: for every raw feature and every look-back window,
# derive 13 rolling technical-analysis columns named
# '<feature>_<window><suffix>' and append them to df_1498_27.
#
# The indicator functions are TA-Lib functions (imported as `tb`).
# NOTE(review): the link below appears to document the separate `ta` package,
# not TA-Lib; it is kept for reference only.
# https://technical-analysis-library-in-python.readthedocs.io/en/latest/
#
# All new columns are collected first and concatenated ONCE at the end;
# concatenating inside the loop would re-copy the growing frame on each of
# the len(features) * len(windows) iterations.

engineered_1498_27 = {}

for feature in raw_features_1498_27:
  series = df_1498_27[feature]

  for period in smoothening_range_1498_27:
    prefix = f'{feature}_{period}'

    # Simple moving average
    engineered_1498_27[prefix + 'sma'] = tb.SMA(series, timeperiod=period)

    # Weighted moving average
    engineered_1498_27[prefix + 'wma'] = tb.WMA(series, timeperiod=period)

    # Standard deviation
    engineered_1498_27[prefix + 'std'] = tb.STDDEV(series, timeperiod=period, nbdev=1)

    # Variance
    engineered_1498_27[prefix + 'var'] = tb.VAR(series, timeperiod=period, nbdev=1)

    # Exponential moving average
    engineered_1498_27[prefix + 'ema'] = tb.EMA(series, timeperiod=period)

    # Relative strength index
    engineered_1498_27[prefix + 'rsi'] = tb.RSI(series, timeperiod=period)

    # Rate of change
    engineered_1498_27[prefix + 'roc'] = tb.ROC(series, timeperiod=period)

    # Momentum indicator
    engineered_1498_27[prefix + 'mom'] = tb.MOM(series, timeperiod=period)

    # Triple exponential moving average
    engineered_1498_27[prefix + 'tema'] = tb.TEMA(series, timeperiod=period)

    # Double exponential moving average
    engineered_1498_27[prefix + 'dema'] = tb.DEMA(series, timeperiod=period)

    # Bollinger bands (2 deviations up/down, matype=0).
    # NOTE(review): with matype=0 the middle band is presumably identical to
    # the 'sma' column above -- kept so the output schema is unchanged.
    bb_upper, bb_middle, bb_lower = tb.BBANDS(
        series, timeperiod=period, nbdevup=2, nbdevdn=2, matype=0)
    engineered_1498_27[prefix + 'BBupper'] = bb_upper
    engineered_1498_27[prefix + 'BBmiddle'] = bb_middle
    engineered_1498_27[prefix + 'BBlower'] = bb_lower

# dict insertion order == column order, matching the per-iteration order above.
engineered_df_1498_27 = pd.DataFrame(engineered_1498_27, index=df_1498_27.index)

# Make the cell safe to re-run: if it was executed before, discard the
# previously generated columns instead of appending duplicates.
stale_columns = df_1498_27.columns.intersection(engineered_df_1498_27.columns)
df_1498_27 = pd.concat(
    [df_1498_27.drop(columns=stale_columns), engineered_df_1498_27], axis=1)


In [31]:
# Count missing values per column and show the ten columns with the most.
null_counts_1498_27 = df_1498_27.isnull().sum()
null_counts_1498_27.sort_values(ascending=False).head(10)

tweets_90tema                    267
hashrate_90tema                  267
Open_90tema                      267
mediantransactionvalue_90tema    267
transactionfees_90tema           267
top100cap_90tema                 267
miners_revenue_90tema            267
median_transaction_fee_90tema    267
High_90tema                      267
mining_profitability_90tema      267
dtype: int64

In [33]:
# Drop the leading warm-up rows in which the rolling indicators are still NaN.
# The row count is derived from the data (largest per-column NaN count) rather
# than hard-coded; for the recorded run this evaluates to 267, the NaN count
# of the 90-period TEMA columns.
# NOTE(review): assumes all NaNs sit in the leading rows; the null check that
# follows this cell should still report 0 -- confirm after changing the data.
warmup_rows_1498_27 = int(df_1498_27.isnull().sum().max())
df_1498_27_final = df_1498_27.iloc[warmup_rows_1498_27:, :]

In [34]:
# (rows, columns) remaining after the leading rows were trimmed.
df_1498_27_final.shape

(1231, 1002)

In [36]:
# Number of columns that still contain at least one missing value.
columns_with_nulls = df_1498_27_final.isnull().any()
columns_with_nulls.sum()

0

## Saving data to file

In [37]:
# Write the engineered feature table as a comma-separated file (the default
# separator), leaving out the row index.
df_1498_27_final.to_csv(
    'BTC_feature_engineered_data_27.csv',
    index=False,
)