# Feature engineering

## Importing required libraries

In [None]:
!pip install talib-binary

Collecting talib-binary
  Downloading talib_binary-0.4.19-cp37-cp37m-manylinux1_x86_64.whl (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 5.0 MB/s 
Installing collected packages: talib-binary
Successfully installed talib-binary-0.4.19


In [None]:
import pandas as pd
import numpy as np
from google.colab import files
import talib as tb
import matplotlib.pyplot as plt

## With Fear & Greed index (`fear_gear_index` column)

In [None]:
# Open the Colab upload widget; the file chosen here (btc_raw_25.csv) is the
# one the next cell reads from the working directory.
uploaded = files.upload()

Saving btc_raw_25.csv to btc_raw_25.csv


In [None]:
# Load the uploaded CSV (the variant that includes `fear_gear_index`).
data_raw_25 = pd.read_csv('btc_raw_25.csv')

In [None]:
# Work on a copy so the frame loaded from disk stays untouched.
df_25 = data_raw_25.copy()

# Every column except the first and the last is treated as a raw feature
# (presumably the date and the prediction target are the ones skipped -- confirm).
raw_features_25 = df_25.columns[1:-1].tolist()

# Window lengths handed to each indicator as `timeperiod`.
smoothening_range_25 = [3, 7, 14, 30, 90]

In [None]:
# Show the raw feature names that the indicator loop below iterates over.
raw_features_25

['transactions',
 'size',
 'sentbyaddress',
 'difficulty',
 'hashrate',
 'mining_profitability',
 'sentinusd',
 'transactionfees',
 'median_transaction_fee',
 'confirmationtime',
 'transactionvalue',
 'tweets',
 'google_trends',
 'mediantransactionvalue',
 'activeaddresses',
 'top100cap',
 'fee_to_reward',
 'avg_price',
 'Open',
 'High',
 'Low',
 'Close',
 'miners_revenue',
 'coins_in_supply',
 'fear_gear_index']

In [None]:
# Feature engineering: expand every raw feature into rolling technical-analysis
# indicators computed with TA-Lib (imported as `tb`).
# https://ta-lib.github.io/ta-lib-python/
#
# For each raw feature and each window in `smoothening_range_25`, thirteen
# columns named '<feature>_<window><indicator>' are produced. Every indicator
# reads only the raw column, so the per-window frames are collected in a list
# and attached with one concat; concatenating inside the loop re-copied the
# ever-wider frame on every feature x window iteration.

indicator_frames_25 = []
for feature in raw_features_25:
  series = df_25[feature]
  for window in smoothening_range_25:
    prefix = feature + '_' + str(window)

    # Bollinger bands are returned as an (upper, middle, lower) triple.
    bb_upper, bb_middle, bb_lower = tb.BBANDS(series, timeperiod=window, nbdevup=2, nbdevdn=2, matype=0)

    # Dict order fixes the column order: it matches the original notebook.
    indicator_frames_25.append(pd.DataFrame({
      # Simple moving average
      prefix + 'sma': tb.SMA(series, timeperiod=window),
      # Weighted moving average
      prefix + 'wma': tb.WMA(series, timeperiod=window),
      # Standard deviation
      prefix + 'std': tb.STDDEV(series, timeperiod=window, nbdev=1),
      # Variance
      prefix + 'var': tb.VAR(series, timeperiod=window, nbdev=1),
      # Exponential moving average
      prefix + 'ema': tb.EMA(series, timeperiod=window),
      # Relative strength index
      prefix + 'rsi': tb.RSI(series, timeperiod=window),
      # Rate of change
      prefix + 'roc': tb.ROC(series, timeperiod=window),
      # Momentum indicator
      prefix + 'mom': tb.MOM(series, timeperiod=window),
      # Triple exponential moving average
      prefix + 'tema': tb.TEMA(series, timeperiod=window),
      # Double exponential moving average
      prefix + 'dema': tb.DEMA(series, timeperiod=window),
      # Bollinger bands
      prefix + 'BBupper': bb_upper,
      prefix + 'BBmiddle': bb_middle,
      prefix + 'BBlower': bb_lower,
    }))

# Nothing to attach when there are no raw features or no windows.
if indicator_frames_25:
  engineered_25 = pd.concat(indicator_frames_25, axis=1)
  # Drop indicator columns left over from an earlier run of this cell so that
  # re-running it replaces them instead of appending duplicates.
  df_25 = pd.concat([df_25.drop(columns=engineered_25.columns, errors='ignore'), engineered_25], axis=1)


In [None]:
# Missing-value tally per column; list the ten columns with the most gaps.
null_counts_25 = df_25.isna().sum()
null_counts_25.sort_values(ascending=False).head(10)

mediantransactionvalue_90tema    267
Low_90tema                       267
coins_in_supply_90tema           267
sentinusd_90tema                 267
mining_profitability_90tema      267
transactionfees_90tema           267
Open_90tema                      267
top100cap_90tema                 267
High_90tema                      267
tweets_90tema                    267
dtype: int64

In [None]:
# Discard the indicator warm-up rows at the top of the frame. The row count is
# taken from the largest per-column null tally (267 for this data, set by the
# 90-period TEMA) instead of being hard-coded, so it stays correct if
# `smoothening_range_25` changes.
# NOTE: assumes the missing values are confined to the leading rows; the
# null check that follows confirms it for this dataset.
warmup_rows_25 = int(df_25.isnull().sum().max())
df_25_final = df_25.iloc[warmup_rows_25:, :]

In [None]:
# (rows, columns) remaining after the leading rows were dropped.
df_25_final.shape

(1230, 1652)

In [None]:
# Count the columns that still hold at least one missing value (0 expected).
columns_with_nulls_25 = df_25_final.isna().any()
columns_with_nulls_25.sum()

0

## Without Fear & Greed index

In [None]:
# Open the Colab upload widget; the file chosen here (btc_raw_24.csv) is the
# one the next cell reads from the working directory.
uploaded1 = files.upload()

Saving btc_raw_24.csv to btc_raw_24.csv


In [None]:
# Load the uploaded CSV (the variant without the `fear_gear_index` column).
data_raw_24 = pd.read_csv('btc_raw_24.csv')

In [None]:
# Count missing values per column of the raw frame before any rows are trimmed.
data_raw_24.isnull().sum()

Date                         0
transactions                 0
size                         0
sentbyaddress                0
difficulty                   0
hashrate                     0
mining_profitability         0
sentinusd                    0
transactionfees              0
median_transaction_fee     270
confirmationtime             0
transactionvalue             0
tweets                    1361
google_trends                0
mediantransactionvalue       0
activeaddresses              0
top100cap                    0
fee_to_reward                0
avg_price                    0
Open                         0
High                         0
Low                          0
Close                        0
miners_revenue               0
coins_in_supply              0
next_day_BTC_price           0
dtype: int64

In [None]:
# Drop the leading rows in which a raw column is still missing. The count is
# derived from the data (1361 for the uploaded file, set by the `tweets`
# column) rather than hard-coded, so re-running this cell once the rows are
# gone trims nothing further instead of discarding another 1361 rows.
# NOTE: assumes the missing values sit at the start of the series; the
# zero-null check on the final frame indicates that holds for this file.
leading_gap_24 = int(data_raw_24.isnull().sum().max())
data_raw_24 = data_raw_24.iloc[leading_gap_24:, :]

In [None]:
# Work on a copy so the trimmed raw frame stays untouched.
df_24 = data_raw_24.copy()

# Every column except the first (`Date`) and the last (`next_day_BTC_price`)
# is treated as a raw feature.
raw_features_24 = df_24.columns[1:-1].tolist()

# Window lengths handed to each indicator as `timeperiod`.
smoothening_range_24 = [3, 7, 14, 30, 90]

In [None]:
# Show the raw feature names that the indicator loop below iterates over.
raw_features_24

['transactions',
 'size',
 'sentbyaddress',
 'difficulty',
 'hashrate',
 'mining_profitability',
 'sentinusd',
 'transactionfees',
 'median_transaction_fee',
 'confirmationtime',
 'transactionvalue',
 'tweets',
 'google_trends',
 'mediantransactionvalue',
 'activeaddresses',
 'top100cap',
 'fee_to_reward',
 'avg_price',
 'Open',
 'High',
 'Low',
 'Close',
 'miners_revenue',
 'coins_in_supply']

In [None]:
# Feature engineering: expand every raw feature into rolling technical-analysis
# indicators computed with TA-Lib (imported as `tb`).
# https://ta-lib.github.io/ta-lib-python/
#
# For each raw feature and each window in `smoothening_range_24`, thirteen
# columns named '<feature>_<window><indicator>' are produced. Every indicator
# reads only the raw column, so the per-window frames are collected in a list
# and attached with one concat; concatenating inside the loop re-copied the
# ever-wider frame on every feature x window iteration.

indicator_frames_24 = []
for feature in raw_features_24:
  series = df_24[feature]
  for window in smoothening_range_24:
    prefix = feature + '_' + str(window)

    # Bollinger bands are returned as an (upper, middle, lower) triple.
    bb_upper, bb_middle, bb_lower = tb.BBANDS(series, timeperiod=window, nbdevup=2, nbdevdn=2, matype=0)

    # Dict order fixes the column order: it matches the original notebook.
    indicator_frames_24.append(pd.DataFrame({
      # Simple moving average
      prefix + 'sma': tb.SMA(series, timeperiod=window),
      # Weighted moving average
      prefix + 'wma': tb.WMA(series, timeperiod=window),
      # Standard deviation
      prefix + 'std': tb.STDDEV(series, timeperiod=window, nbdev=1),
      # Variance
      prefix + 'var': tb.VAR(series, timeperiod=window, nbdev=1),
      # Exponential moving average
      prefix + 'ema': tb.EMA(series, timeperiod=window),
      # Relative strength index
      prefix + 'rsi': tb.RSI(series, timeperiod=window),
      # Rate of change
      prefix + 'roc': tb.ROC(series, timeperiod=window),
      # Momentum indicator
      prefix + 'mom': tb.MOM(series, timeperiod=window),
      # Triple exponential moving average
      prefix + 'tema': tb.TEMA(series, timeperiod=window),
      # Double exponential moving average
      prefix + 'dema': tb.DEMA(series, timeperiod=window),
      # Bollinger bands
      prefix + 'BBupper': bb_upper,
      prefix + 'BBmiddle': bb_middle,
      prefix + 'BBlower': bb_lower,
    }))

# Nothing to attach when there are no raw features or no windows.
if indicator_frames_24:
  engineered_24 = pd.concat(indicator_frames_24, axis=1)
  # Drop indicator columns left over from an earlier run of this cell so that
  # re-running it replaces them instead of appending duplicates.
  df_24 = pd.concat([df_24.drop(columns=engineered_24.columns, errors='ignore'), engineered_24], axis=1)


In [None]:
# Missing-value tally per column; list the ten columns with the most gaps.
null_counts_24 = df_24.isna().sum()
null_counts_24.sort_values(ascending=False).head(10)

difficulty_90tema                267
tweets_90tema                    267
Close_90tema                     267
sentbyaddress_90tema             267
median_transaction_fee_90tema    267
Low_90tema                       267
confirmationtime_90tema          267
transactionvalue_90tema          267
High_90tema                      267
google_trends_90tema             267
dtype: int64

In [None]:
# Discard the indicator warm-up rows at the top of the frame. The row count is
# taken from the largest per-column null tally (267 for this data, set by the
# 90-period TEMA) instead of being hard-coded, so it stays correct if
# `smoothening_range_24` changes.
# NOTE: assumes the missing values are confined to the leading rows; the
# null check that follows confirms it for this dataset.
warmup_rows_24 = int(df_24.isnull().sum().max())
df_24_final = df_24.iloc[warmup_rows_24:, :]

In [None]:
# (rows, columns) remaining after the leading rows were dropped.
df_24_final.shape

(2684, 1586)

In [None]:
# Count the columns that still hold at least one missing value (0 expected).
columns_with_nulls_24 = df_24_final.isna().any()
columns_with_nulls_24.sum()

0

## Saving data to file

In [None]:
# Write both engineered datasets to comma-separated files, leaving the
# DataFrame index out of the output.
export_targets = (
  (df_25_final, 'BTC_feature_engineered_data_25.csv'),
  (df_24_final, 'BTC_feature_engineered_data_24.csv'),
)
for frame, csv_name in export_targets:
  frame.to_csv(csv_name, sep=',', index=False)