In [1]:
import pandas as pd
import numpy as np
import fix_yahoo_finance as fyf
from pandas_datareader import data as web
from datetime import datetime

In [2]:
#fetching data
fyf.pdr_override()
today_date=datetime.today().strftime('%Y-%m-%d')
yf_data=web.get_data_yahoo('BTC-USD', start = '2013-01-01', end = today_date)
#bitcoin dataframe
bitcoin_df=pd.DataFrame(yf_data)
bitcoin_df.reset_index(inplace=True)
bitcoin_df

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-17,465.86,468.17,452.42,457.33,457.33,21056800
1,2014-09-18,456.86,456.86,413.10,424.44,424.44,34483200
2,2014-09-19,424.10,427.83,384.53,394.80,394.80,37919700
3,2014-09-20,394.67,423.30,389.88,408.90,408.90,36863600
4,2014-09-21,408.08,412.43,393.18,398.82,398.82,26580100
...,...,...,...,...,...,...,...
2445,2021-06-01,37293.79,37896.73,35787.09,36684.93,36684.93,34639423297
2446,2021-06-02,36699.92,38231.34,35966.31,37575.18,37575.18,33070867190
2447,2021-06-03,37599.41,39478.95,37243.97,39208.77,39208.77,35460750427
2448,2021-06-04,39242.48,39242.48,35717.72,36894.41,36894.41,41831090187


In [3]:
#checking for null values
bitcoin_df.rename(columns={"Adj Close": "Adj_Close"}, inplace=True)
bitcoin_df.isnull().values.any()

False

In [4]:
## Feature Engineering

In [5]:
#simple moving average 10,30 days
def SMA(df, x):
  SMA_calc=df.Adj_Close.rolling(window=x, min_periods=x).mean()
  return SMA_calc
bitcoin_df['SMA10']=SMA(bitcoin_df,10)
bitcoin_df['SMA30']=SMA(bitcoin_df,30)

#exponential moving average 20,30,200 days
def EMA(df, x):
  EMA_calc=df.Adj_Close.ewm(span=x, min_periods=x, adjust=False).mean()
  return EMA_calc

bitcoin_df['EMA20']=EMA(bitcoin_df,20)
bitcoin_df['EMA30']=EMA(bitcoin_df,30)
bitcoin_df['EMA200']=EMA(bitcoin_df,200)

In [6]:
#rate of change ROC 10,30 days
def ROC(df, n):
  ROC_diff=df.Adj_Close.diff(n)
  ROC_shift=df.Adj_Close.shift(n)
  #roc formula
  ROC_calc=((ROC_diff/ROC_shift)*100)
  return ROC_calc
  
bitcoin_df['ROC10']=ROC(bitcoin_df,10)
bitcoin_df['ROC30']=ROC(bitcoin_df,30)

In [7]:
#volume rate of change 10,30 days
def VROC(df, n):
  VROC_diff=df.Volume.diff(n)
  VROC_shift=df.Volume.shift(n)
  #roc formula
  VROC_calc=((VROC_diff/VROC_shift)*100)
  return VROC_calc
  
bitcoin_df['VROC10']=VROC(bitcoin_df,10)
bitcoin_df['VROC30']=VROC(bitcoin_df,30)

In [8]:
#relative strength index RSI 14 days
def RSI(df, n=14):
  delta=df.Adj_Close.diff()
  dup, ddown=delta.copy(), delta.copy()
  #average gains and losses
  dup[dup<0]=0
  ddown[ddown>0]=0
  rolup=dup.rolling(window=n, min_periods=n).mean()
  roldown=ddown.rolling(window=n, min_periods=n).mean().abs()
  #relative strength
  RS_calc=rolup/roldown
  #rsi formula
  RSI_calc=(100-(100/(1+RS_calc)))
  return RSI_calc

bitcoin_df['RSI14']=RSI(bitcoin_df)

In [9]:
#RSI indicators %30 %70
def RSI_Ind(df):
  rsi_vals=[]
  for i in df['RSI14'].iloc[:]:
    if i <= 30:
      rsi_vals.append(0)
    elif i >= 70:
      rsi_vals.append(1)
    else:
      rsi_vals.append(2)
  return rsi_vals

bitcoin_df['RSI14_IND']=RSI_Ind(bitcoin_df)

In [10]:
#STO%K%D Stochastic oscillator 14,200 days
def STOK(close, low, high, n):
  STOK_calc=((close-low.rolling(n).min())/(high.rolling(n).max()-low.rolling(n).min()))*100
  return STOK_calc

bitcoin_df['STOK14']=STOK(bitcoin_df.Adj_Close, bitcoin_df.Low, bitcoin_df.High, 14)
bitcoin_df['STOK200']=STOK(bitcoin_df.Adj_Close, bitcoin_df.Low, bitcoin_df.High, 200)

def STOD(stod, n):
  STOD_calc=stod.rolling(3).mean()
  return STOD_calc

bitcoin_df['STOD14']=STOD(bitcoin_df.STOK14, 14)
bitcoin_df['STOD14']=STOD(bitcoin_df.STOK200, 200)

In [11]:
#bollinger bands upper
def BBandsUp(close, n, mult):
  BBandsUp_calc=close.rolling(window=n, min_periods=n).mean() + close.rolling(window=n, min_periods=n).std() * mult
  return BBandsUp_calc

bitcoin_df['BBandUpper']=BBandsUp(bitcoin_df.Adj_Close,20,2)
#bollinger bands lower
def BBandsLow(close, n, mult):
  BBandsLow_calc=close.rolling(window=n, min_periods=n).mean() - close.rolling(window=n, min_periods=n).std() * mult
  return BBandsLow_calc

bitcoin_df['BBandLower']=BBandsLow(bitcoin_df.Adj_Close,20,2)

In [12]:
#filling NaNs of feature engineered columns at the end not to alter calculations
bitcoin_df.fillna(method='bfill', inplace=True)

In [13]:
#dataframe after feature engineering phase
bitcoin_df

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume,SMA10,SMA30,EMA20,...,ROC30,VROC10,VROC30,RSI14,RSI14_IND,STOK14,STOK200,STOD14,BBandUpper,BBandLower
0,2014-09-17,465.86,468.17,452.42,457.33,457.33,21056800,416.142,384.112000,383.881775,...,-16.086852,-28.624957,-35.409464,31.458753,2,15.323674,27.705117,29.436033,461.023578,318.794422
1,2014-09-18,456.86,456.86,413.10,424.44,424.44,34483200,416.142,384.112000,383.881775,...,-16.086852,-28.624957,-35.409464,31.458753,2,15.323674,27.705117,29.436033,461.023578,318.794422
2,2014-09-19,424.10,427.83,384.53,394.80,394.80,37919700,416.142,384.112000,383.881775,...,-16.086852,-28.624957,-35.409464,31.458753,2,15.323674,27.705117,29.436033,461.023578,318.794422
3,2014-09-20,394.67,423.30,389.88,408.90,408.90,36863600,416.142,384.112000,383.881775,...,-16.086852,-28.624957,-35.409464,31.458753,2,15.323674,27.705117,29.436033,461.023578,318.794422
4,2014-09-21,408.08,412.43,393.18,398.82,398.82,26580100,416.142,384.112000,383.881775,...,-16.086852,-28.624957,-35.409464,31.458753,2,15.323674,27.705117,29.436033,461.023578,318.794422
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2445,2021-06-01,37293.79,37896.73,35787.09,36684.93,36684.93,34639423297,36961.955,45398.923667,40582.780037,...,-35.221207,-39.628669,-9.267214,39.391539,2,46.666205,42.626905,42.469442,49721.687242,30428.906758
2446,2021-06-02,36699.92,38231.34,35966.31,37575.18,37575.18,33070867190,37242.415,44744.753333,40296.341938,...,-34.309459,-57.855011,-36.049391,51.177480,2,56.497360,44.389323,43.654124,48026.831889,30909.661111
2447,2021-06-03,37599.41,39478.95,37243.97,39208.77,39208.77,35460750427,37292.694,44273.927667,40192.763658,...,-26.483841,-47.356043,-48.281336,46.450845,2,72.924203,47.625402,44.880543,45951.805595,31917.510405
2448,2021-06-04,39242.48,39242.48,35717.72,36894.41,36894.41,41831090187,37141.913,43589.607667,39878.634739,...,-35.750899,-25.583233,-39.586518,49.023616,2,59.311609,42.346934,44.787220,44457.989695,32424.748305


In [14]:
#Price Difference
bitcoin_df['Price_Diff']=bitcoin_df['Adj_Close']-bitcoin_df['Adj_Close'].shift(-1)

In [15]:
#Direction: 0-Negative 1-Positive
bitcoin_df['PD_Direction'] = np.where(bitcoin_df.eval("Price_Diff < 0"), 0, 1)

In [16]:
#filter dates to only april
bitcoin_edited_df = bitcoin_df.loc[(bitcoin_df.Date >= '2021-04-01 00:00:00') & (bitcoin_df.Date <= '2021-04-30 23:59:59')]
bitcoin_edited_df

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume,SMA10,SMA30,EMA20,...,VROC30,RSI14,RSI14_IND,STOK14,STOK200,STOD14,BBandUpper,BBandLower,Price_Diff,PD_Direction
2384,2021-04-01,58926.56,59586.07,58505.28,59095.81,59095.81,61669163792,56096.148,55534.127667,56212.601774,...,29.745422,54.12654,2,89.803722,94.978855,94.753487,61743.471597,52222.693403,-288.5,0
2385,2021-04-02,59098.88,60267.19,58869.28,59384.31,59384.31,58727860620,56560.684,55828.996667,56514.669224,...,10.347547,53.508505,2,90.618259,95.534689,95.049678,61364.31904,52415.96796,1780.42,1
2386,2021-04-03,59397.41,60110.27,57603.89,57603.89,57603.89,59641344484,57043.646,56130.420667,56618.404536,...,13.941528,47.85382,2,71.698995,92.077434,94.196993,61149.202166,52461.241834,-1154.66,0
2387,2021-04-04,57604.84,58913.75,57168.68,58758.55,58758.55,50749662970,57749.085,56458.129,56822.227913,...,4.367493,53.654311,2,83.96875,94.319576,93.977233,61354.382991,52541.196009,-299.33,0
2388,2021-04-05,58760.88,59891.3,57694.82,59057.88,59057.88,60706272115,58141.142,56796.312333,57035.14716,...,76.658832,65.941014,2,87.149518,94.900821,93.765944,61565.735569,52555.141431,865.52,1
2389,2021-04-06,59171.93,59479.58,57646.81,58192.36,58192.36,66058027988,58363.027,57029.168,57145.357907,...,53.133794,61.619513,2,77.95225,93.220137,94.146844,61484.374941,52568.649059,2143.42,1
2390,2021-04-07,58186.51,58731.14,55604.02,56048.94,56048.94,75645303584,58372.846,57155.915333,57040.937154,...,55.65701,60.887166,2,55.175642,89.058002,92.392986,61396.211651,52475.814349,-2275.01,0
2391,2021-04-08,56099.91,58338.74,55879.09,58323.95,58323.95,53053855641,58430.221,57272.576333,57163.128853,...,4.206511,70.37607,1,77.631332,93.475661,91.917933,61393.576504,52476.179496,78.95,1
2392,2021-04-09,58326.56,58937.05,57807.86,58245.0,58245.0,46655208546,58362.952,57347.124667,57266.1642,...,-18.571013,62.054814,2,66.432669,93.322354,91.952006,61385.779504,52477.112496,-1548.23,0
2393,2021-04-10,58253.78,61276.66,58038.71,59793.23,59793.23,58238470525,58450.392,57413.395,57506.837134,...,2.582467,64.041168,2,76.095108,96.328738,94.375584,61674.974255,52414.898745,-411.73,0


In [17]:
#final Bitcoin-subdf
bitcoin_sub_df=bitcoin_edited_df[bitcoin_edited_df.columns[0:]]
bitcoin_sub_df

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume,SMA10,SMA30,EMA20,...,VROC30,RSI14,RSI14_IND,STOK14,STOK200,STOD14,BBandUpper,BBandLower,Price_Diff,PD_Direction
2384,2021-04-01,58926.56,59586.07,58505.28,59095.81,59095.81,61669163792,56096.148,55534.127667,56212.601774,...,29.745422,54.12654,2,89.803722,94.978855,94.753487,61743.471597,52222.693403,-288.5,0
2385,2021-04-02,59098.88,60267.19,58869.28,59384.31,59384.31,58727860620,56560.684,55828.996667,56514.669224,...,10.347547,53.508505,2,90.618259,95.534689,95.049678,61364.31904,52415.96796,1780.42,1
2386,2021-04-03,59397.41,60110.27,57603.89,57603.89,57603.89,59641344484,57043.646,56130.420667,56618.404536,...,13.941528,47.85382,2,71.698995,92.077434,94.196993,61149.202166,52461.241834,-1154.66,0
2387,2021-04-04,57604.84,58913.75,57168.68,58758.55,58758.55,50749662970,57749.085,56458.129,56822.227913,...,4.367493,53.654311,2,83.96875,94.319576,93.977233,61354.382991,52541.196009,-299.33,0
2388,2021-04-05,58760.88,59891.3,57694.82,59057.88,59057.88,60706272115,58141.142,56796.312333,57035.14716,...,76.658832,65.941014,2,87.149518,94.900821,93.765944,61565.735569,52555.141431,865.52,1
2389,2021-04-06,59171.93,59479.58,57646.81,58192.36,58192.36,66058027988,58363.027,57029.168,57145.357907,...,53.133794,61.619513,2,77.95225,93.220137,94.146844,61484.374941,52568.649059,2143.42,1
2390,2021-04-07,58186.51,58731.14,55604.02,56048.94,56048.94,75645303584,58372.846,57155.915333,57040.937154,...,55.65701,60.887166,2,55.175642,89.058002,92.392986,61396.211651,52475.814349,-2275.01,0
2391,2021-04-08,56099.91,58338.74,55879.09,58323.95,58323.95,53053855641,58430.221,57272.576333,57163.128853,...,4.206511,70.37607,1,77.631332,93.475661,91.917933,61393.576504,52476.179496,78.95,1
2392,2021-04-09,58326.56,58937.05,57807.86,58245.0,58245.0,46655208546,58362.952,57347.124667,57266.1642,...,-18.571013,62.054814,2,66.432669,93.322354,91.952006,61385.779504,52477.112496,-1548.23,0
2393,2021-04-10,58253.78,61276.66,58038.71,59793.23,59793.23,58238470525,58450.392,57413.395,57506.837134,...,2.582467,64.041168,2,76.095108,96.328738,94.375584,61674.974255,52414.898745,-411.73,0


In [18]:
bitcoin_sub_df.to_csv(r'data/bitcoin_price.csv')