In [1]:
import pandas as pd
import numpy  as np
import gc; gc.collect()

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Directory holding the per-ticker CSV files read by get_stock_n_smooth().
# The loader builds the file name as f'{YFLOAD_PATH}{ticker}.csv', so the
# trailing '/' is required.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
YFLOAD_PATH = '/Users/frkornet/CDA/Project/stockie/data/yfin/'
# Period label handed to get_stock_n_smooth(); the CSV-backed loader defined in
# this notebook accepts it but does not use it.
TRADE_PERIOD    = "10y"

In [3]:
def get_stock_n_smooth(ticker, period):
    """
    Load the stored price history for ``ticker`` from ``YFLOAD_PATH``.

    Per the original note this mirrors the function of the same name in
    util.py, except that it reads ``<YFLOAD_PATH><ticker>.csv`` from disk
    instead of downloading and smoothing the data again. The CSV is expected
    to contain a ``Date`` column, which becomes the (unnamed) index of the
    returned frame and is removed from the columns.

    Parameters
    ----------
    ticker : str
        Symbol whose CSV file should be read.
    period : str
        Kept for signature compatibility; not used by this file-based loader.

    Returns
    -------
    (success, hist) : tuple
        ``hist`` is the loaded DataFrame, or ``None`` when loading failed.
        ``success`` is True only when the frame has more than 5 * 252 rows
        (roughly five years of trading days).

    Notes
    -----
    Loading is best-effort: a missing or unreadable file is reported with a
    printed message and ``(False, None)`` instead of an exception. Only
    ``Exception`` subclasses are handled, so Ctrl-C (KeyboardInterrupt) and
    SystemExit are no longer swallowed.
    """
    gc.collect()
    try:
        hist = pd.read_csv(f'{YFLOAD_PATH}{ticker}.csv')
        hist.index = hist.Date.values
        del hist['Date']
    except FileNotFoundError:
        print(f'Failed to find {ticker}.csv in {YFLOAD_PATH}!')
        return False, None
    except Exception as exc:
        # The file exists but could not be parsed, or has no Date column:
        # report the real reason rather than claiming the file is missing.
        print(f'Failed to load {ticker}.csv from {YFLOAD_PATH}: {exc!r}')
        return False, None

    # Require a bit more than five years of daily rows.
    success = len(hist) > 5 * 252
    print(f'Successfully retrieved smoothed price data for {ticker} '+
        f'(len(hist)={len(hist)}, success={success})')
    return success, hist

In [4]:
def features(data, target):
    """
    Add rolling indicator columns to ``data`` and scale them by Close.

    For each look-back window ``n`` the following columns are written into
    ``data`` itself (the caller's frame is modified):

    * ``MACD_n``    - rolling mean of Close minus Close
    * ``PctDiff_n`` - ``Close.diff(n)``
    * ``StdDev_n``  - rolling standard deviation of Close
    * ``RSI_n``, ``WPR_n``, ``MFI_n``, ``BB_n`` - results of the helpers
      ``RSI``, ``WPR``, ``MFI`` and ``BB`` called with ``(data, n)``

    Every column except ``target``, 'smooth', 'Close', 'Date', 'Volume',
    'Dividends' and 'Stock Splits' is then divided by Close, and rows that
    contain a NaN are dropped. The cleaned frame (a new object produced by
    ``dropna``) is returned.

    NOTE(review): a helper named exactly ``RSI`` is not defined in the
    visible part of this notebook - it must exist in the namespace.
    """
    lookbacks = (3, 5, 10, 15, 20, 30, 45, 60)

    for span in lookbacks:
        close_window = data.Close.rolling(span)
        # Distance of the rolling mean from the current close.
        data[f'MACD_{span}'] = close_window.mean() - data.Close
        data[f'PctDiff_{span}'] = data.Close.diff(span)
        data[f'StdDev_{span}'] = close_window.std()
        data[f'RSI_{span}'] = RSI(data, span)
        data[f'WPR_{span}'] = WPR(data, span)
        data[f'MFI_{span}'] = MFI(data, span)
        data[f'BB_{span}'] = BB(data, span)

    # Columns that must keep their raw values.
    untouched = (target, 'smooth', 'Close', 'Date', 'Volume', 'Dividends', 'Stock Splits')
    close_snapshot = data.Close.copy()
    to_scale = [name for name in data.columns.tolist() if name not in untouched]
    for name in to_scale:
        data[name] = data[name] / close_snapshot

    return data.dropna()

In [5]:
def RSI_Frank(df, window):
    """
    Relative Strength Index computed from windowed sums of gains and losses.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Close`` column. (The previous version ignored this
        argument and read the notebook-global ``hist`` instead.)
    window : int
        Number of day-over-day changes included in each RSI value.

    Returns
    -------
    pandas.Series
        RSI in the range 0-100, aligned with ``df``. The first ``window``
        rows are NaN, as are rows whose window contains no price movement
        at all (0 / 0).
    """
    price = df['Close']
    daily_rets = price.diff()

    # Split the changes into non-negative gains and positive loss magnitudes;
    # the undefined first change counts as zero in both.
    gains = daily_rets.where(daily_rets >= 0, 0.0)
    losses = daily_rets.where(daily_rets < 0, 0.0).abs()

    # Windowed sums obtained as differences of running totals.
    up_total = gains.cumsum()
    down_total = losses.cumsum()
    up_gain = up_total - up_total.shift(window)
    down_loss = down_total - down_total.shift(window)

    rs = (up_gain / window) / (down_loss / window)

    # A window without losses gives rs == inf, and 100 - 100 / (1 + inf)
    # already evaluates to 100, so no special-casing is needed.
    rsi = 100 - (100 / (1 + rs))
    rsi.iloc[:window] = np.nan
    return rsi

In [6]:
def RSI_Harshad(df, window):
    """
    Relative Strength Index with Wilder-style recursive smoothing.

    The average gain and average loss are seeded with the simple mean of the
    first ``window`` day-over-day changes and then updated with
    ``avg = (previous_avg * (window - 1) + current) / window``.
    The previous version started the recursion from 0.0, which made the
    averages far too small for a long stretch after the start of the data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Close`` column.
    window : int
        Smoothing length; must be at least 1.

    Returns
    -------
    pandas.Series
        RSI rounded to two decimals, sharing ``df``'s index. Rows
        ``0 .. window - 1`` are NaN; a row is also NaN when both averages
        are zero (or not a number).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    # Work on plain arrays: positions are unambiguous regardless of the
    # frame's index labels, and the loop avoids per-element Series access.
    close = df['Close'].to_numpy(dtype=float)
    n_rows = len(close)
    values = np.full(n_rows, np.nan)

    def _rsi(avg_gain, avg_loss):
        total = avg_gain + avg_loss
        # total is 0 (flat window) or NaN (missing prices): RSI is undefined.
        if not total > 0:
            return np.nan
        return round(float(100 * avg_gain / total), 2)

    if n_rows > window:
        change = np.diff(close)  # change[k] = close[k + 1] - close[k]
        gain = np.where(change > 0, change, 0.0)
        # For losses keep the absolute size of the move.
        loss = np.where(change > 0, 0.0, np.abs(change))

        # Seed with the simple average of the first `window` changes.
        avg_gain = gain[:window].mean()
        avg_loss = loss[:window].mean()
        values[window] = _rsi(avg_gain, avg_loss)

        for i in range(window + 1, n_rows):
            avg_gain = (avg_gain * (window - 1) + gain[i - 1]) / window
            avg_loss = (avg_loss * (window - 1) + loss[i - 1]) / window
            values[i] = _rsi(avg_gain, avg_loss)

    return pd.Series(values, index=df.index, name=df['Close'].name)

In [7]:
def WPR(df, window):
    """
    Williams %R over a rolling window.

    Computes ``100 * (Close - highest High) / (highest High - lowest Low)``
    where the extremes are taken over the last ``window`` rows. The result
    is <= 0 whenever Close does not exceed the rolling high; rows without a
    full window are NaN.
    """
    rolling_top = df['High'].rolling(window, min_periods=window).max()
    rolling_bottom = df['Low'].rolling(window, min_periods=window).min()
    trading_range = rolling_top - rolling_bottom
    distance_from_top = df['Close'] - rolling_top
    return 100 * distance_from_top / trading_range

In [8]:
def MFI(df, window):
    """
    Money Flow Index over a rolling window.

    The typical price is ``(High + Low + Close) / 3`` and the raw money flow
    is typical price times Volume. A day's flow counts as positive when the
    typical price is above the previous day's, and as negative when it is
    below. Days with an unchanged typical price - and the first row, which
    has no previous day - belong to neither side. (The previous version
    booked both of those cases as negative flow.)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``High``, ``Low``, ``Close`` and ``Volume`` columns.
    window : int
        Number of rows summed for each value.

    Returns
    -------
    pandas.Series
        ``100 * positive / (positive + negative)`` per row; NaN until a full
        window is available and when the window contains no classified flow.
    """
    typical_price = (df['High'] + df['Low'] + df['Close'])/3
    raw_money_flow = typical_price*df['Volume']

    previous_price = typical_price.shift(1)
    # Comparisons with the NaN predecessor of the first row are False on
    # both sides, so that row contributes to neither sum.
    rose = typical_price > previous_price
    fell = typical_price < previous_price

    pos_money_flow = raw_money_flow.where(rose, 0.0)
    neg_money_flow = raw_money_flow.where(fell, 0.0)

    mfi_pos = pos_money_flow.rolling(window).sum()
    mfi_neg = neg_money_flow.rolling(window).sum()
    mfi = 100 * mfi_pos / (mfi_pos+mfi_neg)

    return mfi

In [9]:
def BB(df, window):
    """
    Position of Close inside its Bollinger Bands.

    The bands are the rolling mean of Close plus/minus two rolling standard
    deviations. The returned series is ``(Close - lower) / (upper - lower)``:
    0 on the lower band, 1 on the upper band, NaN while the window is not
    yet full or when the band has zero width.
    """
    close_window = df['Close'].rolling(window)
    centre = close_window.mean()
    spread = close_window.std()

    upper_band = centre + 2 * spread
    lower_band = centre - 2 * spread
    band_width = upper_band - lower_band

    return (df['Close'] - lower_band) / band_width

In [10]:
# Load the stored AAPL history and stop right away if it is unusable.
target = 'target'
ticker = 'AAPL'
success, hist = get_stock_n_smooth(ticker, TRADE_PERIOD)
assert success, "Unable to get historical price data and smooth price!"
# Placeholder target column (all zeros) so the frame has the expected layout.
hist[target] = 0

Successfully retrieved smoothed price data for AAPL (len(hist)=2517, success=True)


In [11]:
hist

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,smooth,target
2011-02-14,10.98,11.06,10.97,11.05,310416400,0.0,0.0,10.952331,0
2011-02-15,11.05,11.07,11.00,11.07,284174800,0.0,0.0,10.937638,0
2011-02-16,11.10,11.23,11.09,11.17,481157600,0.0,0.0,10.922946,0
2011-02-17,10.99,11.08,10.97,11.02,530583200,0.0,0.0,10.908253,0
2011-02-18,11.03,11.06,10.75,10.78,816057200,0.0,0.0,10.893560,0
...,...,...,...,...,...,...,...,...,...
2021-02-08,136.03,136.96,134.92,136.91,71297200,0.0,0.0,136.688054,0
2021-02-09,136.62,137.88,135.85,136.01,76774200,0.0,0.0,136.930016,0
2021-02-10,136.48,136.99,134.40,135.39,73046600,0.0,0.0,137.171978,0
2021-02-11,135.90,136.39,133.77,135.13,64154400,0.0,0.0,137.413940,0


In [12]:
#hist = features(hist, target)

In [13]:
#hist.columns

In [14]:
len(hist)

2517

In [15]:
# Scratch series for the step-by-step RSI experiment below. All of them share
# hist's index and the name 'Close'; the four accumulators start at zero and
# the RSI itself starts out undefined.
Gain, Loss, Avg_gain, Avg_loss = (
    pd.Series(0.0, index=hist.index, name='Close') for _ in range(4)
)
rsi = pd.Series(np.nan, index=hist.index, name='Close')

In [16]:
# Day-over-day change in Close; the first row has no predecessor and is set to 0.
change = hist.Close.diff().fillna(0)
change

2011-02-14    0.00
2011-02-15    0.02
2011-02-16    0.10
2011-02-17   -0.15
2011-02-18   -0.24
              ... 
2021-02-08    0.15
2021-02-09   -0.90
2021-02-10   -0.62
2021-02-11   -0.26
2021-02-12    0.24
Name: Close, Length: 2517, dtype: float64

In [17]:
# Upward moves only: flat and down days contribute a gain of zero.
gain = change.where(change > 0, 0)
gain

2011-02-14    0.00
2011-02-15    0.02
2011-02-16    0.10
2011-02-17    0.00
2011-02-18    0.00
              ... 
2021-02-08    0.15
2021-02-09    0.00
2021-02-10    0.00
2021-02-11    0.00
2021-02-12    0.24
Name: Close, Length: 2517, dtype: float64

In [18]:
# Downward moves only, stored as positive magnitudes; up days contribute zero.
loss = change.where(change <= 0, 0).abs()
loss

2011-02-14    0.00
2011-02-15    0.00
2011-02-16    0.00
2011-02-17    0.15
2011-02-18    0.24
              ... 
2021-02-08    0.00
2021-02-09    0.90
2021-02-10    0.62
2021-02-11    0.26
2021-02-12    0.00
Name: Close, Length: 2517, dtype: float64

In [19]:
# Step-by-step re-run of the loop-based RSI on `hist`, used to compare its
# intermediate series (Gain, Loss, Avg_gain) with the vectorised versions.
window = 14
# Reset the running averages so the cell can be re-executed.
Avg_gain[:] = 0.0
Avg_loss[:] = 0.0
# NOTE(review): Gain[i] / Loss[i] / Avg_gain[i] use an integer key on series
# whose index holds the Date values from the CSV - this relies on pandas
# treating the integer as a position; confirm for the pandas version in use.
for i in range(1,len(hist)):
    if hist.Close.iloc[i] > hist.Close.iloc[i-1]:
        Gain[i]=hist.Close.iloc[i]-hist.Close.iloc[i-1]
    else:
        # For loss save the absolute value on loss
        Loss[i]=abs(hist.Close.iloc[i]-hist.Close.iloc[i-1])
    if i>=window:
        # Recursive smoothing. Avg_gain[window-1] and Avg_loss[window-1] are
        # still 0.0 when this first runs, i.e. the recursion is not seeded
        # with the mean of the first `window` gains/losses.
        Avg_gain[i]=(Avg_gain[i-1]*(window-1)+Gain[i]) / window
        Avg_loss[i]=(Avg_loss[i-1]*(window-1)+Loss[i]) / window
        # Alternative that was tried: plain mean over the last `window` rows.
#         Avg_gain[i] = np.sum(Gain[i-window+1:i+1]) / window
#         Avg_loss[i] = np.sum(Loss[i-window+1:i+1]) / window
        rsi[i]=(100*Avg_gain[i]/(Avg_gain[i]+Avg_loss[i]))

In [20]:
np.all(Gain == gain)

True

In [21]:
np.all(Loss == loss)

True

In [22]:
# Show only the rows where the loop's recursive average gain is non-zero.
Avg_gain [ Avg_gain > 0]

2011-03-08    0.000714
2011-03-09    0.000663
2011-03-10    0.000616
2011-03-11    0.012715
2011-03-14    0.015378
                ...   
2021-02-08    1.178553
2021-02-09    1.094371
2021-02-10    1.016201
2021-02-11    0.943616
2021-02-12    0.893357
Name: Close, Length: 2502, dtype: float64

In [23]:
Avg_gain[15:16]

2011-03-08    0.000714
Name: Close, dtype: float64

In [24]:
# Simple 14-row moving average of the gains (rows 14 onwards), for comparison
# with the recursive Avg_gain produced by the loop.
gain.fillna(0).rolling(14).mean()[14:]

2011-03-07    0.063571
2011-03-08    0.062857
2011-03-09    0.055714
2011-03-10    0.055714
2011-03-11    0.067857
                ...   
2021-02-08    1.566429
2021-02-09    1.267143
2021-02-10    0.921429
2021-02-11    0.765000
2021-02-12    0.507143
Name: Close, Length: 2503, dtype: float64

In [25]:
def RSI_Harshad_fixed(df, window):
    """
    Relative Strength Index from simple moving averages of gains and losses.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Close`` column.
    window : int
        Number of rows in each moving average. (The previous version
        ignored this argument and always used 14.)

    Returns
    -------
    pandas.Series
        ``100 * avg_gain / (avg_gain + avg_loss)``, aligned with ``df``.
        The first ``window - 1`` rows are NaN, as are rows whose window
        contains no price movement. The change of the first row is taken
        as 0, so row ``window - 1`` is based on ``window - 1`` real changes.
    """
    change = df['Close'].diff().fillna(0)

    # Upward moves and magnitudes of downward moves; all other days are 0.
    gain = change.where(change > 0, 0.0)
    loss = change.where(change < 0, 0.0).abs()

    avg_gain = gain.rolling(window).mean()
    avg_loss = loss.rolling(window).mean()

    return 100 * (avg_gain / (avg_gain + avg_loss))

In [26]:
RSI_Harshad_fixed(hist, 14)

2011-02-14          NaN
2011-02-15          NaN
2011-02-16          NaN
2011-02-17          NaN
2011-02-18          NaN
                ...    
2021-02-08    63.399827
2021-02-09    56.677316
2021-02-10    47.636632
2021-02-11    42.584493
2021-02-12    32.961931
Name: Close, Length: 2517, dtype: float64

In [27]:
RSI_Frank(hist, 14)

2011-02-14          NaN
2011-02-15          NaN
2011-02-16          NaN
2011-02-17          NaN
2011-02-18          NaN
                ...    
2021-02-08    63.399827
2021-02-09    56.677316
2021-02-10    47.636632
2021-02-11    42.584493
2021-02-12    32.961931
Name: Close, Length: 2517, dtype: float64

In [28]:
# Check that the rolling-mean RSI and the running-total RSI agree. The first
# 14 rows are skipped because RSI_Frank leaves them as NaN.
np.allclose(RSI_Harshad_fixed(hist, 14)[14:], RSI_Frank(hist, 14)[14:])

True

In [29]:
%time RSI_Harshad_fixed(hist, 14)

CPU times: user 6.74 ms, sys: 1.43 ms, total: 8.17 ms
Wall time: 6.95 ms


2011-02-14          NaN
2011-02-15          NaN
2011-02-16          NaN
2011-02-17          NaN
2011-02-18          NaN
                ...    
2021-02-08    63.399827
2021-02-09    56.677316
2021-02-10    47.636632
2021-02-11    42.584493
2021-02-12    32.961931
Name: Close, Length: 2517, dtype: float64

In [30]:
%time RSI_Frank(hist, 14)

CPU times: user 10.4 ms, sys: 1.07 ms, total: 11.4 ms
Wall time: 10.6 ms


2011-02-14          NaN
2011-02-15          NaN
2011-02-16          NaN
2011-02-17          NaN
2011-02-18          NaN
                ...    
2021-02-08    63.399827
2021-02-09    56.677316
2021-02-10    47.636632
2021-02-11    42.584493
2021-02-12    32.961931
Name: Close, Length: 2517, dtype: float64