In [133]:
# imports

import time
import random

from finta import TA

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import talib
import yfinance as yf

In [134]:
# constants

TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
FFILL_COLS = ['Open', 'High', 'Low', 'Close']

In [135]:
# import stock data from downloaded csv

ticker_dict = {}
for ticker in TICKERS:
    df = pd.read_csv(f'data_2022\\{ticker}_2022.csv', index_col = 'Date', parse_dates = True)
    df.index = pd.to_datetime(df.index, utc = True)
    #df = df.loc['2022-01-01': '2022-01-31']
    df = df.loc['2022-01-01': '2022-12-31']
    # print(ticker, df.isnull().sum())
    # print(ticker, df.isnull().any())

    # no point summing if im gna not look at it

    df[FFILL_COLS] = df[FFILL_COLS].ffill()
    #df[FFILL_COLS] = df[FFILL_COLS].ffill().bfill() # bfill for leading NaNs
    ticker_dict[ticker] = df

    

In [136]:
# get sample data from ticker_dict

#aapl_close = ticker_dict['AAPL'].loc['2022-01-03':'2022-01-31']['Close']
aapl_close = ticker_dict['AAPL'].loc['2022-01-03':'2022-12-31']['Close']
print(aapl_close)

Date
2022-01-03 05:00:00+00:00    178.443115
2022-01-04 05:00:00+00:00    176.178421
2022-01-05 05:00:00+00:00    171.492065
2022-01-06 05:00:00+00:00    168.629318
2022-01-07 05:00:00+00:00    168.795975
                                ...    
2022-12-23 05:00:00+00:00    130.026199
2022-12-27 05:00:00+00:00    128.221649
2022-12-28 05:00:00+00:00    124.287155
2022-12-29 05:00:00+00:00    127.807495
2022-12-30 05:00:00+00:00    128.123047
Name: Close, Length: 251, dtype: float64


In [137]:
# Simple Moving Average (SMA) func, but rolling window

#def simpleMovingAverage_rolling(ticker, start, end, window = 5, ticker_dict = ticker_dict):
def simpleMovingAverage(data, window = 5):
    """ Calculates the average closing price over a user defined period for the specified stock

    Args:
        data (pd.Series): stock close prices for specified ticker over user-defined period
        window (int): the amount of data points being calculated, defaults to 5
    
    Returns:
        rolling_sma (list[np.nan, np.float]): list of floats representing average of close prices of last 5 entries (including self)
    """

    if data.size < window:
        raise IndexError('Too little data selected for window size')

    # we can use this as one of the test cases
    print('Test SMA:', data.rolling(window).mean())

    window_sum = data.iloc[:window].sum()
    rolling_sma = [np.nan] * (window - 1) + [window_sum / window]

    for i in range(window, data.size):
        window_sum = window_sum + data.iloc[i] - data.iloc[i - window]
        rolling_sma.append(window_sum / window)

    # maybe return rolling_sma with window - 1 of NaNs at the start?
    # rolling_sma = [np.nan] * (window - 1) + rolling_sma

    return rolling_sma

In [138]:
print(simpleMovingAverage(aapl_close, 5))

Test SMA: Date
2022-01-03 05:00:00+00:00           NaN
2022-01-04 05:00:00+00:00           NaN
2022-01-05 05:00:00+00:00           NaN
2022-01-06 05:00:00+00:00           NaN
2022-01-07 05:00:00+00:00    172.707779
                                ...    
2022-12-23 05:00:00+00:00    130.994568
2022-12-27 05:00:00+00:00    130.533072
2022-12-28 05:00:00+00:00    129.298483
2022-12-29 05:00:00+00:00    128.146716
2022-12-30 05:00:00+00:00    127.693109
Name: Close, Length: 251, dtype: float64
[nan, nan, nan, nan, np.float64(172.70777893066406), np.float64(170.7822723388672), np.float64(169.87637634277343), np.float64(169.99598388671876), np.float64(170.03323669433593), np.float64(170.2097137451172), np.float64(169.74107666015624), np.float64(168.00576782226562), np.float64(165.84496154785157), np.float64(163.927294921875), np.float64(161.68216857910156), np.float64(159.71744689941406), np.float64(158.4350799560547), np.float64(157.39781799316407), np.float64(158.95077209472657), np.float

In [139]:
# Upward and Downward Runs
#def upwardDownwardRuns(ticker, start, end, ticker_dict = ticker_dict):
def upwardDownwardRuns(data):
    """ Calculates highest no. of consecutive upward and downward trending closing prices

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        ticker_dict (dict: str[df]): defaults to ticker_dict
    
    Returns:
        upward (int): no. of days in longest trend upwards of closing price
        downward (int): no. of days in longest trend downwards of closing price
    """

    upward = downward = lowscore = highscore = 0
    i = 1
    n = len(data)
    while i < n:
        diff = data.iloc[i] - data.iloc[i - 1]
        if diff > 0:
            upward += 1
            downward = 0
            if upward > highscore:
                highscore = upward
            # highscore = max(highscore, upward)
        elif diff < 0:
            downward += 1
            upward = 0
            if downward > lowscore:
                lowscore = downward
            # lowscore = max(lowscore, downward)
        else:
            upward = 0
            downward = 0
        i += 1
        
    return highscore, lowscore

In [140]:
print(upwardDownwardRuns(aapl_close))

(11, 8)


In [None]:
def relativeStrengthIndex(data, interval = 14):
    """ Measures the speed and magnitude of recent price changes to detect overbought/oversold conditions

    Args:
        data (pd.Series): stock close prices for specified ticker over user-defined period
        interval (int): the amount of data points being calculated, defaults to 5
    
    Returns:
        rsi_values (list[np.nan, np.float]): list of floats signalling a trend upwards or downwards
    """
    # offset = pd.concat([pd.Series([0]), truncated], ignore_index = True)
    # #print(offset)
    # differences = truncated - offset.iloc[:-1]
    differences = data[1:].values - data.values[:-1]  # contains length - 1 elements

    print(talib.RSI(data, timeperiod=interval))

    gains = 0
    losses = 0
    for i in differences[:interval]:
        gains += max(i, 0)     # if i is positive, use i else use 0
        losses += max(-i, 0)   # if i is negative, use i else use 0
    
    avg_gain = gains / interval
    avg_loss = losses / interval
    if avg_loss == 0:
        rsi = 100
    else:
        rsi = 100 - 100/(1 + avg_gain / avg_loss)
    # rs = ( sum(gains) / len(gains) ) / ( sum(losses) / len(losses) )
    # rsi = 100 - 100/(1 + rs) 
    rsi_values = [np.nan] * interval + [rsi]

    # we use smoothing as it counters having a period of no losses, which may cause avg_loss to be 0, causing divide by zero error
    # as long as there is one entry > 0, it will never divide by zero (if there is none, we use rsi = 100)
    for i in differences[interval:]:
        # obvious thing to do here is to loop thru differences every time, but we can use sliding window here again
        # use remainder as index?
        avg_gain = (avg_gain * (interval - 1) + max(i, 0)) / interval # i-1 because differences has one less entry than truncated
        avg_loss = (avg_loss * (interval - 1) + max(-i, 0)) / interval

        if avg_loss == 0:
            rsi = 100
        else:
            rsi = 100 - 100/(1 + avg_gain/avg_loss)

        # try:    # shud replace this with proper handling later, currently meant to weed out dividing by zero errors -> no loss = strength is max
        #     #rs = ( sum(gains) / len(gains) ) / ( sum(losses) / len(losses) )
        #     rs = sum(gains) / sum(losses)
        #     rsi = 100 - 100/(1 + rs) 
        # except:
        #     rsi = 100
        rsi_values.append(rsi)

    #to implement smoothing

    return rsi_values

In [142]:
output = relativeStrengthIndex(aapl_close)
print(output)

Date
2022-01-03 05:00:00+00:00          NaN
2022-01-04 05:00:00+00:00          NaN
2022-01-05 05:00:00+00:00          NaN
2022-01-06 05:00:00+00:00          NaN
2022-01-07 05:00:00+00:00          NaN
                               ...    
2022-12-23 05:00:00+00:00    35.694655
2022-12-27 05:00:00+00:00    33.675346
2022-12-28 05:00:00+00:00    29.726677
2022-12-29 05:00:00+00:00    36.860468
2022-12-30 05:00:00+00:00    37.473195
Length: 251, dtype: float64
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, np.float64(15.097549643326786), np.float64(14.138430327966063), np.float64(14.091280790701077), np.float64(13.831829975979701), np.float64(41.330294206916534), np.float64(48.42932132041199), np.float64(48.189385621242835), np.float64(50.11480087106165), np.float64(45.73943175662544), np.float64(45.31857130792094), np.float64(44.215832055073164), np.float64(49.91548555092102), np.float64(52.3154559949989), np.float64(45.568868118120385), np.float64(40.825734547651

In [143]:
def exponentialMovingAverage(data, interval = 10, smoothing = 2.0):
    """ Calculates a weighted moving average of closing prices that gives more importance to recent prices

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        interval (int): amount of preceding data points to use in calculating EMA, defaults to 10
        smoothing (float): strength of weightage for recent data points, defaults to 2.0
        ticker_dict (dict: str[df]): dictionary, with key-value pairs of ticker to dataframe containing stock history, defaults to ticker_dict
    
    Returns:
        rolling_ema (list[np.float]): list of floats representing weighted average of close prices
        or
        rolling_ema (list[np.nan, np.float]): list of floats representing weighted average of close prices
    """

    # pandas_method = truncated.ewm(span=interval, adjust=False).mean()
    # print(pandas_method.iloc[(interval - 1):].reset_index(drop = True))
    # print(talib.EMA(data, timeperiod = interval))
    # talib_ema = talib.EMA(data, timeperiod = interval)

    # print(data.size)
    # print(interval)
    if data.size < interval:
        
        raise IndexError('Data slice is too small given interval size of {interval}')
    
    # weight -> % of ema represented by the most recent entry
    #weight = smoothing / (interval + 1)     # weighting more recent entries is greater for shorter intervals
    weight = 2 / (interval + 1)     # weighting more recent entries is greater for shorter intervals
    sma = data.iloc[:interval].sum() / interval    # using sma as an initial state, afterwards we can implement EMA
    
    #return rolling_ema should figure out why interval must minus one
    rolling_ema = [np.nan] * (interval-1) + [sma]
    #rolling_ema = [sma]

    for i in range(interval, data.size):
        ema = data.iloc[i] * weight + rolling_ema[-1] * (1 - weight)
        rolling_ema.append(ema)
    
    # print(type(talib_ema.iloc[0]))
    # print(talib_ema)
    # print(type(rolling_ema[0]))
    
    return rolling_ema

In [144]:
output = exponentialMovingAverage(aapl_close)
print(output)

[nan, nan, nan, nan, nan, nan, nan, nan, nan, np.float64(171.45874633789063), np.float64(170.55213734019884), np.float64(169.17400270572375), np.float64(167.73983344601828), np.float64(166.19209084592546), np.float64(164.78493401420891), np.float64(163.305634821107), np.float64(162.07925796886735), np.float64(160.99207687687158), np.float64(162.08297556349862), np.float64(163.76876966825313), np.float64(165.11774625023267), np.float64(166.44071483861367), np.float64(166.99906808883733), np.float64(167.40413920514249), np.float64(167.60527871223303), np.float64(168.33562942666367), np.float64(169.19198371325749), np.float64(169.15015841631578), np.float64(168.4948162255652), np.float64(168.0014682914425), np.float64(168.29568486096716), np.float64(168.49357470069899), np.float64(168.00045249837018), np.float64(167.31498148748042), np.float64(166.22227289494134), np.float64(164.56968534710398), np.float64(163.69411966520866), np.float64(163.354340206371), np.float64(163.12452886114303), 

In [145]:
def movingAverageConvergenceDiversion(data, fast_period = 12, slow_period = 26, signal_period = 9):
    """ A technical indicator used for identifying points for buying and selling

    Args:
        data ()
        fast_period (int) = 
        slow_period (int) = 
        signal_period (int) = 
    
    Returns:
        rolling_ema (list[np.float]): list of floats representing weighted average of close prices
        or
        rolling_ema (list[np.nan, np.float]): list of floats representing weighted average of close prices
    """

    macd, macd_signal, macd_hist = talib.MACD(data, fastperiod=12, slowperiod=26, signalperiod=9)
    # print('MACD:', macd, '\nMACD Signal:', macd_signal, '\nMACD Hist:', macd_hist)
    

    
    ema_fast = exponentialMovingAverage(data, interval = fast_period)
    ema_slow = exponentialMovingAverage(data, interval = slow_period)
    
    # ema_fast = talib.EMA(data, timeperiod = fast_period)
    # ema_slow = talib.EMA(data, timeperiod = slow_period)
    # macd_line = np.array(aligned_fast) - np.array(aligned_slow)
    macd_line = pd.Series(ema_fast) - pd.Series(ema_slow)
    #signal_line = exponentialMovingAverage(macd_line, signal_period)
    first_non_nan = macd_line.first_valid_index()
    # print('first index:', first_non_nan)
    # print('length:', macd_line.size)
    signal_line_vals = exponentialMovingAverage(macd_line[first_non_nan:], signal_period)
    signal_line = [np.nan] * first_non_nan + signal_line_vals
    histogram = macd_line - signal_line


    print('taLib:', macd, macd_signal, macd_hist)


    # print(np.isclose(macd, macd_line, rtol= 0.1,equal_nan=True))

    #signal_line = exponentialMovingAverage(macd_line, interval = 9)
    #hist = np.array(macd_line) - np.array(signal_line)
    print('My macd:', macd_line, 'My Signal:', signal_line, 'My histogram:', histogram)
    return macd_line, signal_line, histogram

In [146]:
movingAverageConvergenceDiversion(aapl_close, fast_period = 12, slow_period = 26, signal_period = 9)
print('')


taLib: Date
2022-01-03 05:00:00+00:00         NaN
2022-01-04 05:00:00+00:00         NaN
2022-01-05 05:00:00+00:00         NaN
2022-01-06 05:00:00+00:00         NaN
2022-01-07 05:00:00+00:00         NaN
                               ...   
2022-12-23 05:00:00+00:00   -3.603966
2022-12-27 05:00:00+00:00   -3.866917
2022-12-28 05:00:00+00:00   -4.342729
2022-12-29 05:00:00+00:00   -4.385201
2022-12-30 05:00:00+00:00   -4.343331
Length: 251, dtype: float64 Date
2022-01-03 05:00:00+00:00         NaN
2022-01-04 05:00:00+00:00         NaN
2022-01-05 05:00:00+00:00         NaN
2022-01-06 05:00:00+00:00         NaN
2022-01-07 05:00:00+00:00         NaN
                               ...   
2022-12-23 05:00:00+00:00   -2.512669
2022-12-27 05:00:00+00:00   -2.783518
2022-12-28 05:00:00+00:00   -3.095361
2022-12-29 05:00:00+00:00   -3.353329
2022-12-30 05:00:00+00:00   -3.551329
Length: 251, dtype: float64 Date
2022-01-03 05:00:00+00:00         NaN
2022-01-04 05:00:00+00:00         NaN
2022-01-05