In [1]:
import time
import random

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import talib
import yfinance as yf

In [2]:
TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
FFILL_COLS = ['Open', 'High', 'Low', 'Close']

In [5]:
# import stock data from downloaded csv

ticker_dict = {}
for ticker in TICKERS:
    df = pd.read_csv(f'data_2022\\{ticker}_2022.csv', index_col = 'Date', parse_dates = True)
    df.index = pd.to_datetime(df.index, utc = True)
    df = df.loc['2022-01-01': '2022-01-31']
    # print(ticker, df.isnull().sum())
    # print(ticker, df.isnull().any())

    # no point summing if im gna not look at it

    df[FFILL_COLS] = df[FFILL_COLS].ffill()
    #df[FFILL_COLS] = df[FFILL_COLS].ffill().bfill() # bfill for leading NaNs
    ticker_dict[ticker] = df

    

In [10]:
# Simple Moving Average (SMA) func, but rolling window

#def simpleMovingAverage_rolling(ticker, start, end, window = 5, ticker_dict = ticker_dict):
def simpleMovingAverage(ticker, start, end, window = 5, ticker_dict = ticker_dict):
    """ Calculates the average closing price over a user defined period for the specified stock

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        window (int): the amount of data points being calculated, defaults to 5
        ticker_dict (dict: str[df]): defaults to ticker_dict
    
    Returns:
        rolling_sma (list[np.float]): list of floats representing average of close prices of last 5 entries (including self)
        or
        rolling_sma (list[np.nan, np.float]): list of floats representing average of close prices of last 5 entries (including self)
    """
    truncated = ticker_dict[ticker].loc[start: end]['Close']

    if truncated.size < window:
        raise IndexError('Window is too small')

    # we can use this as one of the test cases
    # print(truncated.rolling(window).mean())

    window_sum = truncated.iloc[:window].sum()
    rolling_sma = [window_sum / window]

    for i in range(window, truncated.size):
        window_sum = window_sum + truncated.iloc[i] - truncated.iloc[i - window]
        rolling_sma.append(window_sum / window)

    # maybe return rolling_sma with window - 1 of NaNs at the start?
    # rolling_sma = [np.nan] * (window - 1) + rolling_sma

    return rolling_sma

In [11]:
output = simpleMovingAverage('AAPL', '2022-01-03', '2022-01-31')
print(output)

[np.float64(172.70777893066406), np.float64(170.7822723388672), np.float64(169.87637634277343), np.float64(169.99598388671876), np.float64(170.03323669433593), np.float64(170.2097137451172), np.float64(169.74107666015624), np.float64(168.00576782226562), np.float64(165.84496154785157), np.float64(163.927294921875), np.float64(161.68216857910156), np.float64(159.71744689941406), np.float64(158.4350799560547), np.float64(157.39781799316407), np.float64(158.95077209472657), np.float64(161.53119506835938)]


In [12]:
def exponentialMovingAverage(ticker, start, end, interval = 10, smoothing = 2.0, ticker_dict = ticker_dict):
    """ Calculates a weighted moving average of closing prices that gives more importance to recent prices

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        interval (int): amount of preceding data points to use in calculating EMA, defaults to 10
        smoothing (float): strength of weightage for recent data points, defaults to 2.0
        ticker_dict (dict: str[df]): dictionary, with key-value pairs of ticker to dataframe containing stock history, defaults to ticker_dict
    
    Returns:
        rolling_ema (list[np.float]): list of floats representing weighted average of close prices
        or
        rolling_ema (list[np.nan, np.float]): list of floats representing weighted average of close prices
    """
    truncated = ticker_dict[ticker].loc[start: end]['Close']

    # pandas_method = truncated.ewm(span=interval, adjust=False).mean()
    # print(pandas_method.iloc[(interval - 1):].reset_index(drop = True))
    print(talib.EMA(truncated, timeperiod = interval))

    if truncated.size < interval:
        raise IndexError('Window is too small')
    
    # weight -> % of ema represented by the most recent entry
    weight = smoothing / (interval + 1)     # weighting more recent entries is greater for shorter intervals
    sma = truncated.iloc[:interval].sum() / interval    # using sma as an initial state, afterwards we can implement EMA
    
    rolling_ema = [sma]

    for i in range(interval, truncated.size):
        ema = truncated.iloc[i] * weight + rolling_ema[-1] * (1 - weight)
        rolling_ema.append(ema)
    

    #return rolling_ema should figure out why interval must minus one
    return [np.nan] * (interval-1) + rolling_ema

In [13]:
output = exponentialMovingAverage('AAPL', '2022-01-03', '2022-01-31')
print(output)

Date
2022-01-03 05:00:00+00:00           NaN
2022-01-04 05:00:00+00:00           NaN
2022-01-05 05:00:00+00:00           NaN
2022-01-06 05:00:00+00:00           NaN
2022-01-07 05:00:00+00:00           NaN
2022-01-10 05:00:00+00:00           NaN
2022-01-11 05:00:00+00:00           NaN
2022-01-12 05:00:00+00:00           NaN
2022-01-13 05:00:00+00:00           NaN
2022-01-14 05:00:00+00:00    171.458746
2022-01-18 05:00:00+00:00    170.552137
2022-01-19 05:00:00+00:00    169.174003
2022-01-20 05:00:00+00:00    167.739833
2022-01-21 05:00:00+00:00    166.192091
2022-01-24 05:00:00+00:00    164.784934
2022-01-25 05:00:00+00:00    163.305635
2022-01-26 05:00:00+00:00    162.079258
2022-01-27 05:00:00+00:00    160.992077
2022-01-28 05:00:00+00:00    162.082976
2022-01-31 05:00:00+00:00    163.768770
dtype: float64
[nan, nan, nan, nan, nan, nan, nan, nan, nan, np.float64(171.45874633789063), np.float64(170.55213734019884), np.float64(169.17400270572375), np.float64(167.73983344601828), np.flo

In [None]:
def movingAverageConvergenceDiversion(ticker, start, end, ticker_dict = ticker_dict):
    truncated = ticker_dict[ticker].loc[start: end]['Close']

    
    #print(truncated)
    macd, macd_signal, macd_hist = talib.MACD(truncated, fastperiod=4, slowperiod=8, signalperiod=6)
    print(macd, macd_signal, macd_hist)

    ema_fast = exponentialMovingAverage('AAPL', '2022-01-03', '2022-01-31', interval = 4)
    ema_slow = exponentialMovingAverage('AAPL', '2022-01-03', '2022-01-31', interval = 8)
    macd_line = np.array(ema_fast) - np.array(ema_slow)
    #print(macd_line)

    #signal_line = exponentialMovingAverage(macd_line, interval = 9)
    #hist = np.array(macd_line) - np.array(signal_line)
    return 

In [23]:
movingAverageConvergenceDiversion('AAPL', '2022-01-03', '2022-01-31')

Date
2022-01-03 05:00:00+00:00         NaN
2022-01-04 05:00:00+00:00         NaN
2022-01-05 05:00:00+00:00         NaN
2022-01-06 05:00:00+00:00         NaN
2022-01-07 05:00:00+00:00         NaN
2022-01-10 05:00:00+00:00         NaN
2022-01-11 05:00:00+00:00         NaN
2022-01-12 05:00:00+00:00         NaN
2022-01-13 05:00:00+00:00         NaN
2022-01-14 05:00:00+00:00         NaN
2022-01-18 05:00:00+00:00         NaN
2022-01-19 05:00:00+00:00         NaN
2022-01-20 05:00:00+00:00   -2.560445
2022-01-21 05:00:00+00:00   -2.885616
2022-01-24 05:00:00+00:00   -2.918556
2022-01-25 05:00:00+00:00   -2.995202
2022-01-26 05:00:00+00:00   -2.780414
2022-01-27 05:00:00+00:00   -2.514952
2022-01-28 05:00:00+00:00   -0.231117
2022-01-31 05:00:00+00:00    1.630830
dtype: float64 Date
2022-01-03 05:00:00+00:00         NaN
2022-01-04 05:00:00+00:00         NaN
2022-01-05 05:00:00+00:00         NaN
2022-01-06 05:00:00+00:00         NaN
2022-01-07 05:00:00+00:00         NaN
2022-01-10 05:00:00+00:00

In [14]:
def relativeStrengthIndex(ticker, start, end, interval = 14, ticker_dict = ticker_dict):
    
    # offset = pd.concat([pd.Series([0]), truncated], ignore_index = True)
    # #print(offset)
    # differences = truncated - offset.iloc[:-1]
    truncated = ticker_dict[ticker].loc[start: end]['Close']
    differences = truncated[1:].values - truncated.values[:-1]  # contains length - 1 elements

    print(talib.RSI(truncated, timeperiod=interval))

    gains = 0
    losses = 0
    for i in differences[:interval]:
        gains += max(i, 0)     # if i is positive, use i else use 0
        losses += max(-i, 0)   # if i is negative, use i else use 0
    
    avg_gain = gains / interval
    avg_loss = losses / interval
    if avg_loss == 0:
        rsi = 100
    else:
        rsi = 100 - 100/(1 + avg_gain / avg_loss)
    # rs = ( sum(gains) / len(gains) ) / ( sum(losses) / len(losses) )
    # rsi = 100 - 100/(1 + rs) 
    rsi_values = [rsi]

    # we use smoothing as it counters having a period of no losses, which may cause avg_loss to be 0, causing divide by zero error
    # as long as there is one entry > 0, it will never divide by zero (if there is none, we use rsi = 100)
    for i in differences[interval:]:
        # obvious thing to do here is to loop thru differences every time, but we can use sliding window here again
        # use remainder as index?
        avg_gain = (avg_gain * (interval - 1) + max(i, 0)) / interval # i-1 because differences has one less entry than truncated
        avg_loss = (avg_loss * (interval - 1) + max(-i, 0)) / interval

        if avg_loss == 0:
            rsi = 100
        else:
            rsi = 100 - 100/(1 + avg_gain/avg_loss)

        # try:    # shud replace this with proper handling later, currently meant to weed out dividing by zero errors -> no loss = strength is max
        #     #rs = ( sum(gains) / len(gains) ) / ( sum(losses) / len(losses) )
        #     rs = sum(gains) / sum(losses)
        #     rsi = 100 - 100/(1 + rs) 
        # except:
        #     rsi = 100
        rsi_values.append(rsi)

    #to implement smoothing

    return rsi_values

In [15]:
output = relativeStrengthIndex('AAPL', '2022-01-03', '2022-01-31')
print(output)

Date
2022-01-03 05:00:00+00:00          NaN
2022-01-04 05:00:00+00:00          NaN
2022-01-05 05:00:00+00:00          NaN
2022-01-06 05:00:00+00:00          NaN
2022-01-07 05:00:00+00:00          NaN
2022-01-10 05:00:00+00:00          NaN
2022-01-11 05:00:00+00:00          NaN
2022-01-12 05:00:00+00:00          NaN
2022-01-13 05:00:00+00:00          NaN
2022-01-14 05:00:00+00:00          NaN
2022-01-18 05:00:00+00:00          NaN
2022-01-19 05:00:00+00:00          NaN
2022-01-20 05:00:00+00:00          NaN
2022-01-21 05:00:00+00:00          NaN
2022-01-24 05:00:00+00:00    15.097550
2022-01-25 05:00:00+00:00    14.138430
2022-01-26 05:00:00+00:00    14.091281
2022-01-27 05:00:00+00:00    13.831830
2022-01-28 05:00:00+00:00    41.330294
2022-01-31 05:00:00+00:00    48.429321
dtype: float64
[np.float64(15.097549643326786), np.float64(14.138430327966063), np.float64(14.091280790701077), np.float64(13.831829975979701), np.float64(41.330294206916534), np.float64(48.42932132041199)]


In [16]:
# Upward and Downward Runs
def upwardDownwardRuns(ticker, start, end, ticker_dict = ticker_dict):
    """ Calculates highest no. of consecutive upward and downward trending closing prices

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        ticker_dict (dict: str[df]): defaults to ticker_dict
    
    Returns:
        upward (int): no. of days in longest trend upwards of closing price
        downward (int): no. of days in longest trend downwards of closing price
    """
    truncated = ticker_dict[ticker].loc[start: end]['Close']

    upward = 0
    downward = 0
    highscore = 0
    lowscore = 0
    i = 1
    n = len(truncated)
    while i < n:
        diff = truncated.iloc[i] - truncated.iloc[i - 1]
        if diff > 0:
            upward += 1
            downward = 0
            if upward > highscore:
                highscore = upward
            # highscore = max(highscore, upward)
        elif diff < 0:
            downward += 1
            upward = 0
            if downward > lowscore:
                lowscore = downward
            # lowscore = max(lowscore, downward)
        else:
            upward = 0
            downward = 0
        i += 1
        
    return highscore, lowscore

In [17]:
print(upwardDownwardRuns('AAPL', '2022-01-03', '2022-01-31'))

(4, 8)


In [None]:
#test_range = [random.randint(0, 100) for _ in range(1000000)]
start_time = time.time()
upward = 0  # can combine these 4 into one line
downward = 0
highscore = 0
lowscore = 0
i = 1
n = len(test_range)
while i < n:
    diff = test_range[i] - test_range[i - 1]
    if diff > 0:
        upward += 1
        downward = 0
        if upward > highscore:
            highscore = upward
        # highscore = max(highscore, upward)
    elif diff < 0:
        downward += 1
        upward = 0
        if downward > lowscore:
            lowscore = downward
        # lowscore = max(lowscore, downward)
    else:
        upward = 0
        downward = 0
    i += 1

print(highscore, lowscore)
print("--- %s seconds ---" % (time.time() - start_time))    

6 6
--- 0.20491361618041992 seconds ---


In [131]:
start_time = time.time()
upward = 0
downward = 0
highscore = 0
lowscore = 0
i = 1
while i < len(test_range):
    if test_range[i] > test_range[i - 1]:
        upward += 1
        downward = 0
    elif test_range[i] < test_range[i - 1]:
        downward += 1
        upward = 0

    highscore = max(upward, highscore)
    lowscore = max(downward, lowscore)
    i += 1

print(highscore, lowscore)
print("--- %s seconds ---" % (time.time() - start_time))   

8 9
--- 0.3613138198852539 seconds ---


In [None]:
# use timedelta to create sma
# possible edge case wud be cuz of missing entry
sma_test = ticker_dict['AAPL'].head()
sma_test

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2022-01-03 00:00:00-05:00,174.345039,179.296076,174.227395,178.443115,104487900,0.0,0.0
1,2022-01-04 00:00:00-05:00,179.05101,179.354932,175.609785,176.178421,99310400,0.0,0.0
2,2022-01-05 00:00:00-05:00,176.090157,176.63918,171.217554,171.492065,94537600,0.0,0.0
3,2022-01-06 00:00:00-05:00,169.315597,171.864651,168.276373,168.629318,96904000,0.0,0.0
4,2022-01-07 00:00:00-05:00,169.501866,170.72737,167.678316,168.795975,86709100,0.0,0.0


In [37]:
print(simpleMovingAverage(sma_test))

172.70777893066406


valid intervals = [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 4h, 1d, 5d, 1wk, 1mo, 3mo]
1min:
2min:



1day:


In [25]:
# start and end will be gotten from the user's choice/input in web app
idx = pd.date_range(start = '2022-01-01 00:00:00', end = '2024-12-31 23:59:59', freq = 'H')
idx = idx[idx.indexer_between_time('09:00', '16:30')]
idx

  idx = pd.date_range(start = '2022-01-01 00:00:00', end = '2024-12-31 23:59:59', freq = 'H')


DatetimeIndex(['2022-01-01 09:00:00', '2022-01-01 10:00:00',
               '2022-01-01 11:00:00', '2022-01-01 12:00:00',
               '2022-01-01 13:00:00', '2022-01-01 14:00:00',
               '2022-01-01 15:00:00', '2022-01-01 16:00:00',
               '2022-01-02 09:00:00', '2022-01-02 10:00:00',
               ...
               '2024-12-30 15:00:00', '2024-12-30 16:00:00',
               '2024-12-31 09:00:00', '2024-12-31 10:00:00',
               '2024-12-31 11:00:00', '2024-12-31 12:00:00',
               '2024-12-31 13:00:00', '2024-12-31 14:00:00',
               '2024-12-31 15:00:00', '2024-12-31 16:00:00'],
              dtype='datetime64[ns]', length=8768, freq=None)

       value
9          0
10         1
11         2
12         3
13         4
...      ...
26292   8763
26293   8764
26294   8765
26295   8766
26296   8767

[8768 rows x 1 columns]
