In [15]:
import time
import random

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import yfinance as yf

In [3]:
TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
FFILL_COLS = ['Open', 'High', 'Low', 'Close']

In [4]:
# import stock data from downloaded csv

ticker_dict = {}
for ticker in TICKERS:
    df = pd.read_csv(f'data_2022\\{ticker}_2022.csv', index_col = 'Date', parse_dates = True)
    df.index = pd.to_datetime(df.index, utc = True)
    df = df.loc['2022-01-01': '2022-01-31']
    # print(ticker, df.isnull().sum())
    # print(ticker, df.isnull().any())

    # no point summing if im gna not look at it

    df[FFILL_COLS] = df[FFILL_COLS].ffill()
    #df[FFILL_COLS] = df[FFILL_COLS].ffill().bfill() # bfill for leading NaNs
    ticker_dict[ticker] = df

    

In [None]:
# Simple Moving Average (SMA) func, but rolling window

def simpleMovingAverage_rolling(ticker, start, end, window = 5, ticker_dict = ticker_dict):
    """ Calculates the average closing price over a user defined period for the specified stock

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        ticker_dict (dict: str[df]): defaults to ticker_dict
    
    Returns:
        rolling_sma (list[float]): list of floats representing average of close prices of last 5 entries (including self)
    """
    truncated = ticker_dict[ticker].loc[start: end]['Close']

    if truncated.size < window:
        raise IndexError('Window is too small')

    window_sum = truncated.iloc[:window].sum()
    rolling_sma = [window_sum / window]

    for i in range(window, truncated.size):
        window_sum = window_sum + truncated.iloc[i] - truncated.iloc[i - window]
        rolling_sma.append(window_sum / window)

    # maybe return rolling_sma with window - 1 of NaNs at the start?
    # starting_nans = pd.Series([np.nan] * (window - 1))
    # rolling_sma = pd.concat(starting_nans, rolling_sma)

    return rolling_sma

In [11]:
output = simpleMovingAverage_rolling('AAPL', '2022-01-03', '2022-01-31')
print(output)

[np.float64(172.70777893066406), np.float64(170.7822723388672), np.float64(169.87637634277343), np.float64(169.99598388671876), np.float64(170.03323669433593), np.float64(170.2097137451172), np.float64(169.74107666015624), np.float64(168.00576782226562), np.float64(165.84496154785157), np.float64(163.927294921875), np.float64(161.68216857910156), np.float64(159.71744689941406), np.float64(158.4350799560547), np.float64(157.39781799316407), np.float64(158.95077209472657), np.float64(161.53119506835938)]


In [20]:
test_list = [0, 1, 2]
test_series = pd.Series([3, 4, 5])
test_combi = test_list + test_series
print(type(test_combi))
print(test_combi)

<class 'pandas.core.series.Series'>
0    3
1    5
2    7
dtype: int64


In [None]:
# Simple Moving Average (SMA) func
def simpleMovingAverage(ticker, start, end, ticker_dict = ticker_dict):
    """ Calculates the average closing price over a user defined period for the specified stock

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        ticker_dict (dict: str[df]): defaults to ticker_dict
    
    Returns:
        float: the average closing price
    """
    truncated = ticker_dict[ticker].loc[start: end]['Close']
    return truncated.mean()    # this should probably be the pytest instead

In [152]:
print(simpleMovingAverage('AAPL', '2022-01-03', '2022-01-31'))
print(upwardDownwardRuns('AAPL', '2022-01-03', '2022-01-31'))

166.53271408081054
(4, 8)


In [150]:
# Upward and Downward Runs
def upwardDownwardRuns(ticker, start, end, ticker_dict = ticker_dict):
    """ Calculates highest no. of consecutive upward and downward trending closing prices

    Args:
        ticker (str): stock ticker
        start (str): first entry in interval
        end (str): last entry in interval
        ticker_dict (dict: str[df]): defaults to ticker_dict
    
    Returns:
        upward (int): no. of days in longest trend upwards of closing price
        downward (int): no. of days in longest trend downwards of closing price
    """
    truncated = ticker_dict[ticker].loc[start: end]['Close']

    upward = 0
    downward = 0
    highscore = 0
    lowscore = 0
    i = 1
    n = len(truncated)
    while i < n:
        diff = truncated.iloc[i] - truncated.iloc[i - 1]
        if diff > 0:
            upward += 1
            downward = 0
            if upward > highscore:
                highscore = upward
            # highscore = max(highscore, upward)
        elif diff < 0:
            downward += 1
            upward = 0
            if downward > lowscore:
                lowscore = downward
            # lowscore = max(lowscore, downward)
        else:
            upward = 0
            downward = 0
        i += 1
        
    return highscore, lowscore

In [132]:
test_range = [random.randint(0, 10) for _ in range(1000000)]

In [144]:
#test_range = [random.randint(0, 100) for _ in range(1000000)]
start_time = time.time()
upward = 0
downward = 0
highscore = 0
lowscore = 0
i = 1
n = len(test_range)
while i < n:
    diff = test_range[i] - test_range[i - 1]
    if diff > 0:
        upward += 1
        downward = 0
        if upward > highscore:
            highscore = upward
        # highscore = max(highscore, upward)
    elif diff < 0:
        downward += 1
        upward = 0
        if downward > lowscore:
            lowscore = downward
        # lowscore = max(lowscore, downward)
    else:
        upward = 0
        downward = 0
    i += 1

print(highscore, lowscore)
print("--- %s seconds ---" % (time.time() - start_time))    

6 6
--- 0.20491361618041992 seconds ---


In [131]:
start_time = time.time()
upward = 0
downward = 0
highscore = 0
lowscore = 0
i = 1
while i < len(test_range):
    if test_range[i] > test_range[i - 1]:
        upward += 1
        downward = 0
    elif test_range[i] < test_range[i - 1]:
        downward += 1
        upward = 0

    highscore = max(upward, highscore)
    lowscore = max(downward, lowscore)
    i += 1

print(highscore, lowscore)
print("--- %s seconds ---" % (time.time() - start_time))   

8 9
--- 0.3613138198852539 seconds ---


In [None]:
# use timedelta to create sma
# possible edge case wud be cuz of missing entry
sma_test = ticker_dict['AAPL'].head()
sma_test

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2022-01-03 00:00:00-05:00,174.345039,179.296076,174.227395,178.443115,104487900,0.0,0.0
1,2022-01-04 00:00:00-05:00,179.05101,179.354932,175.609785,176.178421,99310400,0.0,0.0
2,2022-01-05 00:00:00-05:00,176.090157,176.63918,171.217554,171.492065,94537600,0.0,0.0
3,2022-01-06 00:00:00-05:00,169.315597,171.864651,168.276373,168.629318,96904000,0.0,0.0
4,2022-01-07 00:00:00-05:00,169.501866,170.72737,167.678316,168.795975,86709100,0.0,0.0


In [37]:
print(simpleMovingAverage(sma_test))

172.70777893066406


valid intervals = [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 4h, 1d, 5d, 1wk, 1mo, 3mo]
1min:
2min:



1day:


In [25]:
# start and end will be gotten from the user's choice/input in web app
idx = pd.date_range(start = '2022-01-01 00:00:00', end = '2024-12-31 23:59:59', freq = 'H')
idx = idx[idx.indexer_between_time('09:00', '16:30')]
idx

  idx = pd.date_range(start = '2022-01-01 00:00:00', end = '2024-12-31 23:59:59', freq = 'H')


DatetimeIndex(['2022-01-01 09:00:00', '2022-01-01 10:00:00',
               '2022-01-01 11:00:00', '2022-01-01 12:00:00',
               '2022-01-01 13:00:00', '2022-01-01 14:00:00',
               '2022-01-01 15:00:00', '2022-01-01 16:00:00',
               '2022-01-02 09:00:00', '2022-01-02 10:00:00',
               ...
               '2024-12-30 15:00:00', '2024-12-30 16:00:00',
               '2024-12-31 09:00:00', '2024-12-31 10:00:00',
               '2024-12-31 11:00:00', '2024-12-31 12:00:00',
               '2024-12-31 13:00:00', '2024-12-31 14:00:00',
               '2024-12-31 15:00:00', '2024-12-31 16:00:00'],
              dtype='datetime64[ns]', length=8768, freq=None)

       value
9          0
10         1
11         2
12         3
13         4
...      ...
26292   8763
26293   8764
26294   8765
26295   8766
26296   8767

[8768 rows x 1 columns]
