In [1]:
import pandas as pd
from processor.processor import Processor as processor
from database.adatabase import ADatabase
import matplotlib.pyplot as plt
from tqdm import tqdm
from xgboost import XGBRegressor
import warnings
warnings.simplefilter(action="ignore")
import numpy as np

In [2]:
# Download the S&P 500 constituents table (the HTML table whose id is
# "constituents") from Wikipedia and expose its "Symbol" column as "ticker".
sp500 = (
    pd.read_html(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        attrs={"id": "constituents"},
    )[0]
    .rename(columns={"Symbol": "ticker"})
)

In [3]:
def calculate_technical_indicators(price, window=100, oscillator_window=14):
    """Return a copy of ``price`` with technical-indicator columns appended.

    The input frame is not modified; the indicators are written to a copy,
    which is returned. Rows are processed in their existing order, so the
    frame is assumed to be sorted chronologically (the loader cell sorts by
    "date" before calling this function).

    Parameters
    ----------
    price : pandas.DataFrame
        Must contain the columns "adjclose", "adjhigh" and "adjlow".
    window : int, default 100
        Number of rows used for the long look-back indicators
        ("average_return", "coev", "bollinger").
    oscillator_window : int, default 14
        Number of rows used for "rsi", "atr" and "stochastic".

    Returns
    -------
    pandas.DataFrame
        A copy of ``price`` with these columns added, in this order:

        * ``average_return`` - percent change of "adjclose" over ``window``
          rows (despite the name, this is not an average).
        * ``coev`` - rolling std / rolling mean of "adjclose".
        * ``bollinger`` - (lower band - close) / close, where the lower band
          is rolling mean minus two rolling standard deviations.
        * ``rsi`` - 100 - 100 / (1 + rs), with rs the ratio of the plain
          rolling means of gains and losses.
        * ``macd`` - EMA(span=12) minus EMA(span=26) of "adjclose".
        * ``signal_line`` - EMA(span=9) of ``macd``.
        * ``ema_20`` - EMA(span=20) of "adjclose".
        * ``atr`` - rolling mean of the true range.
        * ``stochastic`` - position of the close inside the rolling
          low/high range, scaled to 0-100.

        Leading rows that lack a full window contain NaN.
    """
    # Work on a copy so the caller's frame is never modified as a side effect.
    price = price.copy()
    close = price["adjclose"]
    high = price["adjhigh"]
    low = price["adjlow"]

    # The long-window mean and std feed two indicators; compute them once.
    rolling_close = close.rolling(window)
    rolling_mean = rolling_close.mean()
    rolling_std = rolling_close.std()

    # Return over the look-back window
    price["average_return"] = close.pct_change(window)

    # Coefficient of Variation
    price["coev"] = rolling_std / rolling_mean

    # Bollinger Bands: relative distance from the close to the lower band
    price["bollinger"] = ((rolling_mean - 2 * rolling_std) - close) / close

    # RSI (Relative Strength Index)
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(oscillator_window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(oscillator_window).mean()
    # A window with no losses gives rs = inf and therefore rsi = 100; a
    # completely flat window gives 0 / 0 = NaN.
    rs = gain / loss
    price["rsi"] = 100 - (100 / (1 + rs))

    # MACD (Moving Average Convergence Divergence)
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    price["macd"] = ema_12 - ema_26
    price["signal_line"] = price["macd"].ewm(span=9, adjust=False).mean()

    # EMA (Exponential Moving Average)
    price["ema_20"] = close.ewm(span=20, adjust=False).mean()

    # ATR (Average True Range)
    previous_close = close.shift()
    high_low = high - low
    high_close = (high - previous_close).abs()
    low_close = (low - previous_close).abs()
    # np.maximum propagates NaN, so the first row (which has no previous
    # close) stays NaN instead of silently falling back to high - low.
    tr = np.maximum(np.maximum(high_low, high_close), low_close)
    price["atr"] = tr.rolling(oscillator_window).mean()

    # Stochastic Oscillator
    low_n = low.rolling(window=oscillator_window).min()
    high_n = high.rolling(window=oscillator_window).max()
    price["stochastic"] = (close - low_n) / (high_n - low_n) * 100

    return price

In [4]:
# Build one indicator-enriched frame per S&P 500 ticker from the "prices"
# collection of the project's "market" database.
prices = []
market = ADatabase("market")
market.connect()
try:
    for ticker in tqdm(sp500["ticker"]):
        try:
            # column_date_processing is a project helper whose behavior is not
            # visible here; its result is sorted by "date" so the rolling
            # indicators run over rows in chronological order.
            price = processor.column_date_processing(
                market.query("prices", {"ticker": ticker})
            ).sort_values("date")
            price = calculate_technical_indicators(price)
            # Drop every row containing a NaN, which removes the warm-up rows
            # at the start of each indicator window.
            prices.append(price.dropna())
        except Exception as e:
            # Best effort: report the ticker that failed and keep loading the rest.
            print(ticker, str(e))
            continue
finally:
    # Always release the connection, even if the loop is interrupted or an
    # error escapes the per-ticker handler.
    market.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 503/503 [00:08<00:00, 60.41it/s]


In [5]:
# Stack the per-ticker frames into a single frame ordered by date.
# NOTE: this rebinds `prices` from a list of frames to one DataFrame, so the
# loader cell must be re-run before this cell can be executed again.
prices = (
    pd.concat(prices)
    .sort_values(by="date")
)

In [6]:
# Indicator columns that are ranked and backtested one at a time.
factors = [
    "average_return",
    "coev",
    "bollinger",
    "rsi",
    "signal_line",
    "ema_20",
    "stochastic",
    "atr",
]

# Accumulates one result record per backtest run.
analysis = []

In [7]:
# Display the combined, date-sorted frame to sanity-check the indicator columns.
prices

Unnamed: 0,date,adjclose,adjlow,adjhigh,ticker,average_return,coev,bollinger,rsi,macd,signal_line,ema_20,atr,stochastic
100,2022-12-07,126.35,124.10,127.31,MMM,-0.019859,0.094520,-0.185146,42.848594,0.712636,1.339249,125.949962,2.876207,58.983452
100,2022-12-07,99.58,99.33,101.26,DUK,-0.055487,0.078719,-0.144664,55.832148,1.490419,1.335755,97.858836,1.774564,66.714491
100,2022-12-07,69.70,69.44,70.75,DD,0.282193,0.104987,-0.325971,53.035144,2.415002,2.990284,68.598604,1.615714,54.079696
100,2022-12-07,84.75,84.32,85.68,EMN,-0.047646,0.109282,-0.214615,50.107527,1.594962,1.966694,84.575147,2.190357,44.482173
100,2022-12-07,157.27,156.59,158.42,ETN,0.239518,0.067289,-0.191587,38.218239,2.275608,3.791033,160.067091,3.106071,8.407871
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,2024-07-15,283.72,282.58,290.00,LULU,-0.361853,0.165737,-0.149301,20.749693,-8.320274,-7.481207,299.335053,7.116071,3.291943
500,2024-07-15,130.87,130.03,135.35,MU,0.621484,0.137448,-0.340388,40.639374,-0.222876,0.810603,133.959734,5.108179,21.433716
500,2024-07-15,104.55,102.71,105.22,PCAR,-0.026627,0.058803,-0.062453,41.281669,-1.225730,-1.469349,103.747963,2.105357,68.736411
500,2024-07-15,150.61,148.79,152.50,HES,0.026793,0.033099,-0.061242,50.886263,-0.321695,-0.805002,148.098667,2.435000,76.226415


In [8]:
STARTING_CASH = 100


def simulate_top_pick_strategy(daily_frames, factor, ascending, starting_cash=STARTING_CASH):
    """Replay a single-position strategy that always holds the day's top-ranked ticker.

    On each date the rows are sorted by ``factor`` and the first row is the
    pick. All capital is held in one ticker at a time: the first pick is bought
    with ``starting_cash``; afterwards the position is re-priced at the day's
    "adjclose" and, when the pick changes, sold and fully reinvested in the new
    pick at that same day's "adjclose".

    Parameters
    ----------
    daily_frames : iterable of (date, pandas.DataFrame)
        One frame per date, in the order the dates should be replayed. Each
        frame needs the columns "ticker", "adjclose" and ``factor``.
    factor : str
        Column used to rank the tickers on each date.
    ascending : bool
        True picks the smallest factor value, False the largest.
    starting_cash : number, default STARTING_CASH
        Capital invested in the first pick.

    Returns
    -------
    pandas.DataFrame
        One row per recorded day with the columns "ticker", "adjclose",
        "quantity", "buy_price" and "date". Empty if no day was recorded.
    """
    cash = starting_cash
    portfolio = {"ticker": "", "adjclose": 0, "quantity": 0, "buy_price": 0}
    portfolios = []
    for date, today in daily_frames:
        rec = today.sort_values(factor, ascending=ascending).iloc[0]
        if portfolio["ticker"] != "":
            held = today[today["ticker"] == portfolio["ticker"]]
            if held.empty:
                # The held ticker has no row for this date, so the position
                # cannot be re-priced or traded: leave it untouched and record
                # nothing for the day.
                print(f"{date}: no price row for held ticker {portfolio['ticker']}; skipping day")
                continue
            # Mark the open position to today's close.
            portfolio["adjclose"] = held.iloc[0]["adjclose"]
            portfolio["date"] = date
            if rec["ticker"] == portfolio["ticker"]:
                # Same pick as before: keep holding.
                portfolios.append(portfolio.copy())
                continue
            # Pick changed: liquidate at today's close before buying below.
            cash = portfolio["adjclose"] * portfolio["quantity"]
        # Put all available cash into today's pick at its close.
        portfolio["ticker"] = rec["ticker"]
        portfolio["adjclose"] = rec["adjclose"]
        portfolio["quantity"] = cash / rec["adjclose"]
        portfolio["buy_price"] = rec["adjclose"]
        portfolio["date"] = date
        cash = 0
        portfolios.append(portfolio.copy())
    return pd.DataFrame(portfolios)


# Split the frame by date once instead of re-filtering the whole frame for
# every date of every run. sort=False keeps the dates in order of first
# appearance in `prices`; groupby drops missing dates by default.
daily_frames = list(prices.groupby("date", sort=False))

# Start from a clean list so re-running this cell does not duplicate results.
analysis = []
for factor in tqdm(factors):
    for ascending in [True, False]:
        states = simulate_top_pick_strategy(daily_frames, factor, ascending)
        if states.empty:
            # Nothing was ever bought, so the capital is still the starting cash.
            final_value = STARTING_CASH
        else:
            states["pv"] = states["adjclose"] * states["quantity"]
            # True on rows whose ticker differs from the next row's ticker.
            states["trade"] = states["ticker"] != states["ticker"].shift(-1)
            final_value = states.iloc[-1]["pv"]
        analysis.append({"factor": factor, "ascending": ascending, "pv": final_value})

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:07<00:00,  1.00it/s]


In [9]:
# Rank every (factor, direction) run by its final portfolio value, best first.
(
    pd.DataFrame(analysis)
    .sort_values(by="pv", ascending=False)
)

Unnamed: 0,factor,ascending,pv
4,bollinger,True,557.500391
0,average_return,True,414.578743
3,coev,False,371.113022
1,average_return,False,269.247852
5,bollinger,False,257.157787
7,rsi,False,185.062191
15,atr,False,170.77164
11,ema_20,False,170.77164
12,stochastic,True,143.7155
2,coev,True,125.050342
