In [1]:
import pandas as pd
from processor.processor import Processor as processor
from database.adatabase import ADatabase
import matplotlib.pyplot as plt
from tqdm import tqdm
from xgboost import XGBRegressor
import warnings
warnings.simplefilter(action="ignore")
import numpy as np
from datetime import datetime

In [2]:
# Fetch the table with id "constituents" from Wikipedia's list of S&P 500
# companies and expose its "Symbol" column under the name "ticker".
sp500_tables = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
    attrs={"id": "constituents"},
)
sp500 = sp500_tables[0].rename(columns={"Symbol": "ticker"})

In [3]:
def calculate_technical_indicators(price, return_period=5, long_window=100, short_window=14):
    """Add technical-indicator columns to one instrument's price history.

    The frame is modified in place (new columns are assigned onto ``price``)
    and the same object is returned. Rows are processed in their existing
    order, so the caller is expected to pass a single ticker sorted by date
    (the visible caller sorts by "date" before calling).

    Parameters
    ----------
    price : pandas.DataFrame
        Must contain the columns "adjclose", "adjhigh" and "adjlow".
    return_period : int, default 5
        Look-back, in rows, for the percentage return ("prev_return").
    long_window : int, default 100
        Rolling window for "average_return", "coev" and "bollinger".
    short_window : int, default 14
        Rolling window for "rsi", "atr" and "stochastic".

    Returns
    -------
    pandas.DataFrame
        ``price`` with these columns added, in this order: "average_return",
        "prev_return", "coev", "bollinger", "rsi", "macd", "signal_line",
        "ema_20", "atr", "stochastic". Leading rows that lack a full window
        hold NaN.
    """
    close = price["adjclose"]
    high = price["adjhigh"]
    low = price["adjlow"]

    # Trailing return and its long-window average (computed once, used twice).
    period_return = close.pct_change(return_period)
    price["average_return"] = period_return.rolling(long_window).mean()
    price["prev_return"] = period_return

    # Long-window mean/std are shared by the next two indicators.
    rolling_close = close.rolling(long_window)
    long_mean = rolling_close.mean()
    long_std = rolling_close.std()

    # Coefficient of variation
    price["coev"] = long_std / long_mean

    # Relative distance from the close to the lower Bollinger band (mean - 2 std)
    price["bollinger"] = ((long_mean - 2 * long_std) - close) / close

    # RSI (Relative Strength Index) using simple rolling means of gains/losses
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(short_window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(short_window).mean()
    rs = gain / loss
    price["rsi"] = 100 - (100 / (1 + rs))

    # MACD (Moving Average Convergence Divergence) and its signal line
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    price["macd"] = ema_12 - ema_26
    price["signal_line"] = price["macd"].ewm(span=9, adjust=False).mean()

    # EMA (Exponential Moving Average)
    price["ema_20"] = close.ewm(span=20, adjust=False).mean()

    # ATR (Average True Range). np.maximum propagates NaN, so the first row
    # (which has no previous close) stays NaN, exactly as before.
    prev_close = close.shift()
    true_range = np.maximum(
        np.maximum(high - low, (high - prev_close).abs()),
        (low - prev_close).abs(),
    )
    price["atr"] = true_range.rolling(short_window).mean()

    # Stochastic oscillator. A zero high/low range would divide by zero and
    # could produce +/-inf, which survives dropna() and dominates any ranking;
    # treat the value as undefined (NaN) instead.
    lowest_low = low.rolling(window=short_window).min()
    highest_high = high.rolling(window=short_window).max()
    window_range = (highest_high - lowest_low).replace(0, np.nan)
    price["stochastic"] = (close - lowest_low) / window_range * 100

    return price

In [4]:
# Load each constituent's price history, attach the indicator columns, and
# collect the per-ticker frames (rows with any NaN are dropped).
prices = []
market = ADatabase("market")
market.connect()
try:
    for ticker in tqdm(sp500["ticker"]):
        try:
            price = processor.column_date_processing(market.query("prices",{"ticker":ticker})).sort_values("date")
            price = calculate_technical_indicators(price)
            prices.append(price.dropna())
        except Exception as e:
            # Best effort: report the ticker that failed and move on to the next.
            print(ticker, str(e))
            continue
finally:
    # Release the connection even if the loop is interrupted or fails
    # outside the per-ticker handler above.
    market.disconnect()

100%|███████████████████████████████████████████████████████████████████████████████████████████| 503/503 [00:43<00:00, 11.58it/s]


In [5]:
# Stack the per-ticker frames into one long table ordered by date.
# `prices` is rebound from a list to a DataFrame here.
stacked_prices = pd.concat(prices)
prices = stacked_prices.sort_values(by="date")

In [10]:
# Indicator columns to evaluate, one at a time, as ranking signals.
factors = [
    "average_return",
    "prev_return",
    "coev",
    "bollinger",
    "rsi",
    "signal_line",
    "ema_20",
    "stochastic",
    "atr",
]
# Collects one summary record per (factor, sort direction) run.
analysis = list()

In [11]:
# Display the combined price/indicator table for a quick visual check.
prices

Unnamed: 0,date,adjclose,adjlow,adjhigh,ticker,average_return,prev_return,coev,bollinger,rsi,macd,signal_line,ema_20,atr,stochastic
104,2022-12-14,126.09,124.835,128.4700,MMM,-0.001654,-0.002058,0.094035,-0.184765,43.882127,0.550489,0.831549,126.138817,3.185850,54.618938
104,2022-12-14,103.30,102.630,105.5200,DUK,-0.000715,0.037357,0.078453,-0.176049,68.979266,1.882281,1.582356,99.517546,1.796357,73.188406
104,2022-12-14,69.64,69.000,71.3400,DD,0.011275,-0.000861,0.110110,-0.327151,40.145322,1.646999,2.220677,69.092240,1.586300,32.972973
104,2022-12-14,86.33,85.953,88.0500,EMN,-0.002303,0.018643,0.107731,-0.228983,48.053786,1.344053,1.563438,85.280262,2.140329,47.812500
104,2022-12-14,161.36,160.730,164.5469,ETN,0.009335,0.026006,0.067121,-0.205032,40.817264,1.570552,2.333353,160.347610,3.243350,46.189164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,2024-07-18,143.20,143.150,146.2000,DRI,-0.006664,0.023149,0.063067,-0.051031,33.781818,-1.824414,-1.826943,145.739008,3.122214,41.012021
502,2024-07-18,140.37,139.870,142.3800,DVA,0.006693,0.000856,0.036333,-0.095279,47.326733,-0.420558,-0.466506,139.516516,3.066786,56.577416
502,2024-07-18,737.00,730.095,750.5800,NOW,0.000456,-0.016205,0.042174,-0.073868,39.043966,3.518862,8.366288,752.896599,20.551786,9.035002
502,2024-07-18,884.18,880.990,916.8000,DECK,0.003005,-0.005019,0.083087,-0.132267,20.806216,-25.243317,-18.511493,933.425665,31.581550,21.205562


In [12]:
def _simulate_top_pick(daily_frames, factor, ascending, starting_cash=100):
    """Replay a fully-invested, single-position strategy driven by one factor.

    On every date the row ranked first by ``factor`` (in the given sort
    direction) is the target holding. The first date buys it with
    ``starting_cash``; afterwards the position is marked to that date's
    "adjclose" and, if the target ticker changed, the whole value is moved
    into the new ticker at its "adjclose". A date on which the held ticker
    has no row is reported and skipped, leaving the position unchanged.

    Parameters
    ----------
    daily_frames : iterable of (date, pandas.DataFrame)
        One frame per date, in chronological order, each holding at least the
        columns "ticker", "adjclose" and ``factor``.
    factor : str
        Column used to rank the rows of each date.
    ascending : bool
        Sort direction passed to ``sort_values``.
    starting_cash : float, default 100
        Value invested on the first date.

    Returns
    -------
    pandas.DataFrame
        One row per processed date with columns "ticker", "adjclose",
        "quantity", "buy_price" and "date" (empty if no date was processed).
    """
    snapshot_columns = ["ticker", "adjclose", "quantity", "buy_price", "date"]
    holding = None
    snapshots = []
    for date, today in daily_frames:
        # Select by position after sorting: the frame's index is not a
        # unique row label (it repeats across tickers in the displayed data).
        pick = today.sort_values(factor, ascending=ascending).iloc[0]

        if holding is None:
            buying_power = starting_cash
        else:
            held_quote = today[today["ticker"] == holding["ticker"]]
            if held_quote.empty:
                # NOTE(review): if the held ticker never reappears the
                # position is frozen for the rest of the run; confirm this is
                # the intended policy.
                print(f"{date}: no quote for held ticker {holding['ticker']}; day skipped")
                continue
            # Mark the open position to today's close.
            holding["adjclose"] = held_quote.iloc[0]["adjclose"]
            holding["date"] = date
            buying_power = holding["adjclose"] * holding["quantity"]

        if holding is None or pick["ticker"] != holding["ticker"]:
            # Rotate the full value into today's top-ranked ticker.
            holding = {
                "ticker": pick["ticker"],
                "adjclose": pick["adjclose"],
                "quantity": buying_power / pick["adjclose"],
                "buy_price": pick["adjclose"],
                "date": date,
            }
        snapshots.append(holding.copy())
    return pd.DataFrame(snapshots, columns=snapshot_columns)


BACKTEST_START = datetime(2024, 1, 1)  # only dates strictly after this are traded
STARTING_CASH = 100

# Split the test window into per-day frames once, instead of re-filtering the
# whole table for every date of every factor/direction run. groupby returns
# the dates in sorted order and keeps the row order within each day.
backtest_window = prices[prices["date"] > BACKTEST_START]
daily_frames = list(backtest_window.groupby("date"))

# Start from a clean slate so re-running this cell does not duplicate results.
analysis = []
for factor in tqdm(factors):
    for ascending in [True, False]:
        states = _simulate_top_pick(daily_frames, factor, ascending, STARTING_CASH)
        states["pv"] = states["adjclose"] * states["quantity"]
        states["trade"] = states["ticker"] != states["ticker"].shift(-1)
        # With no processed dates nothing was ever invested; report the cash.
        final_value = states["pv"].iloc[-1] if len(states) else STARTING_CASH
        analysis.append({"factor":factor,"ascending":ascending,"pv":final_value})

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:16<00:00,  1.89s/it]


In [13]:
# Rank every (factor, sort direction) run by final portfolio value, best first.
results = pd.DataFrame(analysis)
results.sort_values(by="pv", ascending=False)

Unnamed: 0,factor,ascending,pv
3,prev_return,False,272.103507
2,prev_return,True,239.111613
7,bollinger,False,185.359595
1,average_return,False,174.299917
0,average_return,True,139.152177
6,bollinger,True,132.45933
9,rsi,False,132.048902
4,coev,True,130.869308
13,ema_20,False,121.410389
17,atr,False,121.410389
