In [1]:
from database.market import Market
from database.strategy import Strategy
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None

In [2]:
market = Market()
strategy = Strategy()

In [3]:
market.connect()
prices = market.retrieve("prices")
market.disconnect()

In [4]:
prices["date"] = pd.to_datetime(prices["date"])
prices["year"] = [x.year for x in prices["date"]]
prices["quarter"] = [x.quarter for x in prices["date"]]
prices["week"] = [x.week for x in prices["date"]]

In [5]:
hl_sim = []
strategy.connect()
for ticker in tqdm(prices["ticker"].unique()):
    try:
        ticker_sim = strategy.retrieve_hlt_data(ticker)
        complete = ticker_sim.groupby(["year","week","ticker"]).mean().reset_index()
        hl_sim.append(complete)
    except:
        continue
strategy.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [02:19<00:00,  3.59it/s]


In [6]:
sim = pd.concat(hl_sim)

In [7]:
final_sim = prices.merge(sim,on=["year","week","ticker"],how="left").dropna()

In [8]:
final_sim["weekly_delta"] = (final_sim["high_level_prediction"] - final_sim["adjClose"]) / final_sim["adjClose"]

In [41]:
final_sim

Unnamed: 0,_id,date,close,high,low,open,volume,adjClose,adjHigh,adjLow,...,divCash,splitFactor,ticker,year,quarter_x,week,quarter_y,high_level_prediction,high_level_score,weekly_delta
6294,6157ee1a20b66d72115d8ea8,2019-01-02 00:00:00+00:00,190.95,190.99,186.70,187.82,2475193,173.912352,173.948783,170.041561,...,0.0,1.0,MMM,2019,1,1,2.5,192.096431,0.889134,0.104559
6295,6157ee1a20b66d72115d8ea9,2019-01-03 00:00:00+00:00,183.76,188.28,182.89,188.28,3358241,167.363885,171.480584,166.571511,...,0.0,1.0,MMM,2019,1,1,2.5,192.096431,0.889134,0.147777
6296,6157ee1a20b66d72115d8eaa,2019-01-04 00:00:00+00:00,191.32,191.98,186.03,186.75,2995052,174.249338,174.850449,169.431342,...,0.0,1.0,MMM,2019,1,1,2.5,192.096431,0.889134,0.102423
6297,6157ee1a20b66d72115d8eab,2019-01-07 00:00:00+00:00,190.88,192.30,188.66,191.36,2162158,173.848597,175.141897,171.826679,...,0.0,1.0,MMM,2019,1,2,1.0,171.529013,0.918366,-0.013343
6298,6157ee1a20b66d72115d8eac,2019-01-08 00:00:00+00:00,191.68,194.11,189.58,193.00,2479787,174.577217,176.790398,172.664591,...,0.0,1.0,MMM,2019,1,2,1.0,171.529013,0.918366,-0.017460
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2916566,6157f22220b66d721189f6e8,2021-09-27 00:00:00+00:00,134.30,136.15,134.25,135.47,568008,134.300000,136.150000,134.250000,...,0.0,1.0,XYL,2021,3,39,3.0,148.299465,0.722099,0.104240
2916567,6157f22220b66d721189f6e9,2021-09-28 00:00:00+00:00,128.92,133.72,128.76,133.69,1030853,128.920000,133.720000,128.760000,...,0.0,1.0,XYL,2021,3,39,3.0,148.299465,0.722099,0.150322
2916568,6157f22220b66d721189f6ea,2021-09-29 00:00:00+00:00,127.78,129.72,127.32,129.39,1134015,127.780000,129.720000,127.320000,...,0.0,1.0,XYL,2021,3,39,3.0,148.299465,0.722099,0.160584
2916569,6157f22220b66d721189f6eb,2021-09-30 00:00:00+00:00,123.68,128.69,123.68,128.24,1427543,123.680000,128.690000,123.680000,...,0.0,1.0,XYL,2021,3,39,3.0,148.299465,0.722099,0.199058


In [57]:
trades = []
start = final_sim["date"].min()
end = final_sim["date"].max()
seats = 11
for delta in tqdm(range(0,15,5)):
    d = delta/100
    for score in range(60,100,10):
        date = start
        while date <= end:
            req = float(score/100)
            if date.weekday() > 4:
                date = date + timedelta(days=1)
            try:
                todays_sim = final_sim[(final_sim["date"]==date) 
                                       & (final_sim["weekly_delta"] > d) \
                                      & (final_sim["high_level_score"] >= req)].sort_values("weekly_delta",ascending=False)
                if todays_sim.index.size < 1:
                    date = date + timedelta(days=1)
                else:
                    for seat in range(min(seats,todays_sim.index.size)):
                        try:
                            trade = todays_sim.iloc[seat]
                            ticker = trade["ticker"]
                            buy_price = trade["adjClose"]
                            exit_price = buy_price * (1+d)
                            max_date = date + timedelta(days=7)
                            exits = final_sim[(final_sim["ticker"]==ticker) & 
                                                (final_sim["date"] > date) & \
                                              (final_sim["date"] <= max_date) & \
                                              (final_sim["adjClose"] >= exit_price)
                                            ]
                            if exits.index.size < 1:
                                exits = final_sim[(final_sim["ticker"]==ticker) & 
                                              (final_sim["year"] == date.year) & \
                                              (final_sim["adjClose"] >= buy_price)].sort_values("date",ascending=False)
                                if exits.index.size > 1:
                                    sell_trade = exits.iloc[0]
                                else:
                                    exits = final_sim[(final_sim["ticker"]==ticker) & 
                                              (final_sim["year"] == date.year)].sort_values("date",ascending=False)
                                    sell_trade = exits.iloc[exits.index.size -1]
                            else:
                                sell_trade = exits.iloc[0]
                            trade["sell_date"] = sell_trade["date"]
                            trade["sell_price"] = sell_trade["adjClose"]
                            trade["delta"] = (trade["sell_price"] - trade["adjClose"]) / trade["adjClose"]
                            trade["score"] = score
                            trade["d"] = d
                            trade["seat"] = seat
                            trades.append(trade)
                            date = sell_trade["date"] + timedelta(days=1)
                        except Exception as e:
                            continue
                    date = date + timedelta(days=1)
            except Exception as e:
                date = date + timedelta(days=1)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:29<00:00,  9.90s/it]


In [58]:
t = pd.DataFrame(trades)

In [59]:
t

Unnamed: 0,_id,date,close,high,low,open,volume,adjClose,adjHigh,adjLow,...,quarter_y,high_level_prediction,high_level_score,weekly_delta,sell_date,sell_price,delta,score,d,seat
1152236,6157efb220b66d72116f0afe,2019-01-02 00:00:00+00:00,7.90,8.02,7.4800,7.53,47494361,7.286326,7.397004,6.898951,...,2.5,40.653947,0.889134,4.579485,2019-01-04 00:00:00+00:00,7.452343,0.022785,60,0.0,0
986759,6157ef7720b66d72116c8499,2019-01-02 00:00:00+00:00,4.92,4.93,4.6100,4.63,756478,4.920000,4.930000,4.610000,...,4.0,21.171026,0.859901,3.303054,2019-12-31 00:00:00+00:00,26.130000,4.310976,60,0.0,1
859476,6157ef4620b66d72116a9366,2019-01-02 00:00:00+00:00,59.41,59.89,57.7900,57.92,2096660,55.675283,56.125108,54.157122,...,2.5,110.040675,0.889134,0.976473,2020-01-02 00:00:00+00:00,81.681393,0.467103,60,0.0,2
294678,6157ee8320b66d721161f528,2019-01-02 00:00:00+00:00,90.10,90.53,88.3100,88.31,496219,85.207227,85.613876,83.514431,...,2.8,150.484804,0.883287,0.766104,2020-01-06 00:00:00+00:00,128.655159,0.509909,60,0.0,3
1159075,6157efb720b66d72116f25b5,2019-01-02 00:00:00+00:00,38.50,38.67,37.2069,37.47,1605174,37.055462,37.219084,35.810880,...,2.5,60.678541,0.889134,0.637506,2020-01-08 00:00:00+00:00,65.363254,0.763930,60,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2647998,6157f1c120b66d721185ddd0,2020-01-08 00:00:00+00:00,310.99,314.82,310.9900,311.71,214140,310.990000,314.820000,310.990000,...,1.0,428.983996,0.930153,0.379414,2021-10-01 00:00:00+00:00,464.330000,0.493071,90,0.1,6
2314165,6157f14f20b66d721180c5c7,2020-01-08 00:00:00+00:00,382.42,384.85,370.7100,373.18,743094,382.420000,384.850000,370.710000,...,1.0,509.031470,0.930153,0.331080,2021-10-01 00:00:00+00:00,570.790000,0.492574,90,0.1,7
1152492,6157efb220b66d72116f0bfe,2020-01-08 00:00:00+00:00,9.25,9.30,9.1700,9.23,45994922,9.096004,9.145172,9.017336,...,1.0,11.471543,0.930153,0.261163,2021-10-01 00:00:00+00:00,14.160000,0.556727,90,0.1,8
67749,6157ee2f20b66d72115e7eb7,2020-01-08 00:00:00+00:00,52.49,52.95,52.3800,52.44,2854867,50.109279,50.548416,50.004268,...,1.0,62.617484,0.930153,0.249619,2021-10-01 00:00:00+00:00,52.940000,0.056491,90,0.1,9


In [60]:
a = []
seats = 10
for delta in tqdm(range(0,15,5)):
    d = delta/100
    for score in tqdm(range(70,100,10)):
        score_trades = t[(t["score"]==score) & 
                         (t["d"]==d)]
        cash = []
        for seat in range(seats):
            initial = float(100 / seats)
            seat_trades = score_trades[score_trades["seat"]==seat]
            for delta in seat_trades["delta"]:
                initial = initial * (1+delta)
                cash.append(initial)
            analysis = {}
            analysis["score"] = score
            analysis["trades"] = seat_trades.index.size
            if seat_trades.index.size > 0:
                analysis["winrate"] = seat_trades[score_trades["delta"] > 0].index.size / seat_trades.index.size
                seat_trades["hpr"] = (seat_trades["sell_date"] - seat_trades["date"])
                seat_trades["hpr"] =[x.days for x in seat_trades["hpr"]]
                analysis["hpr"] = seat_trades["hpr"].mean()
                analysis["seat"] = seat
                analysis["d"] = d
                analysis["score"] = score
                analysis["pv"] = initial
                a.append(analysis)

  0%|                                                                                                                                             | 0/3 [00:00<?, ?it/s]
  analysis["winrate"] = seat_trades[score_trades["delta"] > 0].index.size / seat_trades.index.size
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 61.84it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 62.49it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████▋                                            | 2/3 [00:00<00:00, 19.70it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 62.49it/s][A
100%|████████████████████████████████████████████

In [61]:
final = pd.DataFrame(a)

In [62]:
final

Unnamed: 0,score,trades,winrate,hpr,seat,d,pv
0,70,10,1.0,3.0,0,0.0,11.205111
1,70,10,0.6,39.5,1,0.0,54.948734
2,70,10,0.8,76.2,2,0.0,26.775239
3,70,10,0.7,77.4,3,0.0,23.076155
4,70,10,0.8,76.1,4,0.0,23.227340
...,...,...,...,...,...,...,...
85,90,1,1.0,632.0,5,0.1,12.086295
86,90,1,1.0,632.0,6,0.1,14.930705
87,90,1,1.0,632.0,7,0.1,14.925736
88,90,1,1.0,632.0,8,0.1,15.567275


In [63]:
hl = final.pivot_table(index=["score","d"],columns="seat",values="pv")

In [64]:
report = final.merge(hl.reset_index(),on=["score","d"]).drop(["pv","seat"],axis=1).groupby(["score","d"]).mean()
report["pv"] = [sum([row[1][x] for x in range(seats)]) for row in report.iterrows()]

In [65]:
report.sort_values("pv",ascending=False).reset_index()

Unnamed: 0,score,d,trades,winrate,hpr,0,1,2,3,4,5,6,7,8,9,pv
0,70,0.05,7.0,0.871429,135.157143,13.235731,93.165239,26.091702,25.197426,30.119786,28.037686,39.351119,15.85259,31.383725,29.253414,331.688417
1,70,0.0,10.0,0.79,76.14,11.205111,54.948734,26.775239,23.076155,23.22734,26.248301,46.407215,14.544372,29.41248,30.896411,286.741357
2,70,0.1,2.0,1.0,381.75,11.704169,75.554112,22.464178,21.783929,22.435103,18.842058,22.434477,15.143601,34.189489,27.23348,271.784596
3,80,0.1,2.0,1.0,381.75,11.704169,75.554112,22.464178,21.783929,22.435103,18.842058,22.434477,15.143601,34.189489,27.23348,271.784596
4,80,0.05,2.0,1.0,344.3,11.234391,75.554112,22.464178,21.783929,22.435103,18.842058,22.434477,13.486739,26.80509,20.752066,255.792143
5,80,0.0,2.0,1.0,307.85,10.257802,53.375759,23.514276,21.285061,21.853518,19.485025,23.901149,13.410151,26.818788,19.271149,233.172678
6,90,0.1,1.3,1.0,498.55,13.348895,16.213365,19.262477,12.668091,12.08576,12.086295,14.930705,14.925736,15.567275,10.56491,141.65351
7,90,0.0,1.5,1.0,319.3,14.190249,14.12223,12.024603,18.189611,16.165797,10.402092,15.044482,11.809464,12.137444,11.107596,135.193567
8,90,0.05,1.4,1.0,362.8,11.066028,14.101337,15.031661,18.991249,10.665867,13.514071,13.863579,14.404691,12.533996,10.496917,134.669396


In [66]:
score = report.sort_values("pv",ascending=False).reset_index().iloc[0]["score"].item()
d = report.sort_values("pv",ascending=False).reset_index().iloc[0]["d"].item()
benchmark = t[(t["score"]==score) & (t["d"]==d)]

In [67]:
timelines =[]
for seat in tqdm(range(seats)):
    seat_trades = benchmark[benchmark["seat"]==seat]
    initial = float(100/seats)
    timeline = []
    for row in seat_trades.iterrows():
        initial = initial * (1+row[1]["delta"])
        timeline.append(initial)
    seat_trades["pv"] = timeline
    timelines.append(seat_trades)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 908.76it/s]


In [68]:
market.connect()
sp5 = market.retrieve("sp500")
market.disconnect()
sp5.rename(columns={"Symbol":"ticker"},inplace=True)
benchmark = benchmark.merge(sp5,on="ticker",how="left")

In [69]:
sp5.columns

Index(['_id', 'ticker', 'Security', 'SEC filings', 'GICS Sector',
       'GICS Sub-Industry', 'Headquarters Location', 'Date first added', 'CIK',
       'Founded'],
      dtype='object')

In [70]:
# industry check
benchmark.groupby(["year","GICS Sector"]).mean().sort_values("delta",ascending=False).reset_index()

Unnamed: 0,year,GICS Sector,close,high,low,open,volume,adjClose,adjHigh,adjLow,...,quarter_y,high_level_prediction,high_level_score,weekly_delta,sell_price,delta,score,d,seat,CIK
0,2019,Information Technology,4.92,4.93,4.61,4.63,756478.0,4.92,4.93,4.61,...,4.0,21.171026,0.859901,3.303054,26.13,4.310976,70.0,0.05,1.0,1463101.0
1,2019,Industrials,73.106667,73.675,71.2873,72.466667,1299603.0,70.538183,71.084718,68.784757,...,2.949057,108.17071,0.880382,0.558148,124.982566,0.821376,70.0,0.05,6.333333,873961.3
2,2020,Health Care,394.743333,398.52,390.15,396.753333,878979.7,394.743333,398.52,390.15,...,1.0,564.150399,0.930153,0.416366,596.94,0.543154,70.0,0.05,4.0,927524.0
3,2020,Financials,357.86,363.09,350.65,355.08,212674.0,354.552631,359.734295,347.409266,...,1.0,522.045627,0.930153,0.472407,542.888383,0.531193,70.0,0.05,2.0,1278021.0
4,2019,Financials,74.755,75.21,73.05,73.115,1296440.0,70.441255,70.869492,68.835776,...,2.65,130.26274,0.886211,0.871288,105.168276,0.488506,70.0,0.05,2.5,1330425.0
5,2019,Real Estate,14.58,14.67,14.32,14.5,4422705.0,12.74873,12.827426,12.521387,...,2.5,19.014808,0.889134,0.491506,18.944783,0.486013,70.0,0.05,10.0,879101.0
6,2020,Materials,18.56,18.81,18.24,18.56,5217832.0,18.238016,18.483679,17.923568,...,1.0,23.179609,0.930153,0.27095,25.806082,0.414961,70.0,0.05,10.0,1285785.0
7,2019,Utilities,56.75,57.08,55.3402,56.18,1801769.0,50.559757,50.85376,49.303736,...,2.5,77.952565,0.889134,0.541791,69.689568,0.37836,70.0,0.05,5.0,827052.0
8,2020,Information Technology,308.7,310.46,304.05,304.91,191371.0,308.7,310.46,304.05,...,1.0,429.765225,0.930153,0.392178,421.99,0.366991,70.0,0.05,5.0,860731.0
9,2019,Consumer Discretionary,35.435,35.635,34.535,34.715,24431710.0,35.128163,35.323502,34.244476,...,2.5,67.850259,0.889134,2.54444,51.699908,0.289474,70.0,0.05,4.0,604003.0


In [71]:
benchmark.groupby(["year","ticker"]).mean().sort_values("delta",ascending=False).reset_index().merge(sp5[["ticker","Security"]],on="ticker",how="left").head(20)

Unnamed: 0,year,ticker,close,high,low,open,volume,adjClose,adjHigh,adjLow,...,high_level_prediction,high_level_score,weekly_delta,sell_price,delta,score,d,seat,CIK,Security
0,2019,ENPH,4.92,4.93,4.61,4.63,756478.0,4.92,4.93,4.61,...,21.171026,0.859901,3.303054,26.13,4.310976,70.0,0.05,1.0,1463101.0,Enphase Energy
1,2019,CPRT,47.68,47.96,46.69,46.9,1369886.0,47.68,47.96,46.69,...,73.140398,0.883287,0.533985,96.26,1.018876,70.0,0.05,6.0,900075.0,Copart
2,2020,IDXX,277.31,279.25,275.02,277.15,368748.0,277.31,279.25,275.02,...,366.053202,0.930153,0.320014,489.21,0.764127,70.0,0.05,8.0,874716.0,Idexx Laboratories
3,2019,FBHS,38.5,38.67,37.2069,37.47,1605174.0,37.055462,37.219084,35.81088,...,60.678541,0.889134,0.637506,65.363254,0.76393,70.0,0.05,4.0,1519751.0,Fortune Brands Home & Security
4,2019,LHX,133.14,134.395,129.965,133.03,923750.0,126.879086,128.07507,123.853391,...,190.693191,0.868726,0.502952,213.324444,0.681321,70.0,0.05,9.0,202058.0,L3Harris Technologies
5,2020,MKTX,357.86,363.09,350.65,355.08,212674.0,354.552631,359.734295,347.409266,...,522.045627,0.930153,0.472407,542.888383,0.531193,70.0,0.05,2.0,1278021.0,MarketAxess
6,2019,KMX,62.97,63.25,61.59,61.9,1369068.0,62.97,63.25,61.59,...,95.046571,0.889134,0.509394,95.68,0.519454,70.0,0.05,8.0,1170010.0,CarMax
7,2019,AIZ,90.1,90.53,88.31,88.31,496219.0,85.207227,85.613876,83.514431,...,150.484804,0.883287,0.766104,128.655159,0.509909,70.0,0.05,3.0,1267238.0,Assurant
8,2019,KIM,14.58,14.67,14.32,14.5,4422705.0,12.74873,12.827426,12.521387,...,19.014808,0.889134,0.491506,18.944783,0.486013,70.0,0.05,10.0,879101.0,Kimco Realty
9,2019,DFS,59.41,59.89,57.79,57.92,2096660.0,55.675283,56.125108,54.157122,...,110.040675,0.889134,0.976473,81.681393,0.467103,70.0,0.05,2.0,1393612.0,Discover Financial Services
