In [1]:
from general_trader import GeneralTrader
import polars as pl
import polars_talib as pltalib
from trading_helpers import TradingData

In [2]:
# Point TradingData at the local data store. The three positional arguments
# look like a base directory, an index sub-directory and the ticker-list file
# name -- TODO confirm against trading_helpers.TradingData.
traderData = TradingData(
    "/mnt/jonarne/dev/trading_data/",
    "/SP500/",
    "SP500_list.parquet",
)
# download_all_tickers() only needs to be called once, or again whenever
# fresh ticker and sector data should be downloaded.
#traderData.download_all_tickers()


# Load the per-ticker parquet files: open, close, high, low and volume data.
# The result is collected before printing, so it is presumably a lazy polars
# frame -- verify against load_ticker_parquet_files.
ochl_data = traderData.load_ticker_parquet_files()
print(ochl_data.collect())


TradingData Initialized
shape: (4_300_704, 7)
┌─────────────────────┬───────────┬────────────┬───────────┬───────────┬─────────┬────────┐
│ date                ┆ close     ┆ high       ┆ low       ┆ open      ┆ volume  ┆ ticker │
│ ---                 ┆ ---       ┆ ---        ┆ ---       ┆ ---       ┆ ---     ┆ ---    │
│ datetime[ns]        ┆ f64       ┆ f64        ┆ f64       ┆ f64       ┆ i64     ┆ str    │
╞═════════════════════╪═══════════╪════════════╪═══════════╪═══════════╪═════════╪════════╡
│ 1994-10-18 00:00:00 ┆ 1.205047  ┆ 1.214462   ┆ 1.186218  ┆ 1.205047  ┆ 418166  ┆ O      │
│ 1994-10-19 00:00:00 ┆ 1.308607  ┆ 1.318021   ┆ 1.214463  ┆ 1.214463  ┆ 249950  ┆ O      │
│ 1994-10-20 00:00:00 ┆ 1.318019  ┆ 1.365091   ┆ 1.318019  ┆ 1.327434  ┆ 141590  ┆ O      │
│ 1994-10-21 00:00:00 ┆ 1.299191  ┆ 1.327434   ┆ 1.289776  ┆ 1.318019  ┆ 68731   ┆ O      │
│ 1994-10-24 00:00:00 ┆ 1.270948  ┆ 1.327435   ┆ 1.270948  ┆ 1.299192  ┆ 75749   ┆ O      │
│ …                   ┆ …         

In [4]:
from datetime import datetime

# Restrict the data set to the back-test window. Per the polars docs,
# is_between includes both bounds by default.
my_filter = pl.col("date").is_between(
    datetime(2019, 5, 22), datetime(2023, 2, 27)
)
# To limit the run to a few symbols, AND the date filter with e.g.
#   pl.col("ticker").is_in(["MSFT", "AAPL", "TSLA"])

print("--- Testing Valid Instantiation ---")
# No try/except here on purpose: the previous handler caught a
# `ValidationError` name that is never imported in this notebook, so any
# failure surfaced as a NameError that hid the real cause. Letting the
# exception propagate also stops the cell before `trader` is used undefined.
trader = GeneralTrader(ochl_data.collect(), my_filter)
print("Successfully created GeneralTrader instance.")
print("DataFrame Schema:", trader.df_ohlc.schema)

# Last expression of the cell: rich display of the filtered frame.
trader.df_ohlc

--- Testing Valid Instantiation ---
Successfully created GeneralTrader instance.
DataFrame Schema: Schema([('date', Datetime(time_unit='ns', time_zone=None)), ('close', Float64), ('high', Float64), ('low', Float64), ('open', Float64), ('volume', Int64), ('ticker', String)])


date,close,high,low,open,volume,ticker
datetime[ns],f64,f64,f64,f64,i64,str
2019-05-22 00:00:00,51.031487,51.0388,50.372732,50.716745,1766990,"""O"""
2019-05-23 00:00:00,51.660919,51.675557,50.819178,51.009485,2458327,"""O"""
2019-05-24 00:00:00,51.624371,51.968383,51.536536,51.653647,1347379,"""O"""
2019-05-28 00:00:00,51.046116,52.129397,51.046116,51.800023,2237892,"""O"""
2019-05-29 00:00:00,50.358086,51.075398,50.219018,51.01684,1676381,"""O"""
…,…,…,…,…,…,…
2023-02-21 00:00:00,32.153538,32.757714,32.106334,32.106334,3539300,"""NRG"""
2023-02-22 00:00:00,31.294474,32.115776,31.209511,32.002493,4113600,"""NRG"""
2023-02-23 00:00:00,31.634319,31.889207,31.23783,31.247269,5426500,"""NRG"""
2023-02-24 00:00:00,31.719286,31.860891,31.133991,31.303916,3798700,"""NRG"""


In [None]:
#trader.df_ohlc.write_csv("ohlc_pr_ticker_explode.csv")

In [5]:
# Register three derived columns on the trader. The calls chain, and the
# chain's final value is the cell's displayed output.
(
    trader
    .add_column("sma", pltalib.sma(timeperiod=200))
    .add_column("mean_B", pl.col("close").mean())
    .add_column("A_plus_B", pl.col("close") + pl.col("close"))
)

<general_trader.GeneralTrader at 0x7f6bc1f66f10>

In [6]:

# Show the column-name -> polars expression mapping registered via add_column.
print(trader.added_columns)

{'sma': <Expr ['col("close")./home/user/dev/py…'] at 0x7F6BC1653450>, 'mean_B': <Expr ['col("close").mean()'] at 0x7F6BC1698B90>, 'A_plus_B': <Expr ['[(col("close")) + (col("close"…'] at 0x7F6BC1683A90>}


In [7]:
# Evaluate the registered expressions; the frame displayed below gains the
# sma, mean_B and A_plus_B columns.
# NOTE(review): mean_B differs per ticker in the output, so the expressions
# are presumably evaluated per ticker -- confirm in GeneralTrader.
trader.apply_added_columns()
# Last expression of the cell: rich display of the enriched frame.
trader.df_ohlc

ticker,date,close,high,low,open,volume,sma,mean_B,A_plus_B
str,datetime[ns],f64,f64,f64,f64,i64,f64,f64,f64
"""O""",2019-05-22 00:00:00,51.031487,51.0388,50.372732,50.716745,1766990,,53.638094,102.062973
"""O""",2019-05-23 00:00:00,51.660919,51.675557,50.819178,51.009485,2458327,,53.638094,103.321838
"""O""",2019-05-24 00:00:00,51.624371,51.968383,51.536536,51.653647,1347379,,53.638094,103.248741
"""O""",2019-05-28 00:00:00,51.046116,52.129397,51.046116,51.800023,2237892,,53.638094,102.092232
"""O""",2019-05-29 00:00:00,50.358086,51.075398,50.219018,51.01684,1676381,,53.638094,100.716171
…,…,…,…,…,…,…,…,…,…
"""NRG""",2023-02-21 00:00:00,32.153538,32.757714,32.106334,32.106334,3539300,36.297714,32.985105,64.307076
"""NRG""",2023-02-22 00:00:00,31.294474,32.115776,31.209511,32.002493,4113600,36.281204,32.985105,62.588947
"""NRG""",2023-02-23 00:00:00,31.634319,31.889207,31.23783,31.247269,5426500,36.249466,32.985105,63.268639
"""NRG""",2023-02-24 00:00:00,31.719286,31.860891,31.133991,31.303916,3798700,36.218016,32.985105,63.438572


In [8]:
# Keep only the rows whose close is above that ticker's 200-period SMA.
trader.df_ohlc.filter(
    pl.col("close") > pltalib.sma(timeperiod=200).over("ticker")
)

date,close,high,low,open,volume,ticker,sma,mean_B,A_plus_B
datetime[ns],f64,f64,f64,f64,i64,str,f64,f64,f64
2020-03-06 00:00:00,154.651367,156.125416,149.319875,155.646827,72821100,"""MSFT""",139.939248,221.429683,309.302734
2020-03-09 00:00:00,144.170242,150.994931,143.576796,144.533975,70419300,"""MSFT""",140.05483,221.429683,288.340485
2020-03-10 00:00:00,154.02919,154.13448,146.046323,151.387382,65354400,"""MSFT""",140.226771,221.429683,308.05838
2020-03-11 00:00:00,147.051392,150.947102,144.677578,150.401518,56371600,"""MSFT""",140.363539,221.429683,294.102783
2020-03-13 00:00:00,152.028656,154.976767,134.70372,141.183822,92727400,"""MSFT""",140.598773,221.429683,304.057312
…,…,…,…,…,…,…,…,…,…
2023-02-17 00:00:00,151.026947,151.472451,149.343923,150.828947,59144100,"""AAPL""",145.482443,116.364988,302.053894
2023-02-21 00:00:00,146.997559,149.789411,146.928265,148.700387,58867200,"""AAPL""",145.400566,116.364988,293.995117
2023-02-22 00:00:00,147.423264,148.452873,145.690736,147.383654,51011300,"""AAPL""",145.366331,116.364988,294.846527
2023-02-23 00:00:00,147.908371,148.838988,145.769948,148.591484,48394200,"""AAPL""",145.330876,116.364988,295.816742


In [9]:
# Number of periods (rows per ticker) in the simple moving average.
TIMEPERIOD = 200

# SMA of the close, not yet windowed, so that lags can be applied inside each
# ticker's window below.
sma_unwindowed = pltalib.sma(pl.col("close"), timeperiod=TIMEPERIOD)
# Per-ticker SMA, kept under its original name for any later cell that uses it.
sma_expression = sma_unwindowed.over("ticker")

# Day D is actionable when the close dropped below its SMA on day D-1:
#   D-2: SMA <= close   (close at or above the average)
#   D-1: SMA >  close   (close below the average)
# The shift must happen INSIDE .over("ticker"). Shifting an already-windowed
# expression lags over the whole frame, so a row could be compared with the
# SMA of a different ticker whenever a ticker's rows are not contiguous.
# NOTE(review): assumes rows are ordered by date within each ticker -- confirm.
actionable_day_condition = (
    (
        sma_unwindowed.shift(1).over("ticker")
        > pl.col("close").shift(1).over("ticker")
    )
    & (
        sma_unwindowed.shift(2).over("ticker")
        <= pl.col("close").shift(2).over("ticker")
    )
)

actionable_dates_df = trader.df_ohlc.filter(
    actionable_day_condition
).select([
    "ticker",
    "date",  # date of day D, i.e. the day *after* the crossover
    # pl.col("open") of day D could be added here as a back-test entry price.
])

In [10]:
# Spot check: actionable dates for a single ticker.
actionable_dates_df.filter(pl.col("ticker") == "MSFT")
# Author's reference row: "MSFT", 2023-02-27.
# NOTE(review): presumably a date expected to appear in this result -- confirm.

ticker,date
str,datetime[ns]
"""MSFT""",2020-03-13 00:00:00
"""MSFT""",2020-03-17 00:00:00
"""MSFT""",2020-03-26 00:00:00
"""MSFT""",2022-01-26 00:00:00
"""MSFT""",2022-02-14 00:00:00
…,…
"""MSFT""",2022-03-22 00:00:00
"""MSFT""",2022-03-24 00:00:00
"""MSFT""",2022-04-07 00:00:00
"""MSFT""",2023-02-23 00:00:00
