In [1]:
import time
import polars as pl
import polars_talib as plta
import pandas as pd
import talib.abstract as ta

In [2]:
p = pl.scan_parquet("us_market_cap2000.parquet").select(
    pl.col("Date"), pl.col("Ticker").alias("Symbol"), pl.selectors.float().name.to_lowercase()
)

In [3]:
spend_records = {}

In [4]:
start_t = time.time()
p.collect()
end_t = time.time()
pl_read_spend = end_t - start_t
spend_records["pl_read"] = pl_read_spend

In [5]:
start_t = time.time()
p.with_columns(
    plta.sma(timeperiod=5).over("Symbol").alias("sma5"),
).collect()
end_t = time.time()
pl_sma_spend_t = end_t - start_t
spend_records["pl_sma"] = pl_sma_spend_t

In [6]:
start_t = time.time()
df = (
    pd.read_parquet("us_market_cap2000.parquet")
    .set_index(["Ticker", "Date"])
    .rename(columns={c: c.lower() for c in ["Open", "High", "Low", "Close"]})
)
end_t = time.time()
pd_read_spend = end_t - start_t
spend_records["pd_read"] = pd_read_spend

In [7]:
start_t = time.time()
df["sma5"] = df.groupby("Ticker")["close"].transform(lambda x: ta.SMA(x, timeperiod=5))
end_t = time.time()
pd_sma_spend_t = end_t - start_t
spend_records["pd_sma"] = pd_sma_spend_t

In [8]:
start_t = time.time()
p.with_columns(
    plta.macd(fastperiod=10, slowperiod=20, signalperiod=5).over("Symbol").alias("macd"),
).with_columns(
    pl.col("macd").struct.field("macd"),
    pl.col("macd").struct.field("macdsignal"),
    pl.col("macd").struct.field("macdhist"),
).collect()
end_t = time.time()
pl_macd_spend_t = end_t - start_t
spend_records["pl_macd"] = pl_macd_spend_t

In [9]:
start_t = time.time()
g = df.groupby("Ticker")["close"]
df["macd"] = g.transform(lambda x: ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5)[0])
df["macdsignal"] = g.transform(
    lambda x: ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5)[1]
)
df["macdhist"] = g.transform(lambda x: ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5)[2])
end_t = time.time()
pd_macd_spend_t = end_t - start_t
spend_records["pd_macd"] = pd_macd_spend_t

In [10]:
start_t = time.time()
p.with_columns(
    plta.stoch(
        pl.col("high"),
        pl.col("low"),
        pl.col("close"),
        fastk_period=14,
        slowk_period=7,
        slowd_period=7,
    )
    .over("Symbol")
    .alias("stoch"),
).with_columns(
    pl.col("stoch").struct.field("slowk"),
    pl.col("stoch").struct.field("slowd"),
).select(pl.exclude("stoch")).collect()
end_t = time.time()
pl_stoch_spend_t = end_t - start_t
spend_records["pl_stoch"] = pl_stoch_spend_t

In [11]:
start_t = time.time()
g = df.groupby("Ticker")
df["slowk"] = g.apply(
    lambda x: ta.STOCH(x, fastk_period=14, slowk_period=7, slowd_period=7)
).droplevel(0)["slowk"]
df["slowd"] = g.apply(
    lambda x: ta.STOCH(x, fastk_period=14, slowk_period=7, slowd_period=7)
).droplevel(0)["slowd"]
end_t = time.time()
pd_stoch_spend_t = end_t - start_t
spend_records["pd_stoch"] = pd_stoch_spend_t

In [12]:
start_t = time.time()
p.with_columns(
    plta.wclprice().over("Symbol").alias("wclprice"),
).collect()
end_t = time.time()
pl_wclprice_spend_t = end_t - start_t
spend_records["pl_wclprice"] = pl_wclprice_spend_t

In [13]:
start_t = time.time()
df["wclprice"] = df.groupby("Ticker").apply(lambda x: ta.WCLPRICE(x)).droplevel(0)
end_t = time.time()
pd_wclprice_spend_t = end_t - start_t
spend_records["pd_wclprice"] = pd_wclprice_spend_t

In [24]:
df_bench = pl.DataFrame(
    {
        "kind": [k.split("_")[0] for k in spend_records.keys()],
        "op": [k.split("_")[1] for k in spend_records.keys()],
        "time": [v for v in spend_records.values()],
    }
).with_columns(
    pl.when(pl.col("kind")=="pl").then(pl.lit("polars")).otherwise(pl.lit("pandas")).alias("stack")
)

In [25]:
df_bench

kind,op,time,stack
str,str,f64,str
"""pl""","""read""",0.427506,"""polars"""
"""pl""","""sma""",0.399857,"""polars"""
"""pd""","""read""",1.490565,"""pandas"""
"""pd""","""sma""",1.955835,"""pandas"""
"""pl""","""macd""",0.63583,"""polars"""
"""pd""","""macd""",5.529319,"""pandas"""
"""pl""","""stoch""",1.062676,"""polars"""
"""pd""","""stoch""",8.159397,"""pandas"""
"""pl""","""wclprice""",0.607319,"""polars"""
"""pd""","""wclprice""",3.896856,"""pandas"""


In [27]:
df_bench.plot.bar(x="op", y="time", by="stack")