In [126]:
import polars as pl
import numpy as np
import yfinance as yf
import argparse

In [127]:
schema = {
    "Open": pl.Float32,
    "High": pl.Float32,
    "Low": pl.Float32,
    "Close": pl.Float32,
    "Volume": pl.UInt64,
    "Dividends": pl.Float32,
    "Stock Splits": pl.Float32
}

df = pl.from_pandas(yf.Ticker("AAPL").history(period="5y"), schema_overrides=schema).lazy()

$AAPL: possibly delisted; no price data found  (period=5y)


In [128]:
columns = df.collect_schema().names()
df = df.rename({old: new for old, new in zip(columns, [x.lower() for x in columns])})

In [129]:
df = df.with_columns(
    pl.col("close").rolling_mean(window_size=20).alias("sma_20")
)

In [130]:
df.with_columns(
    pl.col("close").ewm_mean(span=20).alias("ema_20")
)

In [131]:
macd_short, macd_long, macd_signal = 12, 26, 9

def macd(df, macd_short, macd_long, macd_signal):
    df = df.with_columns(
        (pl.col("close").ewm_mean(span=macd_short, adjust=False) - pl.col("close")\
            .ewm_mean(span=macd_long, adjust=False)).alias("macd")
    )

    df = df.with_columns(
        pl.col("macd").ewm_mean(span=macd_signal, adjust=False).alias("signal_line")
    )

    df = df.with_columns(
        (pl.col("macd") - pl.col("signal_line")).alias("macd_hist")
    )

    return df

df = macd(df=df, macd_long=macd_long, macd_short=macd_short, macd_signal=macd_signal)

In [134]:
def rsi(df, win):
    df = df.with_columns(
        pl.col("close").diff(1).alias("returns")
    )

    df = df.with_columns(
        (100 / (1 + (pl.when(pl.col("returns") > 0)
        .then(pl.col(returns)).otherwise(0).rolling_mean(window_size=win) \
            / pl.when(pl.col("returns") < 0).
            then(pl.col("returns")).otherwise(0).rolling_mean(window_size=win)))).alias("rsi")
    )

    return df

df = rsi(df=df, win=20)

NameError: name 'returns' is not defined

In [123]:
def bbands(df, win):
    df = df.with_columns(
        (pl.col("close").rolling_mean(window_size=win) - 2 * pl.col("close").rolling_std(window_size=win)).alias("lower")
    )
    
    df = df.with_columns(
        (pl.col("close").rolling_mean(window_size=win) + 2 * pl.col("close").rolling_std(window_size=win)).alias("lower")
    )

    return df

df = bbands(df, win=20)

In [124]:
query = """
    SELECT AVG(returns) OVER() AS mean_returns
    FROM df
"""

pl.sql(query)

In [125]:
df.collect(engine="cpu")

ShapeError: unable to add a column of length 0 to a DataFrame of height 1256