In [8]:
import polars as pl
import datetime as dt

In [9]:
from quantbullet.utils.data import generate_fake_loan_prices

In [10]:
# Build a year (2023-01-01 .. 2023-12-31) of fake loan prices and hand the
# pandas result straight to polars, so `df` is only ever a polars DataFrame.
df = pl.from_pandas(
    generate_fake_loan_prices(
        start_date=dt.date(2023, 1, 1),
        end_date=dt.date(2023, 12, 31),
    )
)

## Window functions

In [11]:
# Per-loan lag: within each LoanID window (rows ordered by Date), take the
# previous row's Price. A loan's earliest row has no predecessor, so it is null.
(
    df
    .with_columns(
        pl.col("Price").shift(1).over("LoanID", order_by="Date").alias("last_price")
    )
    .filter(pl.col("LoanID") == "LoanID0003")
)


Date,LoanID,Price,Balance,last_price
datetime[ns],str,f64,i64,f64
2023-01-24 00:00:00,"""LoanID0003""",96.22,3000,
2023-01-25 00:00:00,"""LoanID0003""",99.53,3000,96.22
2023-01-26 00:00:00,"""LoanID0003""",94.91,3000,99.53
2023-01-27 00:00:00,"""LoanID0003""",96.45,3000,94.91
2023-01-28 00:00:00,"""LoanID0003""",90.74,3000,96.45
2023-01-29 00:00:00,"""LoanID0003""",104.43,3000,90.74
2023-01-30 00:00:00,"""LoanID0003""",99.89,3000,104.43


In [12]:
# Per-loan trailing average of the two *previous* prices: shift(1) drops the
# current row from the window, so the first two rows of each loan stay null.
(
    df
    .with_columns(
        pl.col("Price")
        .shift(1)
        .rolling_mean(window_size=2)
        .over("LoanID", order_by="Date")
        .alias("ma_2")
    )
    .filter(pl.col("LoanID") == "LoanID0003")
)


Date,LoanID,Price,Balance,ma_2
datetime[ns],str,f64,i64,f64
2023-01-24 00:00:00,"""LoanID0003""",96.22,3000,
2023-01-25 00:00:00,"""LoanID0003""",99.53,3000,
2023-01-26 00:00:00,"""LoanID0003""",94.91,3000,97.875
2023-01-27 00:00:00,"""LoanID0003""",96.45,3000,97.22
2023-01-28 00:00:00,"""LoanID0003""",90.74,3000,95.68
2023-01-29 00:00:00,"""LoanID0003""",104.43,3000,93.595
2023-01-30 00:00:00,"""LoanID0003""",99.89,3000,97.585


## Aggregations

In [22]:
# Mean Price across all loans for each Date; show the five most recent dates.
(
    df
    .group_by("Date")
    .agg(avg_price=pl.col("Price").mean())
    .sort("Date", descending=True)
    .head(5)
)

Date,avg_price
datetime[ns],f64
2023-12-31 00:00:00,97.7867
2023-12-30 00:00:00,97.6833
2023-12-29 00:00:00,97.7468
2023-12-28 00:00:00,97.3889
2023-12-27 00:00:00,96.6516


In [23]:
# Summary statistics of Price per loan.
# NOTE(review): group_by is called without maintain_order and the result is not
# sorted, so which five loans head(5) displays is not guaranteed to be stable
# between runs -- confirm whether a sort on LoanID is wanted here.
# NOTE(review): std comes back null for a loan whose min, max and mean are all
# equal in the output; presumably that loan has a single price row -- verify.
(
    df
    .group_by("LoanID")
    .agg(
        pl.col("Price").mean().alias("mean"),
        pl.col("Price").max().alias("max"),
        pl.col("Price").min().alias("min"),
        pl.col("Price").std().alias("std"),
        pl.col("Price").median().alias("median"),
    )
    .head(5)
)

LoanID,mean,max,min,std,median
str,f64,f64,f64,f64,f64
"""LoanID8041""",99.315,103.92,94.33,4.116669,99.505
"""LoanID0385""",95.325556,101.92,91.05,4.295812,93.0
"""LoanID5940""",95.903043,101.79,90.11,3.557654,95.96
"""LoanID7246""",99.92,101.35,98.49,2.022325,99.92
"""LoanID6440""",102.84,102.84,102.84,,102.84


## Clip

In [31]:
# Restrict to one loan, then add a copy of Price limited to the [92, 95] band;
# the original Price column is kept alongside for comparison.
(
    df
    .filter(pl.col("LoanID") == "LoanID0003")
    .with_columns(
        Price_Clipped=pl.col("Price").clip(lower_bound=92, upper_bound=95)
    )
)

Date,LoanID,Price,Balance,Price_Clipped
datetime[ns],str,f64,i64,f64
2023-01-24 00:00:00,"""LoanID0003""",96.22,3000,95.0
2023-01-25 00:00:00,"""LoanID0003""",99.53,3000,95.0
2023-01-26 00:00:00,"""LoanID0003""",94.91,3000,94.91
2023-01-27 00:00:00,"""LoanID0003""",96.45,3000,95.0
2023-01-28 00:00:00,"""LoanID0003""",90.74,3000,92.0
2023-01-29 00:00:00,"""LoanID0003""",104.43,3000,95.0
2023-01-30 00:00:00,"""LoanID0003""",99.89,3000,95.0
