In [1]:
import polars as pl
# Raise the number of rows polars prints for a table to 50 before truncating.
pl.Config.set_tbl_rows(50) 

polars.config.Config

In [2]:
# Source file for the whole analysis; kept in one named place so it is easy to repoint.
INGESTED_CSV_PATH = "ingested.csv"

# Load every transaction row into a polars DataFrame.
df = pl.read_csv(INGESTED_CSV_PATH)

In [3]:
# Peek at one row to confirm the schema. The sample is seeded so that a
# Restart & Run All shows the same row every time.
display(df.sample(n=1, seed=0))

address,lease_remaining,minimum_floor,maximum_floor,built_year,sqft,sqm,flat_type,road,town,year_of_sale,month_of_sale,months_ago,price,psf,band_name,price_is_geq_one_million
str,i64,i64,i64,i64,i64,i64,str,str,str,i64,i64,i64,i64,f64,str,i64
"""116 MARSILING RISE""",70,4,6,1995,1829,170,"""EXECUTIVE""","""MARSILING RISE""","""WOODLANDS""",2024,5,6,920000,50300.71,"""70 - 74""",0


In [4]:
# Only these columns are needed for the monthly sales counts computed below.
monthly_count_columns = [
    "flat_type",
    "year_of_sale",
    "month_of_sale",
    "months_ago",
    "price_is_geq_one_million",
]
df_filter = df.select(monthly_count_columns)

In [5]:
# Peek at one row of the trimmed frame. The sample is seeded so that a
# Restart & Run All shows the same row every time.
display(df_filter.sample(n=1, seed=0))

flat_type,year_of_sale,month_of_sale,months_ago,price_is_geq_one_million
str,i64,i64,i64,i64
"""4 ROOM""",2023,1,22,0


In [6]:
# Monthly roll-up: one row per (year, month, months_ago) holding
#   - no_of_flats_sold: number of rows (transactions) in that month
#   - no_of_million_dollar_flats_sold: sum of the price_is_geq_one_million
#     column (presumably a 0/1 flag, so the sum is a count -- verify upstream)
# plus a zero-padded "YYYY-MM" label in `year_month`.
sale_month_keys = ["year_of_sale", "month_of_sale", "months_ago"]
padded_month = pl.col("month_of_sale").cast(str).str.zfill(2)

dm = (
    df_filter
    .group_by(sale_month_keys)
    .agg(
        pl.col("flat_type").len().alias("no_of_flats_sold"),
        pl.col("price_is_geq_one_million").sum().alias("no_of_million_dollar_flats_sold"),
    )
    .with_columns(
        (pl.col("year_of_sale").cast(str) + "-" + padded_month).alias("year_month")
    )
)

In [7]:
# Show the monthly roll-up with the most recent month (smallest months_ago) first.
dm_most_recent_first = dm.sort("months_ago")
display(dm_most_recent_first)

year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,11,0,188,10,"""2024-11"""
2024,10,1,2155,103,"""2024-10"""
2024,9,2,2209,106,"""2024-09"""
2024,8,3,2595,105,"""2024-08"""
2024,7,4,3036,120,"""2024-07"""
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""


In [8]:
# Distinct `months_ago` values in ascending order. `unique()` makes no promise
# about ordering, so sort explicitly: the code below treats the first and last
# elements as the minimum and maximum.
months_ago_list = dm.get_column("months_ago").unique().sort().to_list()

# Distinct "YYYY-MM" labels, most recent first. The month part is zero-padded,
# so sorting the strings sorts them chronologically. Taking the unique values
# from the column directly (rather than writing them back with `with_columns`)
# avoids a length-mismatch error should `year_month` ever contain duplicates.
start_month_list = dm.get_column("year_month").unique().sort(descending=True).to_list()

print(months_ago_list[-5:])
min_months_ago, max_months_ago = months_ago_list[0], months_ago_list[-1]
print(min_months_ago, max_months_ago)
# Note: start_month_list is in descending order, so `min_start_month` is the
# most recent month (it lines up with min_months_ago) and `max_start_month`
# is the oldest one.
min_start_month, max_start_month = start_month_list[0], start_month_list[-1]
print(min_start_month, max_start_month)

[30, 31, 32, 33, 34]
0 34
2024-11 2022-01


In [9]:
# Number of consecutive months aggregated in each rolling window.
WINDOW_MONTHS = 24

# Slide a WINDOW_MONTHS-wide window over the monthly roll-up, starting at the
# most recent month and stepping one month further back each iteration.
# Pairing every list with a copy of itself shifted by WINDOW_MONTHS yields
# (start, end) bounds directly; zip stops once the shifted copy runs out, so
# no window count has to be derived by hand.
#
# The end bound is exclusive: `mend` / `ymend` name the first month that is
# NOT part of the window. As a consequence the last entry of months_ago_list
# only ever serves as an exclusive bound and is never aggregated.
# NOTE(review): confirm that excluding the oldest month is intended.
for mstart, mend, ymstart, ymend in zip(
    months_ago_list, months_ago_list[WINDOW_MONTHS:],
    start_month_list, start_month_list[WINDOW_MONTHS:],
):
    print([mstart, mend, ymstart, ymend])
    dm_filtered = dm.filter(
        (mstart <= pl.col("months_ago")) &
        (pl.col("months_ago") < mend)
    ).sort("months_ago")
    display(dm_filtered)

    # Collapse the window into a single row of totals, then derive the share
    # of million-dollar sales (a fraction in [0, 1], not multiplied by 100).
    metric_df = dm_filtered.select(
        pl.col("no_of_flats_sold").sum(),
        pl.col("no_of_million_dollar_flats_sold").sum(),
    ).with_columns(
        (pl.col("no_of_million_dollar_flats_sold") / pl.col("no_of_flats_sold")).alias("pctage_of_million_dollar_flats_sold")
    )
    print(metric_df)
    print("---")

[0, 24, '2024-11', '2022-11']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,11,0,188,10,"""2024-11"""
2024,10,1,2155,103,"""2024-10"""
2024,9,2,2209,106,"""2024-09"""
2024,8,3,2595,105,"""2024-08"""
2024,7,4,3036,120,"""2024-07"""
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 51995            ┆ 1360                            ┆ 0.026156                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[1, 25, '2024-10', '2022-10']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,10,1,2155,103,"""2024-10"""
2024,9,2,2209,106,"""2024-09"""
2024,8,3,2595,105,"""2024-08"""
2024,7,4,3036,120,"""2024-07"""
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 53937            ┆ 1376                            ┆ 0.025511                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[2, 26, '2024-09', '2022-09']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,9,2,2209,106,"""2024-09"""
2024,8,3,2595,105,"""2024-08"""
2024,7,4,3036,120,"""2024-07"""
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 53740            ┆ 1311                            ┆ 0.024395                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[3, 27, '2024-08', '2022-08']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,8,3,2595,105,"""2024-08"""
2024,7,4,3036,120,"""2024-07"""
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 54109            ┆ 1250                            ┆ 0.023102                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[4, 28, '2024-07', '2022-07']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,7,4,3036,120,"""2024-07"""
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 53823            ┆ 1178                            ┆ 0.021887                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[5, 29, '2024-06', '2022-06']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,6,5,2172,95,"""2024-06"""
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""
2023,9,14,1974,42,"""2023-09"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 53148            ┆ 1091                            ┆ 0.020528                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[6, 30, '2024-05', '2022-05']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,5,6,2491,73,"""2024-05"""
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""
2023,9,14,1974,42,"""2023-09"""
2023,8,15,2467,54,"""2023-08"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 53111            ┆ 1027                            ┆ 0.019337                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[7, 31, '2024-04', '2022-04']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,4,7,2370,68,"""2024-04"""
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""
2023,9,14,1974,42,"""2023-09"""
2023,8,15,2467,54,"""2023-08"""
2023,7,16,2052,32,"""2023-07"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 52774            ┆ 984                             ┆ 0.018646                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[8, 32, '2024-03', '2022-03']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,3,8,2046,60,"""2024-03"""
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""
2023,9,14,1974,42,"""2023-09"""
2023,8,15,2467,54,"""2023-08"""
2023,7,16,2052,32,"""2023-07"""
2023,6,17,1853,34,"""2023-06"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 52666            ┆ 938                             ┆ 0.01781                         │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[9, 33, '2024-02', '2022-02']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,2,9,2123,49,"""2024-02"""
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""
2023,9,14,1974,42,"""2023-09"""
2023,8,15,2467,54,"""2023-08"""
2023,7,16,2052,32,"""2023-07"""
2023,6,17,1853,34,"""2023-06"""
2023,5,18,2245,34,"""2023-05"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 52882            ┆ 905                             ┆ 0.017114                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
[10, 34, '2024-01', '2022-01']


year_of_sale,month_of_sale,months_ago,no_of_flats_sold,no_of_million_dollar_flats_sold,year_month
i64,i64,i64,u32,i64,str
2024,1,10,2621,74,"""2024-01"""
2023,12,11,2004,48,"""2023-12"""
2023,11,12,2129,45,"""2023-11"""
2023,10,13,2189,40,"""2023-10"""
2023,9,14,1974,42,"""2023-09"""
2023,8,15,2467,54,"""2023-08"""
2023,7,16,2052,32,"""2023-07"""
2023,6,17,1853,34,"""2023-06"""
2023,5,18,2245,34,"""2023-05"""
2023,4,19,2177,37,"""2023-04"""


shape: (1, 3)
┌──────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ no_of_flats_sold ┆ no_of_million_dollar_flats_sol… ┆ pctage_of_million_dollar_flats… │
│ ---              ┆ ---                             ┆ ---                             │
│ u32              ┆ i64                             ┆ f64                             │
╞══════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 52654            ┆ 885                             ┆ 0.016808                        │
└──────────────────┴─────────────────────────────────┴─────────────────────────────────┘
---
