In [1]:
import polars as pl

import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's default theme to the matplotlib figures drawn in this notebook.
sns.set()

In [2]:
# Load the ingested transaction data.
df = pl.read_csv("ingested.csv")

# Keep sales from 2022-2024 and add ge_1M: 1 when price >= 1,000,000, else 0.
# NOTE(review): the CSV already carries price_is_geq_one_million, which looks
# like the same flag — confirm and consider keeping only one of them.
df_filter = df.filter(
    pl.col("year_of_sale").is_in([2022, 2023, 2024]),
).with_columns(
    pl.col("price").ge(1_000_000).cast(pl.Int64).alias("ge_1M"),
)

# Fixed seed so the preview row is identical on every Restart & Run All.
display(df_filter.sample(seed=42))
# DataFrame.columns is already a list of column names.
print(df_filter.columns)

address,lease_remaining,minimum_floor,maximum_floor,built_year,sqft,sqm,flat_type,road,town,year_of_sale,month_of_sale,months_ago,price,psf,band_name,price_is_geq_one_million,ge_1M
str,i64,i64,i64,i64,i64,i64,str,str,str,i64,i64,i64,i64,f64,str,i64,i64
"""110D PUNGGOL FIELD""",78,13,15,2003,1184,110,"""5 ROOM""","""PUNGGOL FIELD""","""PUNGGOL""",2024,2,10,625000,527.87,"""75 - 79""",0,0


['address', 'lease_remaining', 'minimum_floor', 'maximum_floor', 'built_year', 'sqft', 'sqm', 'flat_type', 'road', 'town', 'year_of_sale', 'month_of_sale', 'months_ago', 'price', 'psf', 'band_name', 'price_is_geq_one_million', 'ge_1M']


In [3]:
# 2024 rows flagged ge_1M == 1. Filtered once and reused for both outputs
# below (previously the same filter was built and executed twice).
sales_2024_ge_1m = df_filter.filter(
    pl.col("year_of_sale").eq(2024),
    pl.col("ge_1M").eq(1),
)

# Total number of such transactions.
display(sales_2024_ge_1m.height)

# Monthly breakdown printed as a 3-row array: year, month, transaction count.
print(
    sales_2024_ge_1m
    .group_by("year_of_sale", "month_of_sale")
    .agg(pl.col("address").len())
    .sort("month_of_sale")
    .transpose()
    .to_numpy()
)


1016

[[2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 2024]
 [   1    2    3    4    5    6    7    8    9   10   11   12]
 [  74   49   60   68   73   95  120  105  106  103   87   76]]


In [4]:
# Count and percentage share of 2024 transactions for each value of ge_1M.
with pl.Config(tbl_rows=20, tbl_cols=20):
    display(
        df_filter.filter(pl.col("year_of_sale") == 2024)
        .group_by("ge_1M")
        .agg(pl.col("address").len().alias("count_transactions"))
        .with_columns(
            (
                pl.col("count_transactions")
                / pl.col("count_transactions").sum()
                * 100
            ).alias("pct_transactions")
        )
    )


ge_1M,count_transactions,pct_transactions
i64,u32,f64
0,26410,96.295486
1,1016,3.704514


In [5]:
# Yearly count of ge_1M == 1 transactions. Note that "pct_transactions" here
# is each year's count as a percentage of the *smallest* yearly count
# (so the smallest year reads 100), not a share of the total.
with pl.Config(tbl_rows=20, tbl_cols=20):
    display(
        df_filter.filter(pl.col("ge_1M") == 1)
        .group_by("year_of_sale", "ge_1M")
        .agg(pl.col("address").len().alias("count_transactions"))
        .with_columns(
            (
                pl.col("count_transactions")
                / pl.col("count_transactions").min()
                * 100
            ).alias("pct_transactions")
        )
        .sort("year_of_sale")
    )


year_of_sale,ge_1M,count_transactions,pct_transactions
i64,i64,u32,f64
2022,1,369,100.0
2023,1,469,127.100271
2024,1,1016,275.338753


In [14]:
# Reference table copied by hand "from source" for comparison with the
# dataset query below.
# NOTE(review): the exact external source is not recorded in this notebook —
# add a link or citation.
# Fix: the original literal never closed its last dict (`}`) nor the
# enclosing list (`]`), so this cell raised a SyntaxError.
df1 = pl.DataFrame(
    [
        {
            "id": 1,
            "Town": "BUKIT MERAH",
            "Flat_type": "Five-room",
            "Street": "BOON TIONG RD",
            "Storey_range": "34 to 36",
            "Floor_area": 112,
            "Flat_model": "Improved",
            "Resale_price": 1588000,
            "Remaining_lease": "90 years 8 months",
        },
        {
            "id": 2,
            "Town": "BUKIT MERAH",
            "Flat_type": "Five-room",
            "Street": "HENDERSON RD",
            "Storey_range": "46 to 48",
            "Floor_area": 113,
            "Flat_model": "Improved",
            "Resale_price": 1588000,
            "Remaining_lease": "94 years",
        },
        {
            "id": 3,
            "Town": "BUKIT MERAH",
            "Flat_type": "Five-room",
            "Street": "KIM TIAN RD",
            "Storey_range": "40 to 42",
            "Floor_area": 113,
            "Flat_model": "Improved",
            "Resale_price": 1580000,
            "Remaining_lease": "87 years 6 months",
        },
        {
            "id": 4,
            "Town": "TOA PAYOH",
            "Flat_type": "Five-room",
            "Street": "LOR 1A TOA PAYOH",
            "Storey_range": "40 to 42",
            "Floor_area": 117,
            "Flat_model": "DBSS",
            "Resale_price": 1568888,
            "Remaining_lease": "87 years 4 months",
        },
        {
            "id": 5,
            "Town": "BISHAN",
            "Flat_type": "Five-room",
            "Street": "BISHAN ST 24",
            "Storey_range": "37 to 39",
            "Floor_area": 120,
            "Flat_model": "DBSS",
            "Resale_price": 1568000,
            "Remaining_lease": "86 years 3 months",
        },
        {
            "id": 6,
            "Town": "KALLANG/WHAMPOA",
            "Flat_type": "Three-room",
            "Street": "JLN MA'MOR",
            "Storey_range": "01 to 03",
            "Floor_area": 367,
            "Flat_model": "Terrace",
            "Resale_price": 1568000,
            "Remaining_lease": "47 years",
        },
        {
            "id": 7,
            "Town": "QUEENSTOWN",
            "Flat_type": "Five-room",
            "Street": "DOVER CRES",
            "Storey_range": "37 to 39",
            "Floor_area": 124,
            "Flat_model": "Improved",
            "Resale_price": 1550000,
            "Remaining_lease": "86 years 5 months",
        },
        {
            "id": 8,
            "Town": "CENTRAL AREA",
            "Flat_type": "Five-room",
            "Street": "CANTONMENT RD",
            "Storey_range": "43 to 45",
            "Floor_area": 105,
            "Flat_model": "Type S2",
            "Resale_price": 1542880,
            "Remaining_lease": "85 years 3 months",
        },
        {
            "id": 9,
            "Town": "TOA PAYOH",
            "Flat_type": "Five-room",
            "Street": "LOR 1A TOA PAYOH",
            "Storey_range": "31 to 33",
            "Floor_area": 117,
            "Flat_model": "DBSS",
            "Resale_price": 1540000,
            "Remaining_lease": "87 years 3 months",
        },
        {
            "id": 10,
            "Town": "TOA PAYOH",
            "Flat_type": "Five-room",
            "Street": "LOR 1A TOA PAYOH",
            "Storey_range": "31 to 33",
            "Floor_area": 117,
            "Flat_model": "DBSS",
            "Resale_price": 1540000,
            "Remaining_lease": "86 years 11 months",
        },
        {
            "id": 11,
            "Town": "CENTRAL AREA",
            "Flat_type": "Five-room",
            "Street": "CANTONMENT RD",
            "Storey_range": "43 to 45",
            "Floor_area": 107,
            "Flat_model": "Type S2",
            "Resale_price": 1540000,
            "Remaining_lease": "85 years 4 months",
        },
    ]
)

# Empty placeholder frame kept from the original cell.
# NOTE(review): appears unused in the visible cells — remove if nothing else needs it.
df2 = pl.DataFrame()

# Transactions in df_filter priced at 1,500,000 or more, highest price first.
with pl.Config(tbl_rows=40, tbl_cols=20):
    display(
        df_filter
        .filter(pl.col("price").ge(1500000))
        .with_columns(
            pl.col("minimum_floor").alias("min_floor"),
            pl.col("maximum_floor").alias("max_floor"),
            # Lease left in the year of sale, computed as 99 minus the flat's
            # age in whole years (assumes a 99-year lease starting at
            # built_year — TODO confirm).
            (99 - (pl.col("year_of_sale") - pl.col("built_year")))
            .alias("lease_remaining_during_sale"),
        )
        .sort(pl.col("price"), descending=True)
        .select(
            "town",
            "flat_type", "road", "address",
            "min_floor", "max_floor", "price",
            "lease_remaining_during_sale",
        )
    )

town,flat_type,road,address,min_floor,max_floor,price,lease_remaining_during_sale
str,str,str,str,i64,i64,i64,i64
"""BUKIT MERAH""","""5 ROOM""","""BOON TIONG ROAD""","""9B BOON TIONG ROAD""",34,36,1588000,91
"""BUKIT MERAH""","""5 ROOM""","""HENDERSON ROAD""","""96A HENDERSON ROAD""",46,48,1588000,94
"""BUKIT MERAH""","""5 ROOM""","""KIM TIAN ROAD""","""126A KIM TIAN ROAD""",40,42,1580000,88
"""TOA PAYOH""","""5 ROOM""","""LORONG 1A TOA PAYOH""","""139A LORONG 1A TOA PAYOH""",40,42,1568888,87
"""BISHAN""","""5 ROOM""","""BISHAN STREET 24""","""275A BISHAN STREET 24""",37,39,1568000,86
"""KALLANG/WHAMPOA""","""3 ROOM""","""JALAN MA'MOR""","""53 JALAN MA'MOR""",1,3,1568000,47
"""QUEENSTOWN""","""5 ROOM""","""DOVER CRESCENT""","""28A DOVER CRESCENT""",37,39,1550000,87
"""CENTRAL AREA""","""5 ROOM""","""CANTONMENT ROAD""","""1D CANTONMENT ROAD""",43,45,1542880,86
"""CENTRAL AREA""","""5 ROOM""","""CANTONMENT ROAD""","""1F CANTONMENT ROAD""",43,45,1540000,86
"""TOA PAYOH""","""5 ROOM""","""LORONG 1A TOA PAYOH""","""138C LORONG 1A TOA PAYOH""",31,33,1540000,87


In [None]:
# 2024 transactions grouped by the price_is_geq_one_million flag, with each
# group's fraction (0-1) of the year's transaction count.
df_million_2024 = (
    df_filter.filter(pl.col("year_of_sale") == 2024)
    .group_by("year_of_sale", "price_is_geq_one_million")
    .agg(pl.len().alias("count_transactions"))
    .sort("price_is_geq_one_million")
    .with_columns(
        (
            pl.col("count_transactions") / pl.col("count_transactions").sum()
        ).alias("pct_transactions")
    )
)

print(df_million_2024)

In [None]:
# Transactions flagged price_is_geq_one_million == 1, bucketed by price
# rounded down to the nearest 100,000, with each bucket's fraction of the total.
df_by_100_000 = (
    df_filter.filter(pl.col("price_is_geq_one_million") == 1)
    .with_columns(
        ((pl.col("price") // 100000) * 100000).alias("price_round_down_100k")
    )
    .group_by("price_round_down_100k")
    .agg(pl.len().alias("count_transactions"))
    .with_columns(
        (
            pl.col("count_transactions") / pl.col("count_transactions").sum()
        ).alias("pct_transactions")
    )
    .sort("price_round_down_100k")
)
print(df_by_100_000)

In [None]:
# Number of 2024 transactions flagged price_is_geq_one_million == 1 per town,
# most frequent town first. Reads the unfiltered frame `df`.
with pl.Config(tbl_rows=30):
    display(
        df.filter(
            pl.col("year_of_sale") == 2024,
            pl.col("price_is_geq_one_million") == 1,
        )
        .group_by("town")
        .agg(pl.col("address").len().alias("count_transactions"))
        .sort("count_transactions", descending=True)
    )


In [None]:
# 2024 SENGKANG transactions flagged price_is_geq_one_million == 1, ordered by month.
(
    df.filter(
        pl.col("year_of_sale") == 2024,
        pl.col("price_is_geq_one_million") == 1,
        pl.col("town") == "SENGKANG",
    )
    .sort("month_of_sale")
    .select(
        [
            "address",
            "sqft",
            "minimum_floor",
            "maximum_floor",
            "flat_type",
            "price",
            "year_of_sale",
            "month_of_sale",
        ]
    )
)

In [None]:
# 2024 JURONG WEST transactions with price_is_geq_one_million == 0, highest price first.
(
    df.filter(
        pl.col("year_of_sale") == 2024,
        pl.col("price_is_geq_one_million") == 0,
        pl.col("town") == "JURONG WEST",
    )
    .sort("price", descending=True)
    .select(
        [
            "address",
            "sqft",
            "minimum_floor",
            "maximum_floor",
            "flat_type",
            "price",
            "year_of_sale",
            "month_of_sale",
        ]
    )
)

In [None]:
# All 2024 transactions flagged price_is_geq_one_million == 1, highest price first.
(
    df.filter(
        pl.col("year_of_sale") == 2024,
        pl.col("price_is_geq_one_million") == 1,
    )
    .sort("price", descending=True)
    .select(
        [
            "address",
            "sqft",
            "minimum_floor",
            "maximum_floor",
            "flat_type",
            "price",
            "year_of_sale",
            "month_of_sale",
            "psf",
            "band_name",
        ]
    )
)

In [None]:
# 2024 transactions flagged price_is_geq_one_million == 1, counted per
# band_name, with each band's fraction (0-1) of the total.
(
    df.filter(
        pl.col("year_of_sale").eq(2024),
        pl.col("price_is_geq_one_million").eq(1),
    )
    .group_by("band_name")
    .agg(pl.col("address").len().alias("count_transactions"))
    .with_columns(
        # The ratio must be parenthesised before .alias(): a method call binds
        # tighter than `/`, so without the parentheses the alias applied only
        # to the sum, the result kept the name "count_transactions", and the
        # counts were overwritten instead of adding "pct_transactions".
        (
            pl.col("count_transactions") / pl.col("count_transactions").sum()
        ).alias("pct_transactions")
    )
    .sort("band_name", descending=True)
)

In [None]:
# Every 2024 transaction, ordered by psf from highest to lowest.
(
    df.filter(pl.col("year_of_sale") == 2024)
    .sort("psf", descending=True)
    .select(
        [
            "address",
            "sqft",
            "minimum_floor",
            "maximum_floor",
            "flat_type",
            "price",
            "year_of_sale",
            "month_of_sale",
            "psf",
            "band_name",
        ]
    )
)

In [None]:
# Transaction count per year of sale across the whole (unfiltered) dataset.
df_by_sale = (
    df.group_by("year_of_sale")
    .agg(pl.col("address").len().alias("count_transactions"))
    .sort("year_of_sale")
)
display(df_by_sale)

In [None]:
# Bar chart of transaction count (y) per year of sale (x).
sns.barplot(
    x=df_by_sale["year_of_sale"].to_numpy(),
    y=df_by_sale["count_transactions"].to_numpy(),
)

In [None]:
# Count of transactions flagged price_is_geq_one_million == 1 per
# (year_of_sale, town) for 2022-2024, ordered by town then year.
df_twn = (
    df.filter(
        pl.col("price_is_geq_one_million") == 1,
        pl.col("year_of_sale").is_in([2022, 2023, 2024]),
    )
    .group_by("year_of_sale", "town")
    .agg(pl.col("address").len().alias("count_transactions"))
    .sort("town", "year_of_sale")
)

In [None]:
# Grouped bar chart: one group per town, one bar per year of sale, bar
# height = count_transactions from df_twn.
plt.figure(figsize=(15, 8))  # 15 in wide, 8 in tall
sns.barplot(
    x=df_twn["town"].to_numpy(),
    y=df_twn["count_transactions"].to_numpy(),
    hue=df_twn["year_of_sale"].to_numpy(),
)
# Tilt the town labels so they do not overlap.
plt.xticks(rotation=45, ha="right")