In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# todo: change the paths

In [79]:
def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # files that are trusted.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# One pickled object per housing series; each is later passed to the plotting
# helpers, which iterate it with ``.items()`` and read a 'price' entry.
resale_flat_transactions_df_grouped_dict = _load_pickle(
    "data/resale_flat_transactions_df_grouped_dict.pkl"
)
private_rental_df_grouped_dict = _load_pickle(
    "data/private_rental_df_grouped_dict.pkl"
)
private_transactions_df_grouped_dict = _load_pickle(
    "data/private_transactions_df_grouped_dict.pkl"
)
flat_rental_df_grouped_dict = _load_pickle(
    "data/flat_rental_df_grouped_dict.pkl"
)

# Lookup table used to translate a postal district number into its
# "General Location" name.
district_mapping_path = "../airflow/dags/data/districts.xlsx"
district_mapping = pd.read_excel(district_mapping_path)


def get_district_name(district_no):
    """Return the "General Location" of the first row matching a postal district.

    Looks ``district_no`` up in the module-level ``district_mapping`` table by
    its "Postal District" column. Raises ``IndexError`` when no row matches,
    because the first element of an empty result is requested.
    """
    is_requested_district = district_mapping["Postal District"] == district_no
    matching_locations = district_mapping.loc[
        is_requested_district, "General Location"
    ]
    return matching_locations.values[0]

# Housing Prices

This notebook is one of the end products of the data pipeline. It contains the code to visualise and model housing prices.

In the context of Singapore, housing prices come in many forms. They are:
1. Resale flat prices
2. Flat rental prices
3. Private property prices
4. Private property rental prices

For each of these, we will be modelling the monthly median prices.


# Visualising Prices and Predictions

For each of the above, we will be visualising the prices and predictions. The predictions are made with the best models after backtesting. 

Backtesting is done in the `sarima.ipynb`, `sarimax.ipynb`, and `lstm_keras` notebooks.

In [80]:
# seaborn
def plot_all_ts(
    all_district_ts, title
):
    """Draw one static line-plus-markers panel per district on a 4-column grid.

    Parameters
    ----------
    all_district_ts : mapping
        Maps a district number to an object whose ``['price']`` entry exposes
        ``.index`` (plotted on x) and ``.values`` (plotted on y).
        Presumably a DataFrame of monthly median prices -- TODO confirm
        against the pipeline output.
    title : str
        Figure-level title shown above all panels.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    n_cols = 4
    # Ceiling division: enough rows to hold every district.
    num_rows = -(-len(all_district_ts) // n_cols)

    fig = plt.figure(figsize=(30, 20))

    for plot_no, (district_no, curr_district_df) in enumerate(
        all_district_ts.items(), start=1
    ):
        price_series = curr_district_df['price']

        # Explicit axes instead of the pyplot "current axes" state machine.
        ax = fig.add_subplot(num_rows, n_cols, plot_no)
        sns.lineplot(x=price_series.index, y=price_series.values, ax=ax)
        # Overlay the individual observations as points.
        sns.scatterplot(x=price_series.index, y=price_series.values, ax=ax)

        # Axis labels are dropped; the panel title identifies the series.
        ax.set_xlabel("")
        ax.set_ylabel("")
        # tick_params applies to all tick labels of the axes, including ones
        # created later when the figure is drawn.
        ax.tick_params(axis="both", labelsize=8)
        ax.set_title(
            f"({district_no}): " + get_district_name(district_no),
            fontsize=11,
            fontweight="bold",
        )

    fig.suptitle(
        title,
        fontsize=20,
        fontweight="bold",
    )

    # tight_layout is a one-shot adjustment computed when it is called, so it
    # must run after the axes exist; calling it on the empty figure did
    # nothing. The rect leaves headroom for the suptitle.
    fig.tight_layout(rect=(0, 0, 1, 0.97))

    plt.show()

# plotly
def plot_all_ts_plotly(
    all_district_ts, title
):
    """Render one interactive lines+markers subplot per district, 4 per row.

    Parameters
    ----------
    all_district_ts : mapping
        Maps a district number to an object whose ``['price']`` entry exposes
        ``.index`` (x values) and ``.values`` (y values).
    title : str
        Title of the whole figure.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    n_cols = 4
    full_rows, leftover = divmod(len(all_district_ts), n_cols)
    # A partially filled last row still needs its own grid row.
    num_rows = full_rows + 1 if leftover else full_rows

    panel_titles = [
        f"({key}): " + get_district_name(key)
        for key in all_district_ts.keys()
    ]
    fig = make_subplots(
        rows=num_rows,
        cols=n_cols,
        subplot_titles=panel_titles,
    )

    for position, (district_no, district_frame) in enumerate(
        all_district_ts.items()
    ):
        price_series = district_frame['price']
        # Fill the grid left to right, top to bottom (plotly cells are 1-based).
        row_offset, col_offset = divmod(position, n_cols)

        # NOTE(review): the trace name casts district_no to int while the
        # subplot titles use the raw key -- confirm which form is intended.
        district_trace = go.Scatter(
            x=price_series.index,
            y=price_series.values,
            mode='lines+markers',
            name=f'({int(district_no)}): ' + get_district_name(district_no),
        )
        fig.add_trace(district_trace, row=row_offset + 1, col=col_offset + 1)

    fig.update_layout(title=title, height=1400, width=1500, showlegend=False)

    # Subplot titles are stored as annotations; shrink them to fit the grid.
    fig.update_annotations(font_size=11)

    fig.show()

## Resale Flat Prices

In [81]:
# Static seaborn alternative to the interactive plot below (kept for reference):
# plot_all_ts(resale_flat_transactions_df_grouped_dict, "District Resale Flat Transactions and Median Prices, 2018-2023")
plot_all_ts_plotly(resale_flat_transactions_df_grouped_dict, "District Resale Flat Transactions and Median Prices, 2018-2023")


## Flat Rental Prices

In [83]:
# Interactive per-district grid for the flat rental series. The title names
# flat rentals to match the data being plotted.
plot_all_ts_plotly(flat_rental_df_grouped_dict, "District Flat Rental Transactions and Median Prices, 2018-2023")

## Private Property Prices

In [84]:
# Interactive per-district grid for the private property transaction series.
# The title names private property transactions to match the data being plotted.
plot_all_ts_plotly(private_transactions_df_grouped_dict, "District Private Property Transactions and Median Prices, 2018-2023")

## Private Property Rental Prices

In [85]:
# Interactive per-district grid for the private property rental series.
plot_all_ts_plotly(private_rental_df_grouped_dict, "District Private Rental Transactions and Median Prices, 2018-2023")