In [8]:
import pandas as pd
import itertools
import pandas_gbq
import os
from bigquery_testing import get_latest_financial_data, get_unique_col, upload_table

Define constants and set the environment variable that points to the Google credentials file

In [9]:
# Google Cloud project, dataset and table identifiers used by this project.
project_id = "graphical-reach-380414"
dataset_id = "hdb"
resale_table_id = "resale_transactions"
financial_table_id = "financial_data"
combine_table_id = "combine_data"

# Point the Google client libraries at a service-account key file.
# setdefault keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already present
# in the environment, so the notebook is not tied to one machine; the author's
# local key path below is only used as a fallback when nothing is configured.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/Users/suzifeng/Desktop/IS3107/Project/graphical-reach-380414-bbaf9019146a.json",
)

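We want to create input data covering every combination of town, flat type and storey range, for each of the next 5 years, under 3 cases of economic performance (bull, bear and base). With the feature values currently available this comes to roughly 70,000 rows (see the output below), not 7,500.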

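1. Get the S_P, STI and 10Y_Treasury values from the last row of the financial data
2. Project those values over the next 5 years under each of the 3 cases
3. For each of the resulting 15 (year, case) rows of S_P, STI and 10Y_Treasury, generate all possible combinations of the features (town, flat type, storey range)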

In [10]:
# Latest financial figures as returned by get_latest_financial_data();
# they serve as the starting point for every projection below.
financials = get_latest_financial_data()

# Forecast horizon: the five calendar years that follow base_year.
base_year = 2023
years = list(range(base_year + 1, base_year + 6))

# Economic scenarios to simulate.
cases = ['bull', 'bear', 'base']

# Columns whose candidate values are looked up with get_unique_col.
features = ['town', 'flat_type', 'storey_range']

# One list of candidate values per output column:
# each feature in order, then the years, then the cases.
values = [*map(get_unique_col, features), years, cases]

In [15]:
def predict_prices(initial, periods, col='S_P', case='bull'):
    """Compound ``initial`` over ``periods`` using a per-column, per-case multiplier.

    Args:
        initial: Starting value to project from.
        periods: Exponent applied to the multiplier (the caller passes a
            number of years).
        col: Name of the indicator. Only 'S_P' and 'STI' have multipliers;
            any other name uses a multiplier of 1, and ``case`` is ignored.
        case: Scenario key, one of 'bull', 'base' or 'bear'.

    Returns:
        ``initial * multiplier ** periods``.

    Raises:
        KeyError: If ``col`` has multipliers but ``case`` is not one of them.
    """
    multipliers_by_col = {
        'S_P': {'bull': 1.1, 'base': 1, 'bear': 0.9},
        'STI': {'bull': 1.05, 'base': 1, 'bear': 0.95},
    }

    # Columns without an entry are carried forward unchanged.
    multiplier = 1
    if col in multipliers_by_col:
        multiplier = multipliers_by_col[col][case]

    return initial * multiplier ** periods

In [16]:
# Cartesian product of every candidate value list: one row per
# (town, flat_type, storey_range, year, case).
combinations = list(itertools.product(*values))
df = pd.DataFrame(combinations, columns=features + ['year', 'case'])

# A projected value depends only on (year, case), so compute it once per
# distinct pair and look it up for each row. The previous row-wise
# DataFrame.apply called predict_prices for every row and every indicator,
# repeating the same handful of calculations tens of thousands of times.
scenarios = list(
    df[['year', 'case']].drop_duplicates().itertuples(index=False, name=None)
)
for stat in financials:
    projected = {
        (year, case): predict_prices(financials[stat], year - base_year, stat, case)
        for year, case in scenarios
    }
    df[stat] = [projected[key] for key in zip(df['year'], df['case'])]
df

Unnamed: 0,town,flat_type,storey_range,year,case,_10Y_Treasury,S_P,STI
0,YISHUN,EXECUTIVE,01 TO 03,2024,bull,3.916,4367.164893,3425.761377
1,YISHUN,EXECUTIVE,01 TO 03,2024,bear,3.916,3573.134912,3099.498389
2,YISHUN,EXECUTIVE,01 TO 03,2024,base,3.916,3970.149902,3262.629883
3,YISHUN,EXECUTIVE,01 TO 03,2025,bull,3.916,4803.881382,3597.049446
4,YISHUN,EXECUTIVE,01 TO 03,2025,bear,3.916,3215.821421,2944.523469
...,...,...,...,...,...,...,...,...
70870,LIM CHU KANG,MULTI-GENERATION,31 TO 35,2027,bear,3.916,2604.815351,2657.432431
70871,LIM CHU KANG,MULTI-GENERATION,31 TO 35,2027,base,3.916,3970.149902,3262.629883
70872,LIM CHU KANG,MULTI-GENERATION,31 TO 35,2028,bull,3.916,6393.966119,4164.034365
70873,LIM CHU KANG,MULTI-GENERATION,31 TO 35,2028,bear,3.916,2344.333816,2524.560809


In [13]:
# Hand the generated scenario DataFrame to upload_table (imported from bigquery_testing).
# NOTE(review): the destination is decided inside that helper — presumably a BigQuery table; confirm.
upload_table(df)

In [14]:
# Optional local CSV export of the generated inputs; left disabled.
# df.to_csv('sample_input.csv', index=False)