In [194]:
import pandas as pd
import numpy as np

import aiohttp
import asyncio
import nest_asyncio
nest_asyncio.apply()

from google.cloud import bigquery

In [195]:
# Transactions and buildings: the first CSV column is consumed as the index and
# then discarded in favour of a fresh 0..n-1 index.
transactions = (
    pd.read_csv('transactions2_data.csv', index_col=0)
    .reset_index(drop=True)
)
buildings = (
    pd.read_csv('buildings_data.csv', index_col=0)
    .reset_index(drop=True)
)

# Schools and median rent are loaded with pandas' default parsing.
# NOTE(review): the displayed median_rent values include 'na' and '-'
# placeholders, so that column is presumably non-numeric after loading --
# TODO confirm before doing arithmetic on it.
schools = pd.read_csv('schools.csv')
medianrent = pd.read_csv('rent.csv')

In [196]:
def make_title(df):
    """Title-case the string values held in the object-dtype columns of ``df``.

    Only values that are actual ``str`` instances are changed; missing values
    (NaN/None) and any other non-string objects in those columns are passed
    through untouched instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to normalise. Its object-dtype columns are overwritten in place,
        so the caller's frame is modified as well.

    Returns
    -------
    pandas.DataFrame
        The frame with title-cased strings and a fresh 0..n-1 index.
    """
    def _title_if_str(value):
        # Guard against NaN/None: str.title would raise TypeError on them.
        return value.title() if isinstance(value, str) else value

    # Work column by column with Series.map so this does not depend on the
    # element-wise DataFrame.map, which only exists in newer pandas releases.
    for column in df.select_dtypes(include=['object']).columns:
        df[column] = df[column].map(_title_if_str)
    return df.reset_index(drop=True)

In [197]:
transactions.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61 years 04 months,232000.0
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,60 years 07 months,250000.0
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,262000.0
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,62 years 01 month,265000.0
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,265000.0


In [198]:
schools.head()

Unnamed: 0,school_name,url_address,address,postal_code,telephone_no,telephone_no_2,fax_no,fax_no_2,email_address,mrt_desc,...,nature_code,session_code,mainlevel_code,sap_ind,autonomous_ind,gifted_ind,ip_ind,mothertongue1_code,mothertongue2_code,mothertongue3_code
0,ADMIRALTY PRIMARY SCHOOL,https://admiraltypri.moe.edu.sg/,11 WOODLANDS CIRCLE,738907,63620598,na,63627512,na,ADMIRALTY_PS@MOE.EDU.SG,Admiralty Station,...,CO-ED SCHOOL,FULL DAY,PRIMARY,No,No,No,No,Chinese,Malay,Tamil
1,ADMIRALTY SECONDARY SCHOOL,http://www.admiraltysec.moe.edu.sg,31 WOODLANDS CRESCENT,737916,63651733,63654596,63652774,na,Admiralty_SS@moe.edu.sg,ADMIRALTY MRT,...,CO-ED SCHOOL,SINGLE SESSION,SECONDARY,No,No,No,No,Chinese,Malay,Tamil
2,AHMAD IBRAHIM PRIMARY SCHOOL,http://www.ahmadibrahimpri.moe.edu.sg,10 YISHUN STREET 11,768643,67592906,na,67592927,na,aips@moe.edu.sg,Yishun,...,CO-ED SCHOOL,SINGLE SESSION,PRIMARY,No,No,No,No,Chinese,Malay,Tamil
3,AHMAD IBRAHIM SECONDARY SCHOOL,http://www.ahmadibrahimsec.moe.edu.sg,751 YISHUN AVENUE 7,768928,67585384,na,67557778,na,aiss@moe.edu.sg,"CANBERRA MRT, YISHUN MRT",...,CO-ED SCHOOL,SINGLE SESSION,SECONDARY,No,No,No,No,Chinese,Malay,Tamil
4,AI TONG SCHOOL,http://www.aitong.moe.edu.sg,100 Bright Hill Drive,579646,64547672,na,64532726,na,AITONG_SCH@MOE.EDU.SG,Bishan MRT,...,CO-ED SCHOOL,SINGLE SESSION,PRIMARY,Yes,No,No,No,Chinese,na,na


In [199]:
medianrent

Unnamed: 0,quarter,town,flat_type,median_rent
0,2005-Q2,ANG MO KIO,1-RM,na
1,2005-Q2,ANG MO KIO,2-RM,na
2,2005-Q2,ANG MO KIO,3-RM,800
3,2005-Q2,ANG MO KIO,4-RM,950
4,2005-Q2,ANG MO KIO,5-RM,-
...,...,...,...,...
11272,2023-Q1,YISHUN,2-RM,-
11273,2023-Q1,YISHUN,3-RM,2500
11274,2023-Q1,YISHUN,4-RM,3000
11275,2023-Q1,YISHUN,5-RM,3200


In [200]:
medianrent.head()

Unnamed: 0,quarter,town,flat_type,median_rent
0,2005-Q2,ANG MO KIO,1-RM,na
1,2005-Q2,ANG MO KIO,2-RM,na
2,2005-Q2,ANG MO KIO,3-RM,800
3,2005-Q2,ANG MO KIO,4-RM,950
4,2005-Q2,ANG MO KIO,5-RM,-


# Flat Type

In [201]:
medianrent['flat_type'].unique()

array(['1-RM', '2-RM', '3-RM', '4-RM', '5-RM', 'EXEC'], dtype=object)

In [202]:
transactions['flat_type'].unique()

array(['2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '1 ROOM',
       'MULTI-GENERATION'], dtype=object)

In [203]:
def load_flat_type(transactions):
    """Build a one-column lookup table of the flat types seen in ``transactions``.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Frame with a ``flat_type`` column.

    Returns
    -------
    pandas.DataFrame
        A single ``flat_type`` column holding each distinct value once, in
        order of first appearance.
    """
    distinct_types = transactions['flat_type'].unique()
    return pd.DataFrame(data=distinct_types, columns=['flat_type'])

load_flat_type(transactions)

Unnamed: 0,flat_type
0,2 ROOM
1,3 ROOM
2,4 ROOM
3,5 ROOM
4,EXECUTIVE
5,1 ROOM
6,MULTI-GENERATION


In [204]:
def onemap_api(data):
    """Look up a postal code for every row of ``data`` via the OneMap search API.

    One request is issued per row, searching for ``"<blk_no> <street>"``; all
    requests are scheduled concurrently on a single aiohttp session.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``blk_no`` and ``street`` columns.

    Returns
    -------
    list
        One entry per row, in row order: the ``POSTAL`` field of the first
        search result, or ``None`` when the lookup failed (a message is
        printed for each failure).

    Notes
    -----
    The coroutine is driven with ``asyncio.run``; inside an already running
    event loop (e.g. Jupyter) this relies on the ``nest_asyncio.apply()`` call
    made at import time.
    """
    search_url = "https://www.onemap.gov.sg/api/common/elastic/search"

    # Failures that mean "this one lookup did not work": transport/HTTP errors,
    # timeouts, an undecodable body, or a payload without a first result.
    # Anything else (cancellation, KeyboardInterrupt, programming errors)
    # is allowed to propagate instead of being silently swallowed.
    lookup_errors = (
        aiohttp.ClientError,
        asyncio.TimeoutError,
        ValueError,
        LookupError,
        TypeError,
    )

    async def fetch(session, blk_no, street):
        # Passing the query as ``params`` lets aiohttp escape characters such
        # as '&' or '#' that would otherwise corrupt a hand-built URL.
        params = {
            'searchVal': f"{blk_no} {street}",
            'returnGeom': 'Y',
            'getAddrDetails': 'Y',
            'pageNum': '1',
        }
        try:
            # The request itself is inside the try block so that one failed
            # connection yields None rather than aborting the whole batch.
            async with session.get(search_url, params=params) as response:
                payload = await response.json()
            return payload['results'][0]['POSTAL']
        except lookup_errors:
            print(f'API Query failed for {blk_no} , {street}')
            return None

    async def main():
        async with aiohttp.ClientSession() as session:
            # Only two columns are needed, so iterate over them directly
            # instead of materialising a Series per row with iterrows().
            tasks = [
                asyncio.create_task(fetch(session, blk_no, street))
                for blk_no, street in zip(data['blk_no'], data['street'])
            ]
            # gather preserves task order, so results line up with the rows.
            return await asyncio.gather(*tasks)

    return asyncio.run(main())

# Buildings

In [205]:
buildings

Unnamed: 0,blk_no,street,max_floor_lvl,year_completed,residential,commercial,market_hawker,miscellaneous,multistorey_carpark,precinct_pavilion,...,3room_sold,4room_sold,5room_sold,exec_sold,multigen_sold,studio_apartment_sold,1room_rental,2room_rental,3room_rental,other_room_rental
0,1,BEACH RD,16,1970,Y,Y,N,N,N,N,...,138,1,2,0,0,0,0,0,0,0
1,1,BEDOK STH AVE 1,14,1975,Y,N,N,Y,N,N,...,204,0,2,0,0,0,0,0,0,0
2,1,CANTONMENT RD,2,2010,N,Y,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
3,1,CHAI CHEE RD,15,1982,Y,N,N,N,N,N,...,0,10,92,0,0,0,0,0,0,0
4,1,CHANGI VILLAGE RD,4,1975,Y,Y,N,N,N,N,...,54,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12821,998A,BUANGKOK CRES,18,2018,Y,N,N,N,N,N,...,33,50,0,0,0,0,0,0,0,0
12822,998B,BUANGKOK CRES,17,2018,Y,N,N,N,N,N,...,31,47,0,0,0,0,0,0,0,0
12823,999,BUANGKOK CRES,2,2018,N,N,N,Y,N,Y,...,0,0,0,0,0,0,0,0,0,0
12824,999A,BUANGKOK CRES,18,2018,Y,N,N,N,N,N,...,0,0,0,0,0,0,136,170,0,0


In [206]:
def transform_buildings(buildings):
    """Return a copy of ``buildings`` with a ``postal_code`` column added.

    The postal codes come from ``onemap_api``, which is called once with the
    whole frame and returns one value per row (``None`` where the lookup
    failed).

    Parameters
    ----------
    buildings : pandas.DataFrame
        Frame with the ``blk_no`` and ``street`` columns that ``onemap_api``
        reads. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every original column plus ``postal_code``.
    """
    # Run OneMap API to get postal code. ``assign`` builds a new frame, so the
    # caller's input is left untouched and re-running the cell stays idempotent.
    return buildings.assign(postal_code=onemap_api(buildings))

buildings = transform_buildings(buildings)

API Query failed for 141A , SERANGOON NTH AVE 2
API Query failed for 274D , COMPASSVALE BOW
API Query failed for 346 , YISHUN AVE 11
API Query failed for 4 , WOODLANDS ST 12
API Query failed for 5 , BANDA ST
API Query failed for 64 , NEW UPP CHANGI RD
API Query failed for 641 , ANG MO KIO AVE 4
API Query failed for 86 , MARINE PARADE CTRL


In [207]:
def transform_flats_sold(buildings):
    """Reshape the per-building "sold" counts into long format.

    Parameters
    ----------
    buildings : pandas.DataFrame
        Frame with a ``postal_code`` column and the seven ``*_sold`` count
        columns listed in the mapping below.

    Returns
    -------
    pandas.DataFrame
        Columns ``postal_code``, ``flat_type`` and ``flats_sold``: one row per
        (building, flat type) pair, grouped by flat type in mapping order.
    """
    # Source count column -> flat-type label used in the output.
    sold_column_labels = {
        '1room_sold': '1 ROOM',
        '2room_sold': '2 ROOM',
        '3room_sold': '3 ROOM',
        '4room_sold': '4 ROOM',
        '5room_sold': '5 ROOM',
        'exec_sold': 'EXECUTIVE',
        'multigen_sold': 'MULTI-GENERATION',
    }

    # Keep only the key plus the count columns, relabelled as flat types.
    wide_counts = (
        buildings[['postal_code', *sold_column_labels]]
        .rename(columns=sold_column_labels)
    )

    # Unpivot so that the flat type becomes a value rather than a column name.
    return wide_counts.melt(
        id_vars=['postal_code'],
        value_vars=list(sold_column_labels.values()),
        var_name='flat_type',
        value_name='flats_sold',
    )

flats_sold = transform_flats_sold(buildings)


In [208]:
flats_sold

Unnamed: 0,postal_code,flat_type,flats_sold
0,190001,1 ROOM,0
1,460001,1 ROOM,0
2,080001,1 ROOM,0
3,461001,1 ROOM,0
4,500001,1 ROOM,0
...,...,...,...
89777,531998,MULTI-GENERATION,0
89778,532998,MULTI-GENERATION,0
89779,530999,MULTI-GENERATION,0
89780,531999,MULTI-GENERATION,0


# Town

In [231]:
# Lookup from the short codes found in buildings['bldg_contract_town'] to the
# full town names. Insertion order is significant: load_town emits its rows in
# this order.
town_mapping = {
    'AMK': 'ANG MO KIO',
    'BB': 'BUKIT BATOK',
    'BD': 'BEDOK',
    'BH': 'BISHAN',
    'BM': 'BUKIT MERAH',
    'BP': 'BUKIT PANJANG',
    'BT': 'BUKIT TIMAH',
    'CCK': 'CHOA CHU KANG',
    'CL': 'CLEMENTI',
    'CT': 'CENTRAL AREA',
    'GL': 'GEYLANG',
    'HG': 'HOUGANG',
    'JE': 'JURONG EAST',
    'JW': 'JURONG WEST',
    'KWN': 'KALLANG/WHAMPOA',
    'MP': 'MARINE PARADE',
    'PG': 'PUNGGOL',
    'PRC': 'PASIR RIS',
    'QT': 'QUEENSTOWN',
    'SB': 'SEMBAWANG',
    'SGN': 'SERANGOON',
    'SK': 'SENGKANG',
    'TAP': 'TAMPINES',
    'TG': 'TENGAH',
    'TP': 'TOA PAYOH',
    'WL': 'WOODLANDS',
    'YS': 'YISHUN',
}

In [232]:
buildings['bldg_contract_town'].unique()

array(['KWN', 'BD', 'CT', 'PRC', 'BM', 'QT', 'GL', 'HG', 'SGN', 'TP',
       'MP', 'WL', 'PG', 'BT', 'TAP', 'CCK', 'AMK', 'BH', 'BB', 'SB',
       'CL', 'BP', 'JE', 'SK', 'YS', 'JW', 'TG'], dtype=object)

In [233]:
def load_town(town_mapping):
    """Build a one-column table of town names from a code -> name mapping.

    Parameters
    ----------
    town_mapping : dict
        Mapping whose values are the town names; the keys are ignored.

    Returns
    -------
    pandas.DataFrame
        A single ``town`` column with the mapping's values in insertion order.
    """
    town_names = [*town_mapping.values()]
    return pd.DataFrame(data=town_names, columns=['town'])

load_town(town_mapping)


Unnamed: 0,town
0,ANG MO KIO
1,BUKIT BATOK
2,BEDOK
3,BISHAN
4,BUKIT MERAH
5,BUKIT PANJANG
6,BUKIT TIMAH
7,CHOA CHU KANG
8,CLEMENTI
9,CENTRAL AREA


In [237]:
def rename_buildings_town(data, mapping=None):
    """Return a copy of ``data`` with town codes replaced by full town names.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``bldg_contract_town`` column of short town codes. It is
        not modified.
    mapping : dict, optional
        Code -> town-name lookup. When omitted, the notebook-level
        ``town_mapping`` is used, which keeps existing one-argument calls
        working unchanged.

    Returns
    -------
    pandas.DataFrame
        A deep copy of ``data`` whose ``bldg_contract_town`` holds the mapped
        names. Codes absent from the mapping become NaN, because ``Series.map``
        with a dict yields NaN for keys it does not contain.
    """
    if mapping is None:
        # Default to the shared lookup table, as the one-argument form always did.
        mapping = town_mapping

    renamed = data.copy(deep=True)
    renamed['bldg_contract_town'] = renamed['bldg_contract_town'].map(mapping)
    return renamed

rename_buildings_town(buildings)['bldg_contract_town']

0        KALLANG/WHAMPOA
1                  BEDOK
2           CENTRAL AREA
3                  BEDOK
4              PASIR RIS
              ...       
12821            HOUGANG
12822            HOUGANG
12823            HOUGANG
12824            HOUGANG
12825            HOUGANG
Name: bldg_contract_town, Length: 12826, dtype: object

In [212]:
def transform_transactions(transactions):
    """Placeholder for the transactions transform; not implemented yet.

    The body is currently a bare ``pass``, so calling this returns ``None``
    regardless of ``transactions``.

    TODO: implement the transform.
    """
    pass

# Median Rent Prices

# Schools

# Transactions