In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the CSV paths read later in this notebook live under it.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from scipy import stats

In [None]:
# Load the condo rental listings, using the first CSV column as the row index,
# then display the frame.
rent_csv_path = "/content/drive/MyDrive/Colab_Notebooks/IS4242/df_condo_mrt_coords.csv"
df_rent = pd.read_csv(rent_csv_path, index_col=0)
df_rent

Unnamed: 0,Property Name,Bathrooms,Tenure,No. of Units,District,Lease Term,Room Type,Address,Size,Date Listed,...,Built Year,Primary Schools,Groceries & Supermarts,Shopping Malls,Trains (MRT & LRT),Property,Asking,Neighborhood,nearest_station_lat,nearest_station_lng
0,JadeScape,1,99,1206.0,D20,Flexible,Entire Unit,8 Shunfu Road (575745),646,30/1/23 14:08,...,2022,0.60,0.44,0.79,"MARYMOUNT MRT,CC16",Condominium,5750.0,Big,1.349089,103.839116
1,JadeScape,2,99,1206.0,D20,Flexible,Entire Unit,8 Shunfu Road (575745),1055,1/4/23 6:30,...,2022,0.60,0.44,0.79,"MARYMOUNT MRT,CC16",Condominium,5750.0,Big,1.349089,103.839116
2,JadeScape,2,99,1206.0,D20,Flexible,Entire Unit,8 Shunfu Road (575745),764,23/12/22 15:57,...,2022,0.60,0.44,0.79,"MARYMOUNT MRT,CC16",Condominium,5750.0,Big,1.349089,103.839116
3,JadeScape,1,99,1206.0,D20,2+ years,Entire Unit,8 Shunfu Road (575745),646,7/2/23 3:33,...,2022,0.60,0.44,0.79,"MARYMOUNT MRT,CC16",Condominium,5750.0,Big,1.349089,103.839116
4,JadeScape,2,99,1206.0,D20,Flexible,Entire Unit,8 Shunfu Road (575745),1015,29/1/23 2:23,...,2022,0.60,0.44,0.79,"MARYMOUNT MRT,CC16",Condominium,5750.0,Big,1.349089,103.839116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3726,Fernwood Towers,1,999,215.0,D15,Flexible,Entire Unit,Fernwood Terrace,200,7/9/22 0:23,...,1994,0.95,0.32,0.40,"BEDOK MRT,EW5",Condominium,2625.0,Small,1.324043,103.930205
3727,Parc Rosewood,1,99,689.0,D25,Flexible,Entire Unit,81 Rosewood Drive (737788),431,17/3/23 2:21,...,2014,0.45,0.49,0.86,"WOODLANDS SOUTH MRT,TE3",Condominium,3000.0,Big,1.427346,103.793130
3728,Dover Parkview,3,99,686.0,D5,2+ years,Entire Unit,32 Dover Rise (138686),936,29/1/23 14:52,...,1997,0.66,0.23,0.56,"BUONA VISTA MRT,CC22",Condominium,5000.0,Big,1.307337,103.790046
3729,Shelford Suites,3,999,77.0,D11,Flexible,Entire Unit,Shelford Road,1292,13/3/23 16:31,...,2011,0.76,0.42,0.43,"BOTANIC GARDENS MRT,CC19",Condominium,6800.0,Small,1.322387,103.814905


In [None]:
# Inspect the dtype pandas inferred for each column of the rental listings.
df_rent.dtypes

Property Name              object
Bathrooms                   int64
Tenure                      int64
No. of Units              float64
District                   object
Lease Term                 object
Room Type                  object
Address                    object
Size                        int64
Date Listed                object
Bedrooms                    int64
Developer                  object
Built Year                  int64
Primary Schools           float64
Groceries & Supermarts    float64
Shopping Malls            float64
Trains (MRT & LRT)         object
Property                   object
Asking                    float64
Neighborhood               object
nearest_station_lat       float64
nearest_station_lng       float64
dtype: object

# Get Dropdown Lists Info

In [None]:
# Load the MRT/LRT station coordinates and preview the first five rows.
train_coords_csv_path = '/content/drive/MyDrive/Colab_Notebooks/IS4242/mrt_lrt_data_v2.csv'
df_train_coords = pd.read_csv(train_coords_csv_path)
df_train_coords.head()

Unnamed: 0,station_name,type,lat,lng
0,Jurong East,MRT,1.333207,103.742308
1,Bukit Batok,MRT,1.349069,103.749596
2,Bukit Gombak,MRT,1.359043,103.751863
3,Choa Chu Kang,MRT,1.385417,103.744316
4,Yew Tee,MRT,1.397383,103.747523


In [None]:
# Lower-case the station type (e.g. "MRT" -> "mrt"), overwriting the column in place.
df_train_coords['type'] = df_train_coords['type'].str.lower()

In [None]:
# Capitalise the station type: first letter upper-case, the rest lower-case (e.g. "mrt" -> "Mrt").
# This casing is what ends up in `full_station_name`.
df_train_coords['type'] = df_train_coords['type'].str.capitalize()

In [None]:
# Build "<station name> <type>" labels (e.g. "Jurong East Mrt") and preview the result.
station_names = df_train_coords['station_name']
station_types = df_train_coords['type']
df_train_coords['full_station_name'] = station_names.str.cat(station_types, sep=' ')
df_train_coords.head()

Unnamed: 0,station_name,type,lat,lng,full_station_name
0,Jurong East,Mrt,1.333207,103.742308,Jurong East Mrt
1,Bukit Batok,Mrt,1.349069,103.749596,Bukit Batok Mrt
2,Bukit Gombak,Mrt,1.359043,103.751863,Bukit Gombak Mrt
3,Choa Chu Kang,Mrt,1.385417,103.744316,Choa Chu Kang Mrt
4,Yew Tee,Mrt,1.397383,103.747523,Yew Tee Mrt


## Get Train List 

In [None]:
# All full station labels, one per row of the station table (duplicates are kept).
train_list = df_train_coords['full_station_name'].to_list()

## Get Property Name List

In [None]:
# Distinct property names in first-seen order; used as the "Property Name" dropdown choices.
property_name_list = df_rent['Property Name'].drop_duplicates().to_list()

## Get Lease Term List

In [None]:
# Distinct lease terms in first-seen order; used as the "Lease Term" dropdown choices.
lease_term_list = df_rent['Lease Term'].drop_duplicates().to_list()

## Get District List

In [None]:
# Distinct district codes (e.g. "D20") in first-seen order.
district_list = df_rent['District'].drop_duplicates().to_list()

## Get Room Type List

In [None]:
# Distinct room types in first-seen order; used as the "Room Type" dropdown choices.
room_type_list = df_rent['Room Type'].drop_duplicates().to_list()

## Get Neighbourhood List

In [None]:
# Distinct values of the "Neighborhood" column (e.g. "Big", "Small") in first-seen order.
neighbourhood_list = df_rent['Neighborhood'].drop_duplicates().to_list()

# Preprocessing

## Drop columns

In [None]:
# Drop columns that are not used from here on.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps this cell
# idempotent: re-running it after the columns are already gone no longer raises KeyError.
unused_columns = ['Developer', 'Property', 'Address', 'Trains (MRT & LRT)']
df_rent = df_rent.drop(columns=unused_columns, errors="ignore")

In [None]:
# Display the rental frame in its current state.
df_rent

Unnamed: 0,Property Name,Bathrooms,Tenure,No. of Units,District,Lease Term,Room Type,Size,Date Listed,Bedrooms,Built Year,Primary Schools,Groceries & Supermarts,Shopping Malls,Asking,Neighborhood,nearest_station_lat,nearest_station_lng
0,JadeScape,1,99,1206.0,D20,Flexible,Entire Unit,646,30/1/23 14:08,2,2022,0.60,0.44,0.79,5750.0,Big,1.349089,103.839116
1,JadeScape,2,99,1206.0,D20,Flexible,Entire Unit,1055,1/4/23 6:30,3,2022,0.60,0.44,0.79,5750.0,Big,1.349089,103.839116
2,JadeScape,2,99,1206.0,D20,Flexible,Entire Unit,764,23/12/22 15:57,2,2022,0.60,0.44,0.79,5750.0,Big,1.349089,103.839116
3,JadeScape,1,99,1206.0,D20,2+ years,Entire Unit,646,7/2/23 3:33,2,2022,0.60,0.44,0.79,5750.0,Big,1.349089,103.839116
4,JadeScape,2,99,1206.0,D20,Flexible,Entire Unit,1015,29/1/23 2:23,4,2022,0.60,0.44,0.79,5750.0,Big,1.349089,103.839116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3726,Fernwood Towers,1,999,215.0,D15,Flexible,Entire Unit,200,7/9/22 0:23,1,1994,0.95,0.32,0.40,2625.0,Small,1.324043,103.930205
3727,Parc Rosewood,1,99,689.0,D25,Flexible,Entire Unit,431,17/3/23 2:21,1,2014,0.45,0.49,0.86,3000.0,Big,1.427346,103.793130
3728,Dover Parkview,3,99,686.0,D5,2+ years,Entire Unit,936,29/1/23 14:52,2,1997,0.66,0.23,0.56,5000.0,Big,1.307337,103.790046
3729,Shelford Suites,3,999,77.0,D11,Flexible,Entire Unit,1292,13/3/23 16:31,3,2011,0.76,0.42,0.43,6800.0,Small,1.322387,103.814905


## Clean Feature Names

In [None]:
import re


def _clean_feature_name(raw_name):
  """Return `raw_name` with punctuation runs and spaces turned into underscores.

  Each run of characters that is neither a word character nor whitespace
  becomes a single '_', then every space becomes '_'
  (e.g. 'No. of Units' -> 'No__of_Units').
  """
  without_punctuation = re.sub(r'[^\w\s]+', '_', raw_name)
  return without_punctuation.replace(' ', '_')


# Only the column *names* are cleaned here; cell values such as '2+ years' are untouched.
df_rent.columns = [_clean_feature_name(column) for column in df_rent.columns]
df_rent.columns

Index(['Property_Name', 'Bathrooms', 'Tenure', 'No__of_Units', 'District',
       'Lease_Term', 'Room_Type', 'Size', 'Date_Listed', 'Bedrooms',
       'Built_Year', 'Primary_Schools', 'Groceries___Supermarts',
       'Shopping_Malls', 'Asking', 'Neighborhood', 'nearest_station_lat',
       'nearest_station_lng'],
      dtype='object')

In [None]:
# Independent snapshot of the cleaned frame, taken before any one-hot encoding.
df_org = df_rent.copy() # before encoding
# NOTE: this binds a second name to the same object as df_rent (no copy is made here).
df_rent_encoded = df_rent

# Preprocessing: One Hot Encoding

In [None]:
from datetime import datetime
cat_vars = ['Lease_Term', 'District', 'Room_Type', 'Neighborhood']
# other_vars = ['Address', 'Property', "Date Listed"] #"Condominium Name"

# One-hot encode every categorical variable and append the indicator columns
# (named "<variable>_<category>") to the right of the original columns.
# The dummies are built from df_rent and concatenated once, so re-running this
# cell always produces the same frame instead of stacking duplicate dummy
# columns onto an already-encoded df_rent_encoded. The original categorical
# columns are still present here.
dummy_frames = [pd.get_dummies(df_rent[cat_var], prefix=cat_var) for cat_var in cat_vars]
df_rent_encoded = pd.concat([df_rent] + dummy_frames, axis=1)

In [None]:
# Remove the original categorical columns; only their one-hot indicator columns remain.
df_rent_encoded = df_rent_encoded.drop(columns=cat_vars)

In [None]:
# Inspect column dtypes after encoding; the one-hot indicator columns appear after the original ones.
df_rent_encoded.dtypes

Property_Name              object
Bathrooms                   int64
Tenure                      int64
No__of_Units              float64
Size                        int64
Date_Listed                object
Bedrooms                    int64
Built_Year                  int64
Primary_Schools           float64
Groceries___Supermarts    float64
Shopping_Malls            float64
Asking                    float64
nearest_station_lat       float64
nearest_station_lng       float64
Lease_Term_1 year           uint8
Lease_Term_2 years          uint8
Lease_Term_2+ years         uint8
Lease_Term_6 months         uint8
Lease_Term_Flexible         uint8
District_D1                 uint8
District_D10                uint8
District_D11                uint8
District_D12                uint8
District_D13                uint8
District_D14                uint8
District_D15                uint8
District_D16                uint8
District_D17                uint8
District_D18                uint8
District_D19  

## Get mapping for one hot encoding

In [None]:
def _columns_with_prefix(prefix):
  """Return the encoded frame's column names that start with `prefix`, in column order."""
  return list(filter(lambda name: name.startswith(prefix), df_rent_encoded.columns))


# One list of indicator-column names per encoded categorical variable.
lease_term_encoding_scheme = _columns_with_prefix('Lease_Term_')
district_encoding_scheme = _columns_with_prefix('District_')
room_type_encoding_scheme = _columns_with_prefix('Room_Type_')
neighborhood_encoding_scheme = _columns_with_prefix('Neighborhood_')

for scheme in (
  lease_term_encoding_scheme,
  district_encoding_scheme,
  room_type_encoding_scheme,
  neighborhood_encoding_scheme,
):
  print(scheme)

['Lease_Term_1 year', 'Lease_Term_2 years', 'Lease_Term_2+ years', 'Lease_Term_6 months', 'Lease_Term_Flexible']
['District_D1', 'District_D10', 'District_D11', 'District_D12', 'District_D13', 'District_D14', 'District_D15', 'District_D16', 'District_D17', 'District_D18', 'District_D19', 'District_D2', 'District_D20', 'District_D21', 'District_D22', 'District_D23', 'District_D25', 'District_D26', 'District_D27', 'District_D28', 'District_D3', 'District_D4', 'District_D5', 'District_D7', 'District_D8', 'District_D9']
['Room_Type_Common', 'Room_Type_Entire Unit', 'Room_Type_Master']
['Neighborhood_Big', 'Neighborhood_Small']


# Convert categorical variables to one-hot encoding

In [None]:
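# Template (toy fruit/color example, not yet adapted to this notebook) for encoding new inputs
# with a saved list of one-hot column names.
# NOTE: `category.split('_')[1]` only works for single-word prefixes; for a column such as
# 'Lease_Term_1 year' it yields 'Term', so strip the known prefix instead when adapting this.
# 1) Create a new dataset with the same categories as the original dataset for both features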
# new_data = pd.DataFrame({'fruit': ['apple', 'orange', 'banana'],
#                          'color': ['green', 'red', 'yellow']})

# # 2) Encode the new dataset using the encoding scheme for each feature
# new_encoded_data = pd.DataFrame(columns=fruit_encoding_scheme + color_encoding_scheme)
# for category in fruit_encoding_scheme:
#     col_name = category.split('_')[1]
#     new_encoded_data[category] = (new_data['fruit'] == col_name).astype(int)
# for category in color_encoding_scheme:
#     col_name = category.split('_')[1]
#     new_encoded_data[category] = (new_data['color'] == col_name).astype(int)
# print(new_encoded_data)

# Gradio App

In [None]:
!pip3 install gradio

In [None]:
import gradio as gr
from gradio.blocks import *

In [None]:
def predict_rental_price(property_name, lease_term, tenure, room_type, bedrooms, bathrooms):
  """Callback for the "Predict" button (placeholder implementation).

  Logs the submitted form values and returns a constant price; no model is
  called yet.

  Args:
    property_name: Value of the "Property Name" dropdown.
    lease_term: Value of the "Lease Term" dropdown.
    tenure: Value of the "Tenure" number field.
    room_type: Value of the "Room Type" dropdown.
    bedrooms: Value of the "Bedrooms" number field.
    bathrooms: Value of the "Bathrooms" number field.

  Returns:
    The constant placeholder price 100.
  """
  # Debug log of the raw inputs received from the form.
  print(property_name, lease_term, tenure, room_type, bedrooms, bathrooms)

  # TODO 1) Convert the categorical inputs into model inputs.
  # TODO 2) Get and convert the remaining values into model inputs. Planned form fields:
  #   Categorical:
  #     district = gr.Dropdown(district_list, label= "District")
  #     neighbourhood = gr.Dropdown(neighbourhood_list, label='Size of Neighbourhood', info='Lorem ipsum do lor sit amet')
  #   Numerical:
  #     num_units = gr.Number(label = "Number of Units")
  #     size = gr.Number(label='Size')
  #     built_year = gr.Number(label = "Built Year")
  #     distance_to_pri_sch = gr.Number(label = "Distance to Nearest Primary School")
  #     distance_to_supermkt = gr.Number(label = "Distance to Nearest Supermarket")
  #     distance_to_shoppingmall = gr.Number(label = "Distance to Nearest Shopping Mall")

  price = 100
  return price


with gr.Blocks(theme=gr.themes.Soft()) as demo:
  # Banner: background image, a translucent white overlay, and the centred title.
  # The overlay and title use "z-index: N"; the previous "z-index=N" is not a valid
  # CSS declaration and was ignored by browsers.
  gr.HTML(
    "<div style='background-image: url(\"file/drive/MyDrive/Colab_Notebooks/IS4242/condobg2.jpeg\"); background-size: cover; background-position: center; height: 40vh; z-index:-1'></div>"
    "<div style='position: absolute;top: 0;left: 0;width: 100%;height: 100%;background-color: rgba(255, 255, 255, 0.5); z-index: 1'></div>"
    "<div style='position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center;z-index: 2'>"
    "<p style='font-size: 48px; font-weight: bold; color: #ffffff'>Predict Condo Prices</p>"
    "</div>"
  )

  # 1. Using nearest mrt station to find surrounding condos
  #    (planned; the inputs below do not use station data yet)
  with gr.Row():
    with gr.Column(): # categorical inputs
      property_name = gr.Dropdown(property_name_list, label = "Property Name")
      lease_term = gr.Dropdown(lease_term_list, label='Lease Term')
      room_type = gr.Dropdown(room_type_list, label= "Room Type")

    with gr.Column(): # numeric inputs
      tenure = gr.Number(label = "Tenure")
      bedrooms = gr.Number(label = "Bedrooms")
      bathrooms = gr.Number(label = "Bathrooms")

  greet_btn = gr.Button("Predict")

  with gr.Row():
    output = gr.Textbox(label="Output", interactive=True)

  # The order of `inputs` must match the callback's positional parameters.
  greet_btn.click(
    fn=predict_rental_price,
    inputs=[property_name, lease_term, tenure, room_type, bedrooms, bathrooms],
    outputs=output,
  )

# debug=True keeps this cell running in Colab so errors and logs stay visible;
# interrupt the cell to stop the server.
demo.launch(debug=True)



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



Seletar Park Residence 6 months 99.0  0.0 0.0 None None None None None None None None None None None
Keyboard interruption in main thread... closing server.


