### Imports
- pandas: for holding the data in a pandas.DataFrame object
- wandb: for logging findings

In [None]:
import pandas as pd
import wandb

In [None]:
# Read the rental records from the CSV file into a DataFrame.
csv_path = "RentingOutofFlats.csv"
data = pd.read_csv(csv_path)

### Exploratory Data Analysis
1. Check number of rows of data
2. Check for any null values
3. Check for any NA values (`isna`; expected to match step 2, since pandas' `isnull` is an alias of `isna`)
4. Check for empty strings
5. Check datatypes of all columns
6. Find unique values for each column

In [28]:
# Number of records (rows) in the dataset.
data.shape[0]

95441

In [17]:
# Per-column flag: True when the column holds at least one null entry.
data.isnull().any(axis="index")

rent_approval_date    False
town                  False
block                 False
street_name           False
flat_type             False
monthly_rent          False
dtype: bool

In [18]:
# Per-column flag: True when the column holds at least one NA entry.
data.isna().any(axis="index")



rent_approval_date    False
town                  False
block                 False
street_name           False
flat_type             False
monthly_rent          False
dtype: bool

In [23]:
# Per-column flag: True when any cell is exactly the empty string.
(data == "").any()

rent_approval_date    False
town                  False
block                 False
street_name           False
flat_type             False
monthly_rent          False
dtype: bool

In [25]:
# Show the dtype of every column in the DataFrame.
data.dtypes

rent_approval_date    object
town                  object
block                 object
street_name           object
flat_type             object
monthly_rent           int64
dtype: object

In [27]:
# List the distinct values of every column together with how many there are.
for column in data.columns:
    # Compute the distinct values once and reuse them for the count,
    # instead of scanning the column twice.
    distinct_values = data[column].unique()
    print(column, distinct_values, len(distinct_values))

rent_approval_date ['2021-01' '2021-02' '2021-03' '2021-04' '2021-05' '2021-06' '2021-07'
 '2021-08' '2021-09' '2021-10' '2021-11' '2021-12' '2022-01' '2022-02'
 '2022-03' '2022-04' '2022-05' '2022-06' '2022-07' '2022-08' '2022-09'
 '2022-10' '2022-11' '2022-12' '2023-01' '2023-02' '2023-03' '2023-04'
 '2023-05' '2023-06' '2023-07'] 31
town ['PUNGGOL' 'JURONG WEST' 'BEDOK' 'BUKIT MERAH' 'CHOA CHU KANG' 'TAMPINES'
 'SENGKANG' 'ANG MO KIO' 'HOUGANG' 'TOA PAYOH' 'JURONG EAST' 'WOODLANDS'
 'BUKIT BATOK' 'SEMBAWANG' 'CENTRAL' 'QUEENSTOWN' 'BISHAN' 'CLEMENTI'
 'MARINE PARADE' 'PASIR RIS' 'YISHUN' 'GEYLANG' 'SERANGOON'
 'BUKIT PANJANG' 'KALLANG/WHAMPOA' 'BUKIT TIMAH'] 26
block ['272A' '187' '188B' ... '513C' '494G' '513B'] 2683
street_name ['PUNGGOL WALK' 'BOON LAY AVE' 'BEDOK NTH ST 4' 'HENDERSON RD'
 'CHOA CHU KANG AVE 5' 'BEDOK STH AVE 1' 'TAMPINES ST 41'
 'COMPASSVALE CRES' 'CHOA CHU KANG CRES' 'ANG MO KIO ST 51'
 'BEDOK RESERVOIR RD' 'HOUGANG ST 51' 'TELOK BLANGAH CRES'
 'JURONG WEST CTR

### Data Cleaning


##### 1. Convert all strings to lowercase

In [31]:
# Normalise the free-text location columns to lowercase.
for text_column in ("town", "street_name"):
    data[text_column] = data[text_column].str.lower()

In [32]:
# Display the town column to confirm the values are lowercase.
data['town']

0              punggol
1          jurong west
2                bedok
3          bukit merah
4        choa chu kang
             ...      
95436      bukit merah
95437            bedok
95438    bukit panjang
95439            bedok
95440      bukit batok
Name: town, Length: 95441, dtype: object

In [33]:
# Display the street_name column to confirm the values are lowercase.
data['street_name']

0               punggol walk
1               boon lay ave
2             bedok nth st 4
3               henderson rd
4        choa chu kang ave 5
                ...         
95436     telok blangah cres
95437           chai chee st
95438               senja rd
95439        bedok nth ave 2
95440         bt batok st 31
Name: street_name, Length: 95441, dtype: object

##### 2. Split date into a year and a month column

In [39]:
# Split the "YYYY-MM" approval date into integer year and month columns.
# Vectorised .str slicing replaces the per-row Python lambda: characters
# 0-3 hold the year and everything after the separator holds the month.
approval_date = data["rent_approval_date"]
data["rent_approval_year"] = approval_date.str[:4].astype(int)
data["rent_approval_month"] = approval_date.str[5:].astype(int)

In [41]:
# Display the frame to inspect the rent_approval_year and rent_approval_month columns.
data

Unnamed: 0,rent_approval_date,town,block,street_name,flat_type,monthly_rent,rent_approval_year,rent_approval_month
0,2021-01,punggol,272A,punggol walk,5-ROOM,2600,2021,1
1,2021-01,jurong west,187,boon lay ave,3-ROOM,1600,2021,1
2,2021-01,bedok,188B,bedok nth st 4,4-ROOM,2200,2021,1
3,2021-01,bukit merah,95B,henderson rd,3-ROOM,2300,2021,1
4,2021-01,choa chu kang,484A,choa chu kang ave 5,4-ROOM,1900,2021,1
...,...,...,...,...,...,...,...,...
95436,2023-07,bukit merah,15,telok blangah cres,3-ROOM,2200,2023,7
95437,2023-07,bedok,54,chai chee st,3-ROOM,2000,2023,7
95438,2023-07,bukit panjang,633C,senja rd,4-ROOM,3500,2023,7
95439,2023-07,bedok,514,bedok nth ave 2,2-ROOM,2000,2023,7


##### 3. Convert Flat Type into Classes
The class label equals the number of rooms. Executive Apartment has no room count, so it is encoded as 0 here (6 is an alternative worth experimenting with).
- 0: Executive Apartment
- 1: 1 room
- 2: 2 room
- 3: 3 room
- 4: 4 room
- 5: 5 room

In [42]:
# Encode each flat type as an integer class: "<n>-ROOM" maps to n for
# n = 1..5, and "EXECUTIVE" maps to 0.
room_mapping = {f"{rooms}-ROOM": rooms for rooms in range(1, 6)}
room_mapping["EXECUTIVE"] = 0

In [45]:
# Translate each flat type into its integer class with a vectorised lookup
# instead of calling a Python lambda once per row.
num_rooms = data["flat_type"].map(room_mapping)

# Series.map yields NaN for labels missing from room_mapping. Keep failing
# loudly with a KeyError (as the plain dict lookup did), but report every
# unknown label at once rather than only the first one encountered.
unmapped = data.loc[num_rooms.isna(), "flat_type"].unique()
if len(unmapped) > 0:
    raise KeyError(f"flat_type values missing from room_mapping: {list(unmapped)}")

data["num_rooms"] = num_rooms

In [46]:
# Display the frame to inspect the num_rooms column.
data

Unnamed: 0,rent_approval_date,town,block,street_name,flat_type,monthly_rent,rent_approval_year,rent_approval_month,num_rooms
0,2021-01,punggol,272A,punggol walk,5-ROOM,2600,2021,1,5
1,2021-01,jurong west,187,boon lay ave,3-ROOM,1600,2021,1,3
2,2021-01,bedok,188B,bedok nth st 4,4-ROOM,2200,2021,1,4
3,2021-01,bukit merah,95B,henderson rd,3-ROOM,2300,2021,1,3
4,2021-01,choa chu kang,484A,choa chu kang ave 5,4-ROOM,1900,2021,1,4
...,...,...,...,...,...,...,...,...,...
95436,2023-07,bukit merah,15,telok blangah cres,3-ROOM,2200,2023,7,3
95437,2023-07,bedok,54,chai chee st,3-ROOM,2000,2023,7,3
95438,2023-07,bukit panjang,633C,senja rd,4-ROOM,3500,2023,7,4
95439,2023-07,bedok,514,bedok nth ave 2,2-ROOM,2000,2023,7,2


##### 4. Convert Towns into Districts
- Singapore housing estates are usually divided into districts
- Rental prices are usually closely associated with the district the flat is located in

### Logging
Log the results of the session in Weights & Biases

#### Initialization

In [None]:
# Start the Weights & Biases run that this session logs to.
run_settings = {"project": "hdb_rental", "name": "eda"}
wandb.init(**run_settings)

#### Log the data table

In [9]:
# Build the W&B table through the dedicated `dataframe` parameter; the
# column names are taken from the frame itself, so they are not passed
# separately.
table = wandb.Table(dataframe=data)

In [10]:
# Send the table to the active W&B run. The key is spelled after the source
# dataset name ("RentingOutofFlats"); the earlier "Rening..." was a typo.
wandb.log({
    "RentingOutofFlats Table": table
})