## 1. Data Collection

In [2]:
import pandas as pd

In [3]:
# Load the raw listings; the path is resolved relative to the notebook's working directory.
apartment_df = pd.read_csv('rent_apartments.csv')

# Preview the first ten rows to get a feel for the columns and their values.
apartment_df.head(n=10)

Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,balcony,storage,parking,furnished,garage,garden,energy,facilities,zip,neighborhood,rent
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,yes,no,no,yes,no,Not present,D,Roof terrace,1071 HN,Cornelis Schuytbuurt,4500
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,yes,no,yes,yes,no,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,yes,no,yes,yes,no,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,yes,no,no,yes,no,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,no,no,no,no,no,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250
5,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,yes,no,no,yes,no,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000
6,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,no,no,no,no,no,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250
7,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,no,no,no,no,no,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1950
8,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,no,no,no,no,no,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1850
9,1071 AC Amsterdam (P.C. Hooftbuurt),85.0,1900,2,1,1,no,no,yes,yes,no,Not present,,"Bath, Toilet",1071 AC,P.C. Hooftbuurt,2650


## 2. Data Preparation

In [4]:
# Inspect the column dtypes: `object` columns hold text and need encoding before modelling.
apartment_df.dtypes

address               object
area                 float64
constraction_year      int64
rooms                  int64
bedrooms               int64
bathrooms              int64
balcony               object
storage               object
parking               object
furnished             object
garage                object
garden                object
energy                object
facilities            object
zip                   object
neighborhood          object
rent                   int64
dtype: object

In [5]:
# Categorical amenity flags to one-hot encode.
cols_to_encode = [
    'balcony',
    'storage',
    'parking',
    'furnished',
    'garage',
]

# drop_first=True drops the first category level of every encoded column; for a
# two-valued column this leaves a single indicator column instead of a redundant pair.
encoded_df = pd.get_dummies(
    data=apartment_df,
    columns=cols_to_encode,
    drop_first=True,
)
encoded_df.head(n=10)

Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,garden,energy,facilities,zip,neighborhood,rent,balcony_yes,storage_yes,parking_yes,furnished_yes,garage_yes
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,Not present,D,Roof terrace,1071 HN,Cornelis Schuytbuurt,4500,True,False,False,True,False
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000,True,False,False,True,False
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250,False,False,False,False,False
5,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000,True,False,False,True,False
6,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250,False,False,False,False,False
7,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1950,False,False,False,False,False
8,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1850,False,False,False,False,False
9,1071 AC Amsterdam (P.C. Hooftbuurt),85.0,1900,2,1,1,Not present,,"Bath, Toilet",1071 AC,P.C. Hooftbuurt,2650,False,False,True,True,False


In [6]:
import re

def handle_garden(x: str) -> int:
    """Convert one raw ``garden`` description into a garden size.

    Intended to be used with ``Series.map``, which calls this function once per
    cell value, so ``x`` is a single string rather than a whole Series.

    Args:
        x: Raw description, e.g. ``"Not present"`` or ``"Present (47 m²)"``.

    Returns:
        ``0`` when the garden is not present, otherwise the first run of digits
        found in the description as an ``int`` (``47`` for the example above).

    Raises:
        TypeError: if ``x`` is not a string (for example a value that has
            already been converted to ``int``).
        ValueError: if the description is not ``"Not present"`` and contains
            no digits to extract.
    """
    if x == "Not present":
        return 0

    # re.search raises TypeError for non-string input, same as re.findall did.
    size = re.search(r'\d+', x)
    if size is None:
        # Previously this surfaced as an opaque IndexError from `[0]`.
        raise ValueError(f"no garden size found in {x!r}")

    return int(size.group())

# Always derive the numeric garden size from the untouched raw frame instead of
# converting `encoded_df.garden` in place. `encoded_df` was produced from
# `apartment_df` by `pd.get_dummies`, which keeps the row index, so the assignment
# aligns row by row. This makes the cell safe to re-run without catching TypeError
# to detect an "already converted" column, which would also hide genuine
# conversion failures.
encoded_df["garden"] = apartment_df["garden"].map(handle_garden)

encoded_df.head(5)

Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,garden,energy,facilities,zip,neighborhood,rent,balcony_yes,storage_yes,parking_yes,furnished_yes,garage_yes
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,0,D,Roof terrace,1071 HN,Cornelis Schuytbuurt,4500,True,False,False,True,False
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,0,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,0,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,0,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000,True,False,False,True,False
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,47,D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250,False,False,False,False,False


In [None]:
# Location is often one of the most important factors in determining real estate prices.
# Because there are so many neighborhoods, we simplify the names by keeping only the
# first word of each neighborhood name.
def handle_neighborhood(x: str) -> str:
    """Reduce a neighborhood name to its first whitespace-separated word.

    Intended to be used with ``Series.map``, which calls this function once per
    cell value, so ``x`` is a single name rather than a whole Series.
    For example ``"Cornelis Schuytbuurt"`` becomes ``"Cornelis"``.

    Note: names that begin with a generic word collapse into one shared
    category (the resulting dummies include a ``neighborhood_de`` column).

    Args:
        x: Full neighborhood name.

    Returns:
        The first word of ``x``.

    Raises:
        IndexError: if ``x`` is empty or contains only whitespace.
    """
    # maxsplit=1 stops after the first word; the rest of the name is not needed.
    return x.split(maxsplit=1)[0]

# The one-hot step below consumes the `neighborhood` column, so only run the
# transformation while that column is still present. Without this guard,
# re-running the cell fails because `encoded_df` no longer has a
# `neighborhood` column to simplify.
if "neighborhood" in encoded_df.columns:
    # Shorten every name to its first word, then one-hot encode the shortened
    # names. drop_first=True drops the first category level.
    encoded_df = pd.get_dummies(
        encoded_df.assign(
            neighborhood=encoded_df["neighborhood"].map(handle_neighborhood)
        ),
        columns=['neighborhood'],
        drop_first=True,
    )

encoded_df.head(5)

Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,garden,energy,facilities,zip,...,neighborhood_Willemsparkbuurt,neighborhood_Willibrordusbuurt,neighborhood_Wittenburg,neighborhood_Zaagpoortbuurt,neighborhood_Zeeheldenbuurt,neighborhood_Zuidas,neighborhood_Zuiderkerkbuurt,neighborhood_Zuidoostkwadrant,neighborhood_Zuidwestkwadrant,neighborhood_de
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,0,D,Roof terrace,1071 HN,...,False,False,False,False,False,False,False,False,False,False
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,0,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,...,False,False,False,False,False,False,False,False,False,False
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,0,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,...,False,False,False,False,False,False,False,False,False,False
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,0,,"Shower, Toilet",1071 WV,...,False,False,False,False,False,False,False,False,False,False
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,47,D,"Shower, Bath, Toilet",1071 WV,...,False,False,False,False,False,False,False,False,False,False


## 3. Model Building

In [8]:
# problem: predict rental price of apartments
# Build the target and the feature matrix from the prepared frame (`encoded_df`),
# not from the raw `apartment_df`; otherwise every encoding step from the
# data-preparation section is silently discarded.
# NOTE(review): `encoded_df` still carries text columns (address, energy,
# facilities, zip); these presumably need to be dropped or encoded before
# fitting a model. Confirm against the cells that follow.
y_df = encoded_df["rent"]
X_df = encoded_df.drop(columns=["rent"])