## 1. Data Collection

In [1]:
import pandas as pd

In [2]:
# Load the raw rental listings from the CSV next to the notebook and preview
# the first ten rows.
apartment_df = pd.read_csv(filepath_or_buffer='rent_apartments.csv')
apartment_df.head(n=10)

Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,balcony,storage,parking,furnished,garage,garden,energy,facilities,zip,neighborhood,rent
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,yes,no,no,yes,no,Not present,D,Roof terrace,1071 HN,Cornelis Schuytbuurt,4500
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,yes,no,yes,yes,no,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,yes,no,yes,yes,no,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,yes,no,no,yes,no,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,no,no,no,no,no,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250
5,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,yes,no,no,yes,no,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000
6,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,no,no,no,no,no,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250
7,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,no,no,no,no,no,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1950
8,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,no,no,no,no,no,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1850
9,1071 AC Amsterdam (P.C. Hooftbuurt),85.0,1900,2,1,1,no,no,yes,yes,no,Not present,,"Bath, Toilet",1071 AC,P.C. Hooftbuurt,2650


## 2. Data Preparation

In [6]:
# Inspect the inferred dtype of every column to see which ones are still
# text (object) and therefore need encoding.
apartment_df.dtypes

address               object
area                 float64
constraction_year      int64
rooms                  int64
bedrooms               int64
bathrooms              int64
balcony               object
storage               object
parking               object
furnished             object
garage                object
garden                object
energy                object
facilities            object
zip                   object
neighborhood          object
rent                   int64
dtype: object

In [7]:
# Amenity columns to one-hot encode.
cols_to_encode = [
    'balcony',
    'storage',
    'parking',
    'furnished',
    'garage',
]

# drop_first=True drops the first dummy level of each encoded column, so only
# the remaining indicator(s) are kept (here a single `<col>_yes` per column).
encoded_df = pd.get_dummies(
    data=apartment_df,
    columns=cols_to_encode,
    drop_first=True,
)
encoded_df.head(n=10)

Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,garden,energy,facilities,zip,neighborhood,rent,balcony_yes,storage_yes,parking_yes,furnished_yes,garage_yes
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,Not present,D,Roof terrace,1071 HN,Cornelis Schuytbuurt,4500,True,False,False,True,False
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,Not present,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000,True,False,False,True,False
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250,False,False,False,False,False
5,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,Not present,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000,True,False,False,True,False
6,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,Present (47 m²),D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250,False,False,False,False,False
7,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1950,False,False,False,False,False
8,1071 XL Amsterdam (Duivelseiland),67.0,1923,3,2,1,Not present,C,"Shower, Bath, Toilet",1071 XL,Duivelseiland,1850,False,False,False,False,False
9,1071 AC Amsterdam (P.C. Hooftbuurt),85.0,1900,2,1,1,Not present,,"Bath, Toilet",1071 AC,P.C. Hooftbuurt,2650,False,False,True,True,False


In [None]:
import re

def handle_garden(x: "str | int | float") -> int:
    """Convert one raw ``garden`` cell into a garden area as a whole number.

    ``Series.map`` calls this once per cell, so ``x`` is a single value,
    not a ``pd.Series``.

    Args:
        x: Either the raw label -- ``"Not present"`` or text that contains the
            area, e.g. ``"Present (47 m²)"`` -- or a number left behind by an
            earlier conversion.

    Returns:
        ``0`` for ``"Not present"``; otherwise the first run of digits in the
        label as an ``int``. Numeric input is returned as ``int(x)`` so that
        re-running the conversion is a no-op instead of an error.

    Raises:
        TypeError: ``x`` is neither text nor convertible to ``int``
            (for example NaN or None).
        ValueError: the label is not ``"Not present"`` and contains no digits.
    """
    if not isinstance(x, str):
        # Already-converted cells arrive as numbers; pass them through so the
        # conversion stays idempotent.
        try:
            return int(x)
        except (TypeError, ValueError) as exc:
            raise TypeError(
                f"Cannot interpret garden value {x!r} as an area"
            ) from exc

    if x == "Not present":
        return 0

    match = re.search(r'\d+', x)
    if match is None:
        # Fail with a descriptive message instead of an IndexError from [0].
        raise ValueError(f"No garden area found in {x!r}")
    return int(match.group())

# Convert the garden column only while it still holds the raw text labels.
# An explicit dtype check replaces the former `except TypeError`, which also
# hid genuine conversion failures (e.g. a missing value in the column) behind
# the "Already converted to int" message.
if pd.api.types.is_numeric_dtype(encoded_df["garden"]):
    print("Already converted to int")
else:
    encoded_df["garden"] = encoded_df["garden"].map(handle_garden)

encoded_df.head(5)

Already converted to int


Unnamed: 0,address,area,constraction_year,rooms,bedrooms,bathrooms,garden,energy,facilities,zip,neighborhood,rent,balcony_yes,storage_yes,parking_yes,furnished_yes,garage_yes
0,1071 HN Amsterdam (Cornelis Schuytbuurt),167.0,1870,3,2,2,0,D,Roof terrace,1071 HN,Cornelis Schuytbuurt,4500,True,False,False,True,False
1,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,0,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
2,1071 HK Amsterdam (Concertgebouwbuurt),150.0,1890,3,2,2,0,A,"Cable TV, Internet connection, Fireplace, Bath...",1071 HK,Concertgebouwbuurt,3450,True,False,True,True,False
3,1071 WV Amsterdam (Hondecoeterbuurt),90.0,1923,3,2,1,0,,"Shower, Toilet",1071 WV,Hondecoeterbuurt,2000,True,False,False,True,False
4,1071 WV Amsterdam (Hondecoeterbuurt),104.0,1923,3,2,1,47,D,"Shower, Bath, Toilet",1071 WV,Hondecoeterbuurt,3250,False,False,False,False,False


In [48]:
# List the distinct neighborhood labels in sorted order.
sorted(encoded_df["neighborhood"].unique())

['Aalsmeerwegbuurt Oost',
 'Aalsmeerwegbuurt West',
 'Alexanderplein e.o.',
 'Amstelkwartier Noord',
 'Amstelveldbuurt',
 'Amsterdamse Bos',
 'Andreasterrein',
 'Anjeliersbuurt Noord',
 'Anjeliersbuurt Zuid',
 'Architectenbuurt',
 'BG-terrein e.o.',
 'Baanakkerspark Zuid',
 'Balboaplein e.o.',
 'Banne Noordwest',
 'Banne Zuidoost',
 'Banne Zuidwest',
 'Bedrijvengebied Cruquiusweg',
 'Bedrijventerrein Landlust',
 'Beethovenbuurt',
 'Begijnhofbuurt',
 'Belgiëplein e.o.',
 'Bellamybuurt Noord',
 'Bellamybuurt Zuid',
 'Betondorp',
 'Bijlmermuseum Noord',
 'Bijlmermuseum Zuid',
 'Bloemgrachtbuurt',
 'Borgerbuurt',
 'Bosleeuw',
 'Buiksloterham',
 'Buikslotermeer Noord',
 'Buitenveldert Midden Zuid',
 'Buitenveldert Oost Midden',
 'Buitenveldert West Midden',
 'Buitenveldert Zuidoost',
 'Buitenveldert Zuidwest',
 'Burgemeester Tellegenbuurt Oost',
 'Burgwallen Oost',
 'Buurt 2',
 'Buurt 3',
 'Buurt 4 Oost',
 'Buurt 5 Noord',
 'Buurt 8',
 'Buurt 9',
 'Buyskade e.o.',
 'Columbusplein e.o.',
 'C

## 3. Model Building

In [None]:
# Problem: predict rental price of apartments
# Build the target and the feature matrix from the prepared frame (encoded_df)
# rather than the raw apartment_df, so the dummy-encoded amenity flags and the
# numeric garden area from the data-preparation cells are what reach the model.
# NOTE(review): address, energy, facilities, zip and neighborhood are still
# text columns at this point and need encoding or dropping before fitting.
y_df = encoded_df["rent"]
X_df = encoded_df.drop(columns=["rent"])