<h1>Predicting house prices in Cameroon</h1>

We'll be building a model to predict house prices in Yaounde and Douala, the two main cities in Cameroon. This model is based on data scraped from the web in april 2017. We'll start with some data cleanup before getting into building the model proper.

In [48]:
import pandas as pd
import numpy  as np

In [57]:
data = pd.read_csv('koutchoumi1.csv')
data.head()

Unnamed: 0,Area,Bathrooms,Bedrooms,Price,Type
0,Douala/Makepe,> 2 bathrooms,> 3 bedrooms,> 130 000 FCFA,3 bedrooms apartment to rent
1,Douala/Makepe,> 2 bathrooms,> 2 bedrooms,> 100 000 FCFA,2 bedrooms apartment to rent
2,Douala/Logpom,> 2 bathrooms,> 3 bedrooms,> 100 000 FCFA,3 bedrooms apartment to rent
3,Douala/Logpom,> 1 bathroom,> 2 bedrooms,> 75 000 FCFA,2 bedrooms apartment to rent
4,Douala/Makepe,> 1 bathroom,> 2 bedrooms,> 60 000 FCFA,2 bedrooms apartment to rent


In [58]:
#Let's add a new column to our dataset that tells if the house is furnished or not

furnish = []

for row in data['Type']:
    if 'furnished' in row:
        furnish.append('yes')
    else:
        furnish.append('no')
        
data['Furnished'] = furnish

#Here we strip off unnecessary words from the 'Type' column
def remap_type(x):
    if 'apartment' in x:
        return 'Apartment'
    else:
        return x
    
data['Type'] = data['Type'].map(remap_type)

def clean_price(x):
    p = x.strip('FCFA')
    pr = p.strip('>')
    return pr

data['Price'] = data['Price'].map(clean_price)

def clean_rooms(x):
    b = x.split(' ')[1]
    return b

data['Bedrooms'] = data['Bedrooms'].map(clean_rooms)
data['Bathrooms'] = data['Bathrooms'].map(clean_rooms)


In [59]:
towns =[]

for row in data['Area']:
    town = row.split('/')[0]
    towns.append(town)
    
data['City'] = towns

def clean_area(a):
    ar = a.split('/')[1]
    return ar

data['Area'] = data['Area'].map(clean_area)

In [60]:
data.rename(columns={
        'Price': 'Price(FCFA)'
    }, inplace=True)
data

Unnamed: 0,Area,Bathrooms,Bedrooms,Price(FCFA),Type,Furnished,City
0,Makepe,2,3,130 000,Apartment,no,Douala
1,Makepe,2,2,100 000,Apartment,no,Douala
2,Logpom,2,3,100 000,Apartment,no,Douala
3,Logpom,1,2,75 000,Apartment,no,Douala
4,Makepe,1,2,60 000,Apartment,no,Douala
5,Bonamoussadi,2,2,120 000,Apartment,no,Douala
6,Kotto,1,2,75 000,Apartment,no,Douala
7,Makepe,1,2,75 000,Apartment,no,Douala
8,Akwa I,1,1,300 000,Apartment,no,Douala
9,Makepe,3,3,175 000,Apartment,no,Douala


In [61]:
data = data[['Area', 'City', 'Type', 'Bedrooms', 'Bathrooms', 'Furnished', 'Price(FCFA)']]

data

Unnamed: 0,Area,City,Type,Bedrooms,Bathrooms,Furnished,Price(FCFA)
0,Makepe,Douala,Apartment,3,2,no,130 000
1,Makepe,Douala,Apartment,2,2,no,100 000
2,Logpom,Douala,Apartment,3,2,no,100 000
3,Logpom,Douala,Apartment,2,1,no,75 000
4,Makepe,Douala,Apartment,2,1,no,60 000
5,Bonamoussadi,Douala,Apartment,2,2,no,120 000
6,Kotto,Douala,Apartment,2,1,no,75 000
7,Makepe,Douala,Apartment,2,1,no,75 000
8,Akwa I,Douala,Apartment,1,1,no,300 000
9,Makepe,Douala,Apartment,3,3,no,175 000


In [62]:
df = pd.read_csv('jumia.csv').drop(labels={'Unnamed: 0'}, axis=1)

In [31]:
df.head()

Unnamed: 0,Address,Bathrooms,Bedrooms,Designation,Price
0,"Akwa, Akwa, Douala, Littoral",2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000"
1,"Akwa, Akwa, Douala, Littoral",2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000"
2,"Bonamoussadi, Bonamoussadi, Douala, Littoral",2 Salles de bain,2 Chambres,\n\n Appartement � Bona...,"CFA 150,000"
3,"Bonamoussadi, Douala, Littoral",1 Salles de bain,2 Chambres,\n\n Appartement � loue...,Prix : Contactez le vendeur
4,"Nkoabang, Yaound�, Centre",2 Salles de bain,1 Chambres,\n\n Joli studio meubl�...,"CFA 200,000"


In [63]:
df.rename(columns={
        'Address':'Area',
        'Designation': 'Type',
        'Price': 'Price(FCFA)'
    }, inplace=True)

In [8]:
df.head()

Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA)
0,"Akwa, Akwa, Douala, Littoral",2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000"
1,"Akwa, Akwa, Douala, Littoral",2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000"
2,"Bonamoussadi, Bonamoussadi, Douala, Littoral",2 Salles de bain,2 Chambres,\n\n Appartement � Bona...,"CFA 150,000"
3,"Bonamoussadi, Douala, Littoral",1 Salles de bain,2 Chambres,\n\n Appartement � loue...,Prix : Contactez le vendeur
4,"Nkoabang, Yaound�, Centre",2 Salles de bain,1 Chambres,\n\n Joli studio meubl�...,"CFA 200,000"


In [64]:
towns = []

for row in df['Area']:
    ar = row.split(',')
    if len(ar) == 3:
        town = ar[1]
        towns.append(town)
    else:
        town = ar[2:3]
        towns.append(town)

df['City'] = towns

df['City'] = df['City'].astype(str).str.replace('\[|\]|\'', '')

df.head()


Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA),City
0,"Akwa, Akwa, Douala, Littoral",2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000",Douala
1,"Akwa, Akwa, Douala, Littoral",2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000",Douala
2,"Bonamoussadi, Bonamoussadi, Douala, Littoral",2 Salles de bain,2 Chambres,\n\n Appartement � Bona...,"CFA 150,000",Douala
3,"Bonamoussadi, Douala, Littoral",1 Salles de bain,2 Chambres,\n\n Appartement � loue...,Prix : Contactez le vendeur,Douala
4,"Nkoabang, Yaound�, Centre",2 Salles de bain,1 Chambres,\n\n Joli studio meubl�...,"CFA 200,000",Yaound�


In [65]:
def clean_area(x):
    ar = x.split(',')
    if len(ar) == 3:
        a = ar[0:1]
    else:
        a = ar[1:2]
    return a

df['Area'] = df['Area'].map(clean_area).astype(str).str.replace('\[|\]|\'', '')

df.head()

Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA),City
0,Akwa,2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000",Douala
1,Akwa,2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000",Douala
2,Bonamoussadi,2 Salles de bain,2 Chambres,\n\n Appartement � Bona...,"CFA 150,000",Douala
3,Bonamoussadi,1 Salles de bain,2 Chambres,\n\n Appartement � loue...,Prix : Contactez le vendeur,Douala
4,Nkoabang,2 Salles de bain,1 Chambres,\n\n Joli studio meubl�...,"CFA 200,000",Yaound�


In [66]:
furnish = []

for row in df['Type']:
    if 'meubl' in row:
        furnish.append('yes')
    else:
        furnish.append('no')
        
df['Furnished'] = furnish
df.head()

Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA),City,Furnished
0,Akwa,2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000",Douala,no
1,Akwa,2 Salles de bain,3 Chambres,\n\n Appartement � loue...,"CFA 500,000",Douala,no
2,Bonamoussadi,2 Salles de bain,2 Chambres,\n\n Appartement � Bona...,"CFA 150,000",Douala,no
3,Bonamoussadi,1 Salles de bain,2 Chambres,\n\n Appartement � loue...,Prix : Contactez le vendeur,Douala,no
4,Nkoabang,2 Salles de bain,1 Chambres,\n\n Joli studio meubl�...,"CFA 200,000",Yaound�,yes


In [67]:
def remap_type(t):
    if 'appartement' in t:
        return 'Apartment'
    elif 'Duplex' in t:
        return 'Duplex'
    elif 'studio' in t:
        return 'Studio'
    else:
        return 'Apartment'

df['Type'] = df['Type'].map(remap_type)
df

Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA),City,Furnished
0,Akwa,2 Salles de bain,3 Chambres,Apartment,"CFA 500,000",Douala,no
1,Akwa,2 Salles de bain,3 Chambres,Apartment,"CFA 500,000",Douala,no
2,Bonamoussadi,2 Salles de bain,2 Chambres,Apartment,"CFA 150,000",Douala,no
3,Bonamoussadi,1 Salles de bain,2 Chambres,Apartment,Prix : Contactez le vendeur,Douala,no
4,Nkoabang,2 Salles de bain,1 Chambres,Studio,"CFA 200,000",Yaound�,yes
5,Makepe,2 Salles de bain,2 Chambres,Apartment,"CFA 150,000",Douala,no
6,Odza,1 Salles de bain,2 Chambres,Apartment,"CFA 130,000",Yaound�,no
7,Bessengu\xe9,1 Salles de bain,1 Chambres,Apartment,"CFA 100,000",Douala,no
8,Akwa,2 Salles de bain,2 Chambres,Apartment,"CFA 300,000",Douala,no
9,Brazzaville,1 Salles de bain,2 Chambres,Apartment,"CFA 150,000",Douala,no


In [68]:
def clean_price(x):
    p = x.strip('CFA')
    if x == 'Prix : Contactez le vendeur':
        p = 'NA'
    return p

df['Price(FCFA)'] = df['Price(FCFA)'].map(clean_price)


In [27]:
df

Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA),City,Furnished
0,Akwa,2 Salles de bain,3 Chambres,Apartment,500000,Douala,no
1,Akwa,2 Salles de bain,3 Chambres,Apartment,500000,Douala,no
2,Bonamoussadi,2 Salles de bain,2 Chambres,Apartment,150000,Douala,no
3,Bonamoussadi,1 Salles de bain,2 Chambres,Apartment,,Douala,no
4,Nkoabang,2 Salles de bain,1 Chambres,Studio,200000,Yaound�,yes
5,Makepe,2 Salles de bain,2 Chambres,Apartment,150000,Douala,no
6,Odza,1 Salles de bain,2 Chambres,Apartment,130000,Yaound�,no
7,Bessengu\xe9,1 Salles de bain,1 Chambres,Apartment,100000,Douala,no
8,Akwa,2 Salles de bain,2 Chambres,Apartment,300000,Douala,no
9,Brazzaville,1 Salles de bain,2 Chambres,Apartment,150000,Douala,no


In [69]:
def clean_rooms(x):
    try:
        b = x.split(' ')[0]
    except:
        b = 'NA'
    return b

df['Bedrooms'] = df['Bedrooms'].map(clean_rooms)
df['Bathrooms'] = df['Bathrooms'].map(clean_rooms)

In [70]:
df.head(60)

Unnamed: 0,Area,Bathrooms,Bedrooms,Type,Price(FCFA),City,Furnished
0,Akwa,2,3,Apartment,500000.0,Douala,no
1,Akwa,2,3,Apartment,500000.0,Douala,no
2,Bonamoussadi,2,2,Apartment,150000.0,Douala,no
3,Bonamoussadi,1,2,Apartment,,Douala,no
4,Nkoabang,2,1,Studio,200000.0,Yaound�,yes
5,Makepe,2,2,Apartment,150000.0,Douala,no
6,Odza,1,2,Apartment,130000.0,Yaound�,no
7,Bessengu\xe9,1,1,Apartment,100000.0,Douala,no
8,Akwa,2,2,Apartment,300000.0,Douala,no
9,Brazzaville,1,2,Apartment,150000.0,Douala,no


In [71]:
df = df[['Area', 'City', 'Type', 'Bedrooms', 'Bathrooms', 'Furnished', 'Price(FCFA)']]
df

Unnamed: 0,Area,City,Type,Bedrooms,Bathrooms,Furnished,Price(FCFA)
0,Akwa,Douala,Apartment,3,2,no,500000
1,Akwa,Douala,Apartment,3,2,no,500000
2,Bonamoussadi,Douala,Apartment,2,2,no,150000
3,Bonamoussadi,Douala,Apartment,2,1,no,
4,Nkoabang,Yaound�,Studio,1,2,yes,200000
5,Makepe,Douala,Apartment,2,2,no,150000
6,Odza,Yaound�,Apartment,2,1,no,130000
7,Bessengu\xe9,Douala,Apartment,1,1,no,100000
8,Akwa,Douala,Apartment,2,2,no,300000
9,Brazzaville,Douala,Apartment,2,1,no,150000


In [75]:
maisons = pd.concat([data, df])

In [76]:
maisons

Unnamed: 0,Area,City,Type,Bedrooms,Bathrooms,Furnished,Price(FCFA)
0,Makepe,Douala,Apartment,3,2,no,130 000
1,Makepe,Douala,Apartment,2,2,no,100 000
2,Logpom,Douala,Apartment,3,2,no,100 000
3,Logpom,Douala,Apartment,2,1,no,75 000
4,Makepe,Douala,Apartment,2,1,no,60 000
5,Bonamoussadi,Douala,Apartment,2,2,no,120 000
6,Kotto,Douala,Apartment,2,1,no,75 000
7,Makepe,Douala,Apartment,2,1,no,75 000
8,Akwa I,Douala,Apartment,1,1,no,300 000
9,Makepe,Douala,Apartment,3,3,no,175 000
