# House price estimation

## Data

In [1]:
import numpy as np
import pandas as pd

# Toy dataset stored column-wise: one list entry per house.
# NOTE(review): area is presumably in m2 (the naive estimator prices per m2) - confirm.
data = {
    "num_of_bedrooms": [3, 2, 2, 1, 4],
    "area": [2000, 800, 800, 550, 2000],
    "neighborhood": ["wanchaq", "cusco", "santiago", "santiago", "san sebastian"],
    "price": [250000, 300000, 150000, 78000, 150000],
}

# Each dict key becomes a column of the frame.
df = pd.DataFrame.from_dict(data)
display(df)

Unnamed: 0,num_of_bedrooms,area,neighborhood,price
0,3,2000,wanchaq,250000
1,2,800,cusco,300000
2,2,800,santiago,150000
3,1,550,santiago,78000
4,4,2000,san sebastian,150000


## Naive method

In [2]:
def estimate_house_sales_price_naive(num_of_bedrooms, area, neighborhood):
    """Estimate a house price with hand-written rules.

    The base price is a per-m2 rate times the area; the rate depends on the
    neighborhood. The base price is then adjusted by the bedroom count.

    Args:
        num_of_bedrooms: Number of bedrooms in the house.
        area: Area of the house (priced per m2).
        neighborhood: Neighborhood name; "cusco" and "san sebastian" have
            their own rates, every other value uses the average rate.

    Returns:
        The estimated price.
    """
    if neighborhood == "cusco":
        # Some districts are more expensive
        rate_per_m2 = 400
    elif neighborhood == "san sebastian":
        # Some districts are cheaper
        rate_per_m2 = 100
    else:
        # Average price per m2: $200
        rate_per_m2 = 200

    # Base price from the area alone
    base_price = rate_per_m2 * area

    # Adjust the price according to the number of bedrooms
    if num_of_bedrooms == 0:
        return base_price - 20000
    return base_price + (num_of_bedrooms * 1000)

In [3]:
# Apply the naive estimator to every row of the feature columns.
estimated_prices_naive = [
    estimate_house_sales_price_naive(bedrooms, area, neighborhood)
    for bedrooms, area, neighborhood
    in df[['num_of_bedrooms', 'area', 'neighborhood']].itertuples(index=False, name=None)
]

# Show the estimates side by side with the actual prices.
display(
    pd.concat(
        [pd.Series(estimated_prices_naive, name='estimated_price'), df['price']],
        axis=1,
    )
)

Unnamed: 0,estimated_price,price
0,403000,250000
1,322000,300000
2,162000,150000
3,111000,78000
4,204000,150000


## Black-magic method

In [4]:
def estimate_house_sales_price_black_magic(num_of_bedrooms, area, neighborhood):
    """Estimate a house price as a weighted sum with fixed coefficients.

    Args:
        num_of_bedrooms: Number of bedrooms, weighted by 0.841231951398213.
        area: Area of the house, weighted by 123.1231231.
        neighborhood: Neighborhood name; the total is multiplied by
            2.3242341421 when it is 'cusco', otherwise left unchanged.

    Returns:
        The estimated price (unrounded).
    """
    # A little bit of this
    bedroom_term = num_of_bedrooms * .841231951398213
    # and a little more of this
    area_term = area * 123.1231231
    estimate = bedroom_term + area_term
    # and maybe a dash of this
    if neighborhood == 'cusco':
        estimate = estimate * 2.3242341421
    return estimate

In [5]:
# Apply the black-magic estimator to every row, rounded to two decimals.
estimated_prices_black_magic = [
    round(estimate_house_sales_price_black_magic(bedrooms, area, neighborhood), 2)
    for bedrooms, area, neighborhood
    in df[['num_of_bedrooms', 'area', 'neighborhood']].itertuples(index=False, name=None)
]

# Show the estimates side by side with the actual prices.
display(
    pd.concat(
        [pd.Series(estimated_prices_black_magic, name='estimated_price'), df['price']],
        axis=1,
    )
)

Unnamed: 0,estimated_price,price
0,246248.77,250000
1,228937.48,300000
2,98500.18,150000
3,67718.56,78000
4,246249.61,150000


## Mean Squared Error

In [6]:
def sqr_mean_error(x, y):
    """Return the mean squared error between two sets of values.

    Args:
        x: Estimated values (sequence or array of numbers).
        y: Reference values, broadcastable against ``x``.

    Returns:
        The mean of the element-wise squared differences as a NumPy float.
    """
    # Coerce both sides to float arrays so plain lists work on either side
    # and the squared differences cannot overflow a fixed-width integer dtype.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.mean(diff ** 2)

# One row per method: mean squared error of its estimates against the actual prices.
display(
    pd.DataFrame(
        data=[
            sqr_mean_error(estimates, df['price'].values)
            for estimates in (estimated_prices_naive, estimated_prices_black_magic)
        ],
        index=['naive', 'black_magic'],
        columns=['sqr_mean_error'],
    )
)

Unnamed: 0,sqr_mean_error
naive,5608400000.0
black_magic,3417176000.0
