In [4]:
from sklearn.datasets import load_boston
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [5]:
# Load the Boston housing data and build the modelling frames.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this cell needs an older scikit-learn to run.
boston_dataset = load_boston()
data = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)

# INDUS and AGE are excluded from the model's feature set.
features = data.drop(columns=["INDUS", "AGE"])

# The model is trained on log prices, stored as a single-column frame.
target = pd.DataFrame(np.log(boston_dataset.target), columns=["PRICE"])

In [6]:
# Placeholder row holding one value per model feature (1 property x 11 features).
# `np.zeros` is used instead of the raw `np.ndarray(...)` constructor, which
# returns uninitialised memory and would make this cell's contents arbitrary.
property_stats = np.zeros(shape=(1, 11))

In [17]:
# Baseline property: every feature set to its column mean, laid out as a
# single row (1 x 11) so it can be passed straight to `regr.predict`.
property_stats = np.reshape(features.mean().values, (1, 11))

In [44]:
# Preview the first rows of the feature frame; the column order shown here
# is what the positional indices used for `property_stats` refer to.
features.head()

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,0.0,0.469,7.185,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,0.0,0.458,6.998,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,0.0,0.458,7.147,6.0622,3.0,222.0,18.7,396.9,5.33


In [19]:
# Ordinary least-squares fit of log price on the selected features.
regr = LinearRegression()
regr.fit(features, target)

In [69]:
# In-sample predictions (log prices) for the same rows the model was fitted on.
fitted_vals=regr.predict(features)

In [21]:
# In-sample error of the fit, measured in log-price units.
MSE = mean_squared_error(y_true=target, y_pred=fitted_vals)

# RMSE is used downstream as the standard deviation of the prediction error.
RMSE = np.sqrt(MSE)

In [54]:
# Column positions inside `property_stats`, following the feature order
# CRIM, ZN, CHAS, NOX, RM, DIS, RAD, TAX, PTRATIO, B, LSTAT.
RM_IDX=4
PT_RATIO_IDX=8
CHAS_IDX=2
def get_log_estimate(nrooms, students_per_class,next_to_river=True,high_confidence=True):
    """Predict the log price of a property and a range around it.

    The property starts from the module-level `property_stats` row; only the
    room count, pupil-teacher ratio and river flag are overridden.

    Parameters
    ----------
    nrooms : value written into the RM feature.
    students_per_class : value written into the PTRATIO feature.
    next_to_river : truthy sets CHAS to 1, falsy sets it to 0.
    high_confidence : True gives a +/- 2 * RMSE range (labelled 95),
        False gives a +/- 1 * RMSE range (labelled 68).

    Returns
    -------
    tuple
        (log_estimate, lower_bound, upper_bound, interval)
    """
    # Work on a private copy: writing into the shared `property_stats` array
    # would silently replace the baseline values for every later reader.
    stats = property_stats.copy()

    # Configure the property.
    stats[0, RM_IDX] = nrooms
    stats[0, PT_RATIO_IDX] = students_per_class
    stats[0, CHAS_IDX] = 1 if next_to_river else 0

    # `predict` returns a 2-D array here; take the single scalar out of it.
    log_estimate = regr.predict(stats)[0][0]

    if high_confidence:
        upper_bound = log_estimate + 2 * RMSE
        lower_bound = log_estimate - 2 * RMSE
        interval = 95
    else:
        upper_bound = log_estimate + RMSE
        lower_bound = log_estimate - RMSE
        interval = 68

    return log_estimate,lower_bound,upper_bound,interval

In [55]:
# Smoke test with the defaults: next to the river, 2 * RMSE range.
# The result is (log_estimate, lower_bound, upper_bound, interval).
get_log_estimate(5,20)

(2.958103774701664, 2.5830795043074035, 3.3331280450959246, 95)

In [56]:
# Median of the raw (non-log) dataset target; used below as the denominator
# of the scale factor that maps dataset prices to present-day prices.
np.median(boston_dataset.target)

21.2

In [59]:
## Checking: convert a log estimate into present-day dollars

# Present-day median price, in the same thousands-of-dollars unit as the
# dataset target, and the resulting inflation multiplier.
ZILLOW_MEDIAN_PRICE = 583.3
SCALE_FACTOR = ZILLOW_MEDIAN_PRICE / np.median(boston_dataset.target)

log_est, low, upper, conf = get_log_estimate(9, 15, False, False)

# Undo the log, convert thousands to dollars, rescale to today's prices and
# round to the nearest thousand -- the same transform for all three values.
dollar_est, dollar_hi, dollar_low = (
    round(np.e ** log_value * 1000 * SCALE_FACTOR, -3)
    for log_value in (log_est, upper, low)
)

print(f"Estimated property {dollar_est}")
print(f"At {conf} confidence % the valuation range is")
print(f"USD  {dollar_low} at lower end to USD {dollar_hi} at high end" )

Estimated property 827000.0
At 68 confidence % the valuation range is
USD  685000.0 at lower end to USD 997000.0 at high end


In [67]:
def get_dollar_estimate(rm,ptratio,chas=False,large_range=True):
    """Print an estimated present-day dollar price for a Boston property.

    Parameters
    ----------
    rm : number of rooms; values below 1 are rejected.
    ptratio : pupil-teacher ratio; values below 1 are rejected.
    chas : passed to `get_log_estimate` as the river flag.
    large_range : passed to `get_log_estimate` as the confidence switch.

    Returns
    -------
    None. The estimate and its range are printed; for rejected inputs the
    single line "Unrealistic" is printed instead.
    """
    # Guard clause: refuse inputs that cannot describe a real property.
    if rm < 1 or ptratio < 1:
        print("Unrealistic")
        return

    log_est, low, upper, conf = get_log_estimate(rm, ptratio, chas, large_range)

    # Undo the log, convert thousands to dollars, rescale to today's prices
    # and round to the nearest thousand.
    dollar_est, dollar_hi, dollar_low = (
        round(np.e ** log_value * 1000 * SCALE_FACTOR, -3)
        for log_value in (log_est, upper, low)
    )

    print(f"Estimated property {dollar_est}")
    print(f"At {conf} confidence % the valuation range is")
    print(f"USD  {dollar_low} at lower end to USD {dollar_hi} at high end" )

In [68]:
# Exercise the input guard: a pupil-teacher ratio of 0 is below the minimum
# of 1, so this prints "Unrealistic" rather than an estimate.
get_dollar_estimate(10,0,True)

Unrealistic


In [73]:
# Run the same kind of valuation through the standalone `boston_valuation` module.
# NOTE(review): presumably a packaged copy of the functions defined above --
# confirm the two stay in sync. The import belongs in the notebook's top
# import cell so a fresh-kernel run exposes the dependency up front.
import boston_valuation as val

val.get_dollar_estimate(6,12,True)

Estimated property 783000.0
At 95 confidence % the valuation range is
USD  538000.0 at lower end to USD 1139000.0 at high end
