# Dependencies

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

# Preprocessing the Data

In [2]:
# read csv into dataframe
filepath = "Austin Properties 2019.1.6.csv"
csv_df = pd.read_csv(filepath)

In [3]:
# check the column names
csv_df.columns

Index(['Address', 'Building Name', 'Zip Code', 'Bed', 'Bath', 'Avg SF',
       '# Units', 'Mix %', 'Units Available Units', 'Units Available Percent',
       'Avg Asking Rent/Unit', 'Avg Asking Rent/SF', 'Avg Effective Rent/Unit',
       'Avg Effective Rent/SF', 'Concessions %', 'Year Built',
       'Year Renovated', 'Units', 'Lande (Acres)', 'Vacancy %', 'Washer/Dryer',
       'Walk in Closet', 'Hardwood/Vinyl Floor', 'Walk Score', 'Transit Score',
       '1 Mile Population', '1 Mile Median Household Income',
       'Miles from Domain', 'Miles from Downtown', 'Unnamed: 29'],
      dtype='object')

In [4]:
csv_df = pd.get_dummies(data=csv_df, columns=['Washer/Dryer'])

In [5]:
# select certain variables
original_df = csv_df[['Bed', 'Bath', 'Avg SF', 'Concessions %', 'Year Built', 
   'Walk in Closet', 'Hardwood/Vinyl Floor',
   'Washer/Dryer_Yes but not in unit', 'Washer/Dryer_Yes in unit', 'Washer/Dryer_no',
   'Walk Score', 'Transit Score', '1 Mile Population', '1 Mile Median Household Income',
   'Miles from Domain', 'Miles from Downtown', 
   'Avg Effective Rent/Unit']]

In [6]:
# process na values
austin_properties_df = original_df.dropna()
# check the result of na value processing
austin_properties_df.count()

Bed                                 5468
Bath                                5468
Avg SF                              5468
Concessions %                       5468
Year Built                          5468
Walk in Closet                      5468
Hardwood/Vinyl Floor                5468
Washer/Dryer_Yes but not in unit    5468
Washer/Dryer_Yes in unit            5468
Washer/Dryer_no                     5468
Walk Score                          5468
Transit Score                       5468
1 Mile Population                   5468
1 Mile Median Household Income      5468
Miles from Domain                   5468
Miles from Downtown                 5468
Avg Effective Rent/Unit             5468
dtype: int64

In [7]:
# check the data type
austin_properties_df.dtypes

Bed                                 float64
Bath                                float64
Avg SF                              float64
Concessions %                       float64
Year Built                          float64
Walk in Closet                      float64
Hardwood/Vinyl Floor                float64
Washer/Dryer_Yes but not in unit      uint8
Washer/Dryer_Yes in unit              uint8
Washer/Dryer_no                       uint8
Walk Score                          float64
Transit Score                       float64
1 Mile Population                   float64
1 Mile Median Household Income      float64
Miles from Domain                   float64
Miles from Downtown                 float64
Avg Effective Rent/Unit             float64
dtype: object

In [8]:
# overview of the new dataframe
austin_properties_df.head()

Unnamed: 0,Bed,Bath,Avg SF,Concessions %,Year Built,Walk in Closet,Hardwood/Vinyl Floor,Washer/Dryer_Yes but not in unit,Washer/Dryer_Yes in unit,Washer/Dryer_no,Walk Score,Transit Score,1 Mile Population,1 Mile Median Household Income,Miles from Domain,Miles from Downtown,Avg Effective Rent/Unit
0,1.0,1.0,560.0,0.01,2016.0,1.0,1.0,0,1,0,88.0,44.0,20115.0,70385.0,12.6,2.1,1528.0
1,1.0,1.0,612.0,0.01,2016.0,1.0,1.0,0,1,0,88.0,44.0,20115.0,70385.0,12.6,2.1,1633.0
2,1.0,1.0,629.0,0.01,2016.0,1.0,1.0,0,1,0,88.0,44.0,20115.0,70385.0,12.6,2.1,1740.0
3,1.0,1.0,774.0,0.01,2016.0,1.0,1.0,0,1,0,88.0,44.0,20115.0,70385.0,12.6,2.1,1778.0
4,1.0,1.0,778.0,0.01,2016.0,1.0,1.0,0,1,0,88.0,44.0,20115.0,70385.0,12.6,2.1,1852.0


In [9]:
austin_properties_df["Year Built"].mean()

1997.693672275055

# Determine X and y for Machine Learning

In [10]:
X = austin_properties_df.iloc[:, :-1]
y = austin_properties_df['Avg Effective Rent/Unit']

# Splitting the data into training and testing sets

In [11]:
from sklearn.model_selection import train_test_split

# 80/20 training and testing data split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [12]:
# check the shape of training and test data
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(4374, 16)
(1094, 16)
(4374,)
(1094,)


# Model (I) - Linear Regression

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [14]:
# train the model
lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [15]:
# model evaluation
y_test_predicted = lin_model.predict(X_test)
mse_test = mean_squared_error(y_test, y_test_predicted)
r2_test = r2_score(y_test, y_test_predicted)
print(f"Mean Squared Error (MSE): {mse_test}")
print(f"R-quared (R2): {r2_test}")

Mean Squared Error (MSE): 296512.9266368084
R-quared (R2): 0.5344241825987113


# Model (II) - Polynomial Regression

In [16]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

In [17]:
poly_model = Pipeline([
    ('a', PolynomialFeatures(degree=2)),
    ('b', LinearRegression(fit_intercept=False))
])

In [18]:
poly_model.fit(X_train, y_train)
poly_model.score(X_test, y_test)

0.44883007398009794

# Model (III) Random Forest

In [20]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

forest = RandomForestRegressor()
param_grid = {'n_estimators' : list(range(1,999,2))}
model_rf = GridSearchCV(forest, param_grid, verbose=3)

model_rf.fit(X_train, y_train)
print(model_rf.best_params_)
print(model_rf.best_score_)

y_pred_rf = model_rf.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


Fitting 3 folds for each of 499 candidates, totalling 1497 fits
[CV] n_estimators=1 ..................................................
[CV] .......... n_estimators=1, score=0.936103360114957, total=   0.0s
[CV] n_estimators=1 ..................................................
[CV] ......... n_estimators=1, score=0.8620350776106729, total=   0.0s
[CV] n_estimators=1 ..................................................
[CV] ......... n_estimators=1, score=0.8752445485214535, total=   0.0s
[CV] n_estimators=3 ..................................................
[CV] ......... n_estimators=3, score=0.9343480771121119, total=   0.1s
[CV] n_estimators=3 ..................................................
[CV] ......... n_estimators=3, score=0.8845040595415848, total=   0.0s
[CV] n_estimators=3 ..................................................
[CV] ......... n_estimators=3, score=0.8776823223594649, total=   0.0s
[CV] n_estimators=5 ..................................................
[CV] ........

[CV] ........ n_estimators=39, score=0.9428164438377401, total=   0.7s
[CV] n_estimators=39 .................................................
[CV] ........ n_estimators=39, score=0.9314464511814243, total=   0.7s
[CV] n_estimators=39 .................................................
[CV] ........ n_estimators=39, score=0.9287848053424925, total=   0.6s
[CV] n_estimators=41 .................................................
[CV] ........ n_estimators=41, score=0.9430857404253309, total=   0.7s
[CV] n_estimators=41 .................................................
[CV] ........ n_estimators=41, score=0.9413825876316618, total=   0.7s
[CV] n_estimators=41 .................................................
[CV] ........ n_estimators=41, score=0.9227305347743747, total=   0.6s
[CV] n_estimators=43 .................................................
[CV] ........ n_estimators=43, score=0.9449640438667319, total=   0.6s
[CV] n_estimators=43 .................................................
[CV] .

[CV] ......... n_estimators=77, score=0.930502695942979, total=   1.2s
[CV] n_estimators=77 .................................................
[CV] ........ n_estimators=77, score=0.9355013357002917, total=   1.1s
[CV] n_estimators=79 .................................................
[CV] ........ n_estimators=79, score=0.9456394547686473, total=   1.1s
[CV] n_estimators=79 .................................................
[CV] ........ n_estimators=79, score=0.9322579683968085, total=   1.2s
[CV] n_estimators=79 .................................................
[CV] ........ n_estimators=79, score=0.9308673803336116, total=   1.3s
[CV] n_estimators=81 .................................................
[CV] ........ n_estimators=81, score=0.9406295110153147, total=   1.1s
[CV] n_estimators=81 .................................................
[CV] ........ n_estimators=81, score=0.9221225881965954, total=   1.1s
[CV] n_estimators=81 .................................................
[CV] .

[CV] ....... n_estimators=115, score=0.9323974845738037, total=   1.7s
[CV] n_estimators=117 ................................................
[CV] ....... n_estimators=117, score=0.9397100182573338, total=   1.7s
[CV] n_estimators=117 ................................................
[CV] ....... n_estimators=117, score=0.9315609773529102, total=   1.8s
[CV] n_estimators=117 ................................................
[CV] ....... n_estimators=117, score=0.9274499892984234, total=   1.7s
[CV] n_estimators=119 ................................................
[CV] ....... n_estimators=119, score=0.9464555603643081, total=   1.7s
[CV] n_estimators=119 ................................................
[CV] ....... n_estimators=119, score=0.9352693589315412, total=   1.7s
[CV] n_estimators=119 ................................................
[CV] ....... n_estimators=119, score=0.9291256370832028, total=   1.7s
[CV] n_estimators=121 ................................................
[CV] .

[CV] ....... n_estimators=155, score=0.9414710394340949, total=   2.0s
[CV] n_estimators=155 ................................................
[CV] ....... n_estimators=155, score=0.9355647385816256, total=   2.0s
[CV] n_estimators=155 ................................................
[CV] ....... n_estimators=155, score=0.9305599580485677, total=   2.1s
[CV] n_estimators=157 ................................................
[CV] ....... n_estimators=157, score=0.9428972838748352, total=   2.1s
[CV] n_estimators=157 ................................................
[CV] ....... n_estimators=157, score=0.9343640827942379, total=   2.1s
[CV] n_estimators=157 ................................................
[CV] ....... n_estimators=157, score=0.9322682350686845, total=   2.1s
[CV] n_estimators=159 ................................................
[CV] ....... n_estimators=159, score=0.9395085025746974, total=   2.1s
[CV] n_estimators=159 ................................................
[CV] .

[CV] ....... n_estimators=193, score=0.9304028845819593, total=   3.1s
[CV] n_estimators=193 ................................................
[CV] ........ n_estimators=193, score=0.931140746681246, total=   2.8s
[CV] n_estimators=195 ................................................
[CV] ....... n_estimators=195, score=0.9430971014311491, total=   2.6s
[CV] n_estimators=195 ................................................
[CV] ....... n_estimators=195, score=0.9327794633605688, total=   2.8s
[CV] n_estimators=195 ................................................
[CV] ....... n_estimators=195, score=0.9306361455682153, total=   2.8s
[CV] n_estimators=197 ................................................
[CV] ....... n_estimators=197, score=0.9447573521159078, total=   2.6s
[CV] n_estimators=197 ................................................
[CV] ....... n_estimators=197, score=0.9355620154401953, total=   2.7s
[CV] n_estimators=197 ................................................
[CV] .

[CV] ....... n_estimators=231, score=0.9320874491705909, total=   3.5s
[CV] n_estimators=233 ................................................
[CV] ....... n_estimators=233, score=0.9402070383641086, total=   3.4s
[CV] n_estimators=233 ................................................
[CV] ....... n_estimators=233, score=0.9345342023779003, total=   3.4s
[CV] n_estimators=233 ................................................
[CV] ....... n_estimators=233, score=0.9293597068485288, total=   3.4s
[CV] n_estimators=235 ................................................
[CV] ....... n_estimators=235, score=0.9422192043504111, total=   3.4s
[CV] n_estimators=235 ................................................
[CV] ....... n_estimators=235, score=0.9347561619082762, total=   3.4s
[CV] n_estimators=235 ................................................
[CV] ....... n_estimators=235, score=0.9328894383545996, total=   3.4s
[CV] n_estimators=237 ................................................
[CV] .

[CV] ....... n_estimators=271, score=0.9456775816666714, total=   3.6s
[CV] n_estimators=271 ................................................
[CV] ....... n_estimators=271, score=0.9345386738074987, total=   3.7s
[CV] n_estimators=271 ................................................
[CV] ....... n_estimators=271, score=0.9293628049530521, total=   3.6s
[CV] n_estimators=273 ................................................
[CV] ........ n_estimators=273, score=0.942756994226858, total=   3.6s
[CV] n_estimators=273 ................................................
[CV] ........ n_estimators=273, score=0.932315198276065, total=   3.7s
[CV] n_estimators=273 ................................................
[CV] ......... n_estimators=273, score=0.92908293464719, total=   3.6s
[CV] n_estimators=275 ................................................
[CV] ....... n_estimators=275, score=0.9452621096855869, total=   3.6s
[CV] n_estimators=275 ................................................
[CV] .

[CV] ....... n_estimators=309, score=0.9332484071814617, total=   4.4s
[CV] n_estimators=309 ................................................
[CV] ........ n_estimators=309, score=0.931441356002446, total=   4.3s
[CV] n_estimators=311 ................................................
[CV] ....... n_estimators=311, score=0.9458407840339507, total=   4.2s
[CV] n_estimators=311 ................................................
[CV] ....... n_estimators=311, score=0.9365972939599665, total=   4.4s
[CV] n_estimators=311 ................................................
[CV] ....... n_estimators=311, score=0.9315566441102987, total=   4.3s
[CV] n_estimators=313 ................................................
[CV] ........ n_estimators=313, score=0.943580635684205, total=   4.2s
[CV] n_estimators=313 ................................................
[CV] ....... n_estimators=313, score=0.9347342658143494, total=   4.3s
[CV] n_estimators=313 ................................................
[CV] .

[CV] ....... n_estimators=347, score=0.9297122937037303, total=   4.7s
[CV] n_estimators=349 ................................................
[CV] ........ n_estimators=349, score=0.941930961167349, total=   4.9s
[CV] n_estimators=349 ................................................
[CV] ....... n_estimators=349, score=0.9345458210697591, total=   4.9s
[CV] n_estimators=349 ................................................
[CV] ....... n_estimators=349, score=0.9315096803191613, total=   5.0s
[CV] n_estimators=351 ................................................
[CV] ....... n_estimators=351, score=0.9418899550514909, total=   4.7s
[CV] n_estimators=351 ................................................
[CV] ....... n_estimators=351, score=0.9323844244978238, total=   4.8s
[CV] n_estimators=351 ................................................
[CV] ....... n_estimators=351, score=0.9287725944546658, total=   4.9s
[CV] n_estimators=353 ................................................
[CV] .

[CV] ....... n_estimators=387, score=0.9426839444879846, total=   5.9s
[CV] n_estimators=387 ................................................
[CV] ....... n_estimators=387, score=0.9349732897736649, total=   6.2s
[CV] n_estimators=387 ................................................
[CV] ....... n_estimators=387, score=0.9331533706525075, total=   5.5s
[CV] n_estimators=389 ................................................
[CV] ....... n_estimators=389, score=0.9416264131791264, total=   5.5s
[CV] n_estimators=389 ................................................
[CV] ........ n_estimators=389, score=0.934697907741869, total=   5.9s
[CV] n_estimators=389 ................................................
[CV] ....... n_estimators=389, score=0.9326288253067785, total=   5.8s
[CV] n_estimators=391 ................................................
[CV] ....... n_estimators=391, score=0.9431736986600823, total=   5.6s
[CV] n_estimators=391 ................................................
[CV] .

[CV] ....... n_estimators=425, score=0.9343517999450122, total=   6.1s
[CV] n_estimators=425 ................................................
[CV] ....... n_estimators=425, score=0.9325892006426271, total=   6.1s
[CV] n_estimators=427 ................................................
[CV] ....... n_estimators=427, score=0.9420901970981856, total=   6.0s
[CV] n_estimators=427 ................................................
[CV] ....... n_estimators=427, score=0.9339768061583048, total=   6.1s
[CV] n_estimators=427 ................................................
[CV] ....... n_estimators=427, score=0.9306625988058825, total=   6.1s
[CV] n_estimators=429 ................................................
[CV] ....... n_estimators=429, score=0.9453821929898905, total=   6.0s
[CV] n_estimators=429 ................................................
[CV] ....... n_estimators=429, score=0.9354976559052856, total=   6.1s
[CV] n_estimators=429 ................................................
[CV] .

[CV] ....... n_estimators=463, score=0.9292012366180068, total=   6.7s
[CV] n_estimators=465 ................................................
[CV] ....... n_estimators=465, score=0.9428620238076316, total=   6.5s
[CV] n_estimators=465 ................................................
[CV] ........ n_estimators=465, score=0.935085316862291, total=   6.5s
[CV] n_estimators=465 ................................................
[CV] ......... n_estimators=465, score=0.93020605574399, total=   6.7s
[CV] n_estimators=467 ................................................
[CV] ....... n_estimators=467, score=0.9432090051725041, total=   7.1s
[CV] n_estimators=467 ................................................
[CV] ....... n_estimators=467, score=0.9326478848053869, total=   6.9s
[CV] n_estimators=467 ................................................
[CV] ....... n_estimators=467, score=0.9312410148350425, total=   6.9s
[CV] n_estimators=469 ................................................
[CV] .

[CV] ....... n_estimators=503, score=0.9430823771593624, total=   6.9s
[CV] n_estimators=503 ................................................
[CV] ....... n_estimators=503, score=0.9334641801419631, total=   6.9s
[CV] n_estimators=503 ................................................
[CV] ....... n_estimators=503, score=0.9304529442359097, total=   7.1s
[CV] n_estimators=505 ................................................
[CV] ....... n_estimators=505, score=0.9454179736715063, total=   7.3s
[CV] n_estimators=505 ................................................
[CV] ....... n_estimators=505, score=0.9364694597713279, total=   7.2s
[CV] n_estimators=505 ................................................
[CV] ....... n_estimators=505, score=0.9301441257017432, total=   8.0s
[CV] n_estimators=507 ................................................
[CV] ....... n_estimators=507, score=0.9419763866693285, total=   7.2s
[CV] n_estimators=507 ................................................
[CV] .

[CV] ....... n_estimators=541, score=0.9352791341485898, total=   8.0s
[CV] n_estimators=541 ................................................
[CV] ....... n_estimators=541, score=0.9310411248681695, total=   7.6s
[CV] n_estimators=543 ................................................
[CV] ....... n_estimators=543, score=0.9431879259764887, total=   7.7s
[CV] n_estimators=543 ................................................
[CV] ....... n_estimators=543, score=0.9339437602945925, total=   8.1s
[CV] n_estimators=543 ................................................
[CV] ....... n_estimators=543, score=0.9328950149427525, total=   7.7s
[CV] n_estimators=545 ................................................
[CV] ....... n_estimators=545, score=0.9438076988964357, total=   7.8s
[CV] n_estimators=545 ................................................
[CV] ....... n_estimators=545, score=0.9334328811986902, total=   8.1s
[CV] n_estimators=545 ................................................
[CV] .

[CV] ....... n_estimators=579, score=0.9309751147560381, total=   8.3s
[CV] n_estimators=581 ................................................
[CV] ....... n_estimators=581, score=0.9433030522783085, total=   8.3s
[CV] n_estimators=581 ................................................
[CV] ....... n_estimators=581, score=0.9362706239344526, total=   8.7s
[CV] n_estimators=581 ................................................
[CV] ....... n_estimators=581, score=0.9280325061059196, total=   8.4s
[CV] n_estimators=583 ................................................
[CV] ....... n_estimators=583, score=0.9445578917642238, total=   8.3s
[CV] n_estimators=583 ................................................
[CV] ....... n_estimators=583, score=0.9333069066567541, total=   9.2s
[CV] n_estimators=583 ................................................
[CV] ........ n_estimators=583, score=0.930531128138289, total=  10.1s
[CV] n_estimators=585 ................................................
[CV] .

[CV] ....... n_estimators=619, score=0.9437188832434855, total=   8.5s
[CV] n_estimators=619 ................................................
[CV] ....... n_estimators=619, score=0.9336163167130311, total=   9.3s
[CV] n_estimators=619 ................................................
[CV] ....... n_estimators=619, score=0.9315364612303071, total=   9.0s
[CV] n_estimators=621 ................................................
[CV] ....... n_estimators=621, score=0.9435823963324044, total=   9.7s
[CV] n_estimators=621 ................................................
[CV] ....... n_estimators=621, score=0.9333226714538512, total=   9.0s
[CV] n_estimators=621 ................................................
[CV] ........ n_estimators=621, score=0.932780401167129, total=  10.4s
[CV] n_estimators=623 ................................................
[CV] ....... n_estimators=623, score=0.9444934991993875, total=   9.3s
[CV] n_estimators=623 ................................................
[CV] .

[CV] ....... n_estimators=657, score=0.9351038462979923, total=   8.6s
[CV] n_estimators=657 ................................................
[CV] ....... n_estimators=657, score=0.9312227585232651, total=   8.7s
[CV] n_estimators=659 ................................................
[CV] ....... n_estimators=659, score=0.9434889693256815, total=   8.6s
[CV] n_estimators=659 ................................................
[CV] ....... n_estimators=659, score=0.9328523567467045, total=   8.7s
[CV] n_estimators=659 ................................................
[CV] ........ n_estimators=659, score=0.929579125818014, total=   8.7s
[CV] n_estimators=661 ................................................
[CV] ......... n_estimators=661, score=0.94349034693625, total=   8.6s
[CV] n_estimators=661 ................................................
[CV] ....... n_estimators=661, score=0.9360086518441451, total=   9.1s
[CV] n_estimators=661 ................................................
[CV] .

[CV] ....... n_estimators=695, score=0.9287996790576554, total=   9.9s
[CV] n_estimators=697 ................................................
[CV] ....... n_estimators=697, score=0.9434823883176185, total=   9.8s
[CV] n_estimators=697 ................................................
[CV] ....... n_estimators=697, score=0.9341839005842035, total=  10.6s
[CV] n_estimators=697 ................................................
[CV] ....... n_estimators=697, score=0.9308866264375344, total=  11.2s
[CV] n_estimators=699 ................................................
[CV] ....... n_estimators=699, score=0.9428922476456223, total=  10.8s
[CV] n_estimators=699 ................................................
[CV] ....... n_estimators=699, score=0.9339750366830484, total=  10.4s
[CV] n_estimators=699 ................................................
[CV] ........ n_estimators=699, score=0.929811147180963, total=  10.5s
[CV] n_estimators=701 ................................................
[CV] .

[CV] ....... n_estimators=735, score=0.9432990265946098, total=  10.7s
[CV] n_estimators=735 ................................................
[CV] ....... n_estimators=735, score=0.9339807238669062, total=  11.5s
[CV] n_estimators=735 ................................................
[CV] ....... n_estimators=735, score=0.9300480344745078, total=  11.0s
[CV] n_estimators=737 ................................................
[CV] ....... n_estimators=737, score=0.9445523135554542, total=  10.5s
[CV] n_estimators=737 ................................................
[CV] ....... n_estimators=737, score=0.9329982672090342, total=  10.3s
[CV] n_estimators=737 ................................................
[CV] ........ n_estimators=737, score=0.930486039983339, total=  10.8s
[CV] n_estimators=739 ................................................
[CV] ....... n_estimators=739, score=0.9437169832994949, total=  10.4s
[CV] n_estimators=739 ................................................
[CV] .

[CV] ....... n_estimators=773, score=0.9335122175901633, total=  11.1s
[CV] n_estimators=773 ................................................
[CV] ....... n_estimators=773, score=0.9314956394721728, total=  10.6s
[CV] n_estimators=775 ................................................
[CV] ........ n_estimators=775, score=0.943573488588679, total=  11.4s
[CV] n_estimators=775 ................................................
[CV] ....... n_estimators=775, score=0.9335064646968425, total=  11.6s
[CV] n_estimators=775 ................................................
[CV] ........ n_estimators=775, score=0.929779192184222, total=  11.5s
[CV] n_estimators=777 ................................................
[CV] ......... n_estimators=777, score=0.94373893195186, total=  10.8s
[CV] n_estimators=777 ................................................
[CV] ....... n_estimators=777, score=0.9335398082372002, total=  11.0s
[CV] n_estimators=777 ................................................
[CV] .

[CV] ....... n_estimators=811, score=0.9302817115997597, total=  11.9s
[CV] n_estimators=813 ................................................
[CV] ....... n_estimators=813, score=0.9426942412384961, total=  12.2s
[CV] n_estimators=813 ................................................
[CV] ....... n_estimators=813, score=0.9335568632802256, total=  11.5s
[CV] n_estimators=813 ................................................
[CV] ....... n_estimators=813, score=0.9303210382061671, total=  11.3s
[CV] n_estimators=815 ................................................
[CV] ....... n_estimators=815, score=0.9429493265264048, total=  11.1s
[CV] n_estimators=815 ................................................
[CV] ....... n_estimators=815, score=0.9347013691501271, total=  11.2s
[CV] n_estimators=815 ................................................
[CV] ....... n_estimators=815, score=0.9304245920242221, total=  10.9s
[CV] n_estimators=817 ................................................
[CV] .

[CV] ....... n_estimators=851, score=0.9429580532529563, total=  12.2s
[CV] n_estimators=851 ................................................
[CV] ....... n_estimators=851, score=0.9349522078482517, total=  12.3s
[CV] n_estimators=851 ................................................
[CV] ....... n_estimators=851, score=0.9306152454979801, total=  12.7s
[CV] n_estimators=853 ................................................
[CV] ....... n_estimators=853, score=0.9429260650642013, total=  11.9s
[CV] n_estimators=853 ................................................
[CV] ....... n_estimators=853, score=0.9341904375106189, total=  12.1s
[CV] n_estimators=853 ................................................
[CV] ....... n_estimators=853, score=0.9306540731053669, total=  12.4s
[CV] n_estimators=855 ................................................
[CV] ....... n_estimators=855, score=0.9439030118256986, total=  12.2s
[CV] n_estimators=855 ................................................
[CV] .

[CV] ....... n_estimators=889, score=0.9348265236010294, total=  12.9s
[CV] n_estimators=889 ................................................
[CV] ....... n_estimators=889, score=0.9302413975659783, total=  13.2s
[CV] n_estimators=891 ................................................
[CV] ....... n_estimators=891, score=0.9433153489603292, total=  12.4s
[CV] n_estimators=891 ................................................
[CV] ....... n_estimators=891, score=0.9354552123052465, total=  13.3s
[CV] n_estimators=891 ................................................
[CV] ....... n_estimators=891, score=0.9303359839102077, total=  13.6s
[CV] n_estimators=893 ................................................
[CV] ....... n_estimators=893, score=0.9423802362017597, total=  12.8s
[CV] n_estimators=893 ................................................
[CV] ....... n_estimators=893, score=0.9340042291471131, total=  12.8s
[CV] n_estimators=893 ................................................
[CV] .

[CV] ....... n_estimators=927, score=0.9310851403721302, total=  14.0s
[CV] n_estimators=929 ................................................
[CV] ....... n_estimators=929, score=0.9428782445588119, total=  13.2s
[CV] n_estimators=929 ................................................
[CV] ....... n_estimators=929, score=0.9361038823002242, total=  12.7s
[CV] n_estimators=929 ................................................
[CV] ....... n_estimators=929, score=0.9306554230248497, total=  14.2s
[CV] n_estimators=931 ................................................
[CV] ....... n_estimators=931, score=0.9424800300379463, total=  14.0s
[CV] n_estimators=931 ................................................
[CV] ....... n_estimators=931, score=0.9333670717319161, total=  14.1s
[CV] n_estimators=931 ................................................
[CV] ....... n_estimators=931, score=0.9306461960027953, total=  14.0s
[CV] n_estimators=933 ................................................
[CV] .

[CV] ....... n_estimators=967, score=0.9426098829534937, total=  13.4s
[CV] n_estimators=967 ................................................
[CV] ....... n_estimators=967, score=0.9356528611315482, total=  14.3s
[CV] n_estimators=967 ................................................
[CV] ....... n_estimators=967, score=0.9307960632032175, total=  15.6s
[CV] n_estimators=969 ................................................
[CV] ....... n_estimators=969, score=0.9430323657254022, total=  15.2s
[CV] n_estimators=969 ................................................
[CV] ....... n_estimators=969, score=0.9345112303549795, total=  14.3s
[CV] n_estimators=969 ................................................
[CV] ....... n_estimators=969, score=0.9307553210623376, total=  14.4s
[CV] n_estimators=971 ................................................
[CV] ....... n_estimators=971, score=0.9450099188476516, total=  14.6s
[CV] n_estimators=971 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done 1497 out of 1497 | elapsed: 185.7min finished


{'n_estimators': 189}
0.9387849851278849


In [21]:
import pickle
rf_model_filepath = '/Users/apple/Documents/Data Projects/Machine_Learning_Apartment_Rent/Application/static/resources/RF999_Model.sav'
pickle.dump(model_rf, open(rf_model_filepath, 'wb'))

In [None]:
model_rf.

In [None]:
rf_model_filepath2 = '/Users/apple/Documents/Data Projects/Machine_Learning_Apartment_Rent/Application/static/resources/Random_Forest_Model.sav'
loaded_model2 = pickle.load(open(rf_model_filepath2, 'rb'))

In [None]:
loaded_model2.predict(X_new)

In [None]:
loaded_model.predict(X_new)

In [23]:
X_new = [1, 1, 750, 0.02, 2018, 1, 1, 1, 0, 0, 100, 100, 70000, 50000, 0, 0]
X_new = pd.to_numeric(X_new)
X_new = np.reshape(X_new, (1, 16))
print(model_rf.predict(X_new))

[2750.38095238]


# Model (IV) - Neural Network Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

In [None]:
print(len(X_train.columns))
print(X_train.columns)

In [None]:
scaler = MinMaxScaler()

In [None]:
# define neural network model
model_nn = Sequential()
model_nn.add(Dense(12, input_dim=16, kernel_initializer='random_uniform', activation='relu'))
model_nn.add(Dense(8, kernel_initializer='random_uniform', activation='relu'))
model_nn.add(Dense(4, kernel_initializer='random_uniform', activation='relu'))
model_nn.add(Dense(1, kernel_initializer='random_uniform', activation='relu'))

# compile model
model_nn.compile(loss='mse', optimizer='adam')

In [None]:
model_nn.fit(X_train, y_train, batch_size=300, epochs=10000, 
             verbose=1, 
             callbacks=[EarlyStopping(monitor='loss', min_delta=1000, patience=300)])

In [None]:
filepath = "//weights.{epoch:02d}-{loss:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', 
                             verbose=0, save_best_only=True, 
                             save_weights_only=False, mode='min', period=100)

In [None]:
y_predict = model_nn.predict(X_test)
print(r2_score(y_test, y_predict))

In [None]:
X_new = [1, 1, 750, 0.02, 2018, 1, 1, 1, 0, 0, 100, 100, 70000, 50000, 0, 0]
X_new = pd.to_numeric(X_new)
X_new = np.reshape(X_new, (1, 16))

In [None]:
model_nn.predict(X_new)

In [None]:
X_train.columns

# Model (V) - K Nearest Neighbour

In [None]:
from sklearn.neighbors import KNeighborsRegressor
k_neighbour = KNeighborsRegressor()

from sklearn.model_selection import GridSearchCV
param_grid = {'n_neighbors' : list(range(1,999,2))}
model_k_neighbour = GridSearchCV(k_neighbour, param_grid, verbose=3)

model_k_neighbour.fit(X_train, y_train)

model_k_neighbour.best_params_

model_k_neighbour.best_score_

y_pred_k_neighbour = model_k_neighbour.predict(X_test)

# Residuals

In [None]:
plt.scatter(y_test_predicted, y_test_predicted-y_test)
plt.hlines(y=0, xmin=y_test_predicted.min(), xmax=y_test_predicted.max())
plt.show()