**Melbourne House Rent Prediction**

In [None]:
#imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
#loading dataset
melb_house_data = pd.read_csv('melb_data.csv')
melb_house_data = melb_house_data.dropna()
melb_house_data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0
6,Abbotsford,124 Yarra St,3,h,1876000.0,S,Nelson,7/05/2016,2.5,3067.0,...,2.0,0.0,245.0,210.0,1910.0,Yarra,-37.8024,144.9993,Northern Metropolitan,4019.0
7,Abbotsford,98 Charles St,2,h,1636000.0,S,Nelson,8/10/2016,2.5,3067.0,...,1.0,2.0,256.0,107.0,1890.0,Yarra,-37.806,144.9954,Northern Metropolitan,4019.0


In [None]:
#describing dataset
melb_house_data.describe()

Unnamed: 0,Rooms,Price,Distance,Postcode,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Lattitude,Longtitude,Propertycount
count,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0,6196.0
mean,2.931407,1068828.0,9.751097,3101.947708,2.902034,1.57634,1.573596,471.00694,141.568645,1964.081988,-37.807904,144.990201,7435.489509
std,0.971079,675156.4,5.612065,86.421604,0.970055,0.711362,0.929947,897.449881,90.834824,38.105673,0.07585,0.099165,4337.698917
min,1.0,131000.0,0.0,3000.0,0.0,1.0,0.0,0.0,0.0,1196.0,-38.16492,144.54237,389.0
25%,2.0,620000.0,5.9,3044.0,2.0,1.0,1.0,152.0,91.0,1940.0,-37.855438,144.926198,4383.75
50%,3.0,880000.0,9.0,3081.0,3.0,1.0,1.0,373.0,124.0,1970.0,-37.80225,144.9958,6567.0
75%,4.0,1325000.0,12.4,3147.0,3.0,2.0,2.0,628.0,170.0,2000.0,-37.7582,145.0527,10175.0
max,8.0,9000000.0,47.4,3977.0,9.0,8.0,10.0,37000.0,3112.0,2018.0,-37.45709,145.52635,21650.0


In [None]:
#defining prediction varibale or 'y'
y = melb_house_data.Price
print(y)

1        1035000.0
2        1465000.0
4        1600000.0
6        1876000.0
7        1636000.0
           ...    
12205     601000.0
12206    1050000.0
12207     385000.0
12209     560000.0
12212    2450000.0
Name: Price, Length: 6196, dtype: float64


In [None]:
#defining factors or 'X'
features = ['Rooms', 'Distance', 'Bedroom2', 'Bathroom', 'Landsize', 'BuildingArea', 'YearBuilt']
X = melb_house_data[features]
X.head()

Unnamed: 0,Rooms,Distance,Bedroom2,Bathroom,Landsize,BuildingArea,YearBuilt
1,2,2.5,2.0,1.0,156.0,79.0,1900.0
2,3,2.5,3.0,2.0,134.0,150.0,1900.0
4,4,2.5,3.0,1.0,120.0,142.0,2014.0
6,3,2.5,4.0,2.0,245.0,210.0,1910.0
7,2,2.5,2.0,1.0,256.0,107.0,1890.0


In [None]:
#spliting data into training and testing dataset
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 1)


In [None]:
#selecting model
melb_model = DecisionTreeRegressor(random_state = 1)

In [None]:
#fitting model
melb_model.fit(train_X, train_y)

DecisionTreeRegressor(random_state=1)

In [None]:
#making predictions
predictions = melb_model.predict(val_X)
print(predictions)

[ 450000. 1205000.  535000. ... 1435000.  815000. 1425000.]


In [None]:
#validating predictions
mae = mean_absolute_error(predictions, val_y)
print(mae)

320402.75919948355


In [26]:
#finding out best size for our decision tree

def best_size(max_leaf, train_X, val_X, train_y, val_y):
  temp_model = DecisionTreeRegressor(max_leaf_nodes = max_leaf, random_state = 1)
  temp_model.fit(train_X, train_y)
  temp_predictions = temp_model.predict(val_X)
  temp_mae = mean_absolute_error(temp_predictions, val_y)
  return temp_mae

leaf_sizes = [5, 25, 50 ,100, 250, 500, 750, 1000, 2500, 5000]
scores = {}
for i in leaf_sizes:
  i_mae = (best_size(i, train_X, val_X, train_y, val_y))
  scores[i] = i_mae
best_tree_size = min(scores, key = scores.get)
print(best_tree_size)

100


In [28]:
#creating final model with best tree size
final_model = DecisionTreeRegressor(max_leaf_nodes = best_tree_size, random_state = 1)
final_model.fit(X, y)
final_predictions = final_model.predict(X)
final_mae = mean_absolute_error(final_predictions, y)
print(final_mae)

225523.8433246413


**END**
