In [33]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor as DTR
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import mean_absolute_error

In [20]:
# Read the housing dataset from "Housing.csv" (a path relative to the
# current working directory) and preview its first rows.
house_data = pd.read_csv("Housing.csv")
house_data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,2
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,2
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,2
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,2


In [21]:
# Prediction target: the `price` column of the loaded frame.
y = house_data["price"]
y

0      13300000
1      12250000
2      12250000
3      12215000
4      11410000
         ...   
540     1820000
541     1767150
542     1750000
543     1750000
544     1750000
Name: price, Length: 545, dtype: int64

In [22]:
# Show the column names; the feature list in the next cell is drawn from these.
house_data.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [41]:
# Predictor columns: every column of house_data except the target, `price`.
features = [
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'parking',
    'prefarea',
    'furnishingstatus',
]

# Feature matrix used for training and validation.
X = house_data[features]
X.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,7420,4,2,3,1,0,0,0,1,2,1,2
1,8960,4,4,4,1,0,0,0,1,3,0,2
2,9960,3,2,2,1,0,1,0,0,2,1,1
3,7500,4,2,2,1,0,1,0,1,3,1,2
4,7420,4,1,2,1,1,1,0,1,2,0,2


In [61]:
def best_tree_depth(max_nodes, train_X, val_X, train_y, val_y):
    """Fit a decision tree with a leaf-count cap and score it on validation data.

    Despite the function name, the value being varied is the tree's
    ``max_leaf_nodes`` limit, not its depth.

    Parameters
    ----------
    max_nodes
        Passed to the regressor as ``max_leaf_nodes``.
    train_X, train_y
        Features and target the tree is fitted on.
    val_X, val_y
        Held-out features and target used for prediction and scoring.

    Returns
    -------
    list
        ``[mae, predictions]``: the mean absolute error of the predictions
        for ``val_X`` against ``val_y``, followed by the predictions themselves.
    """
    # random_state is fixed so repeated calls with the same inputs agree.
    fitted_tree = DTR(max_leaf_nodes=max_nodes, random_state=1).fit(train_X, train_y)
    val_predictions = fitted_tree.predict(val_X)
    val_mae = mean_absolute_error(val_y, val_predictions)
    return [val_mae, val_predictions]
# Hold out a validation split; the fixed seed keeps the split reproducible.
train_X, val_X, train_y, val_y = tts(X, y, random_state=1)

# Candidate max_leaf_nodes limits to compare.
trees = [5, 25, 50, 75, 100, 500, 1000, 1500, 2000]

# Validation MAE per candidate (element 0 of best_tree_depth's result),
# keyed by the candidate value in the order the candidates are listed.
trees_depth = {
    leaf_limit: best_tree_depth(leaf_limit, train_X, val_X, train_y, val_y)[0]
    for leaf_limit in trees
}
print(trees_depth)

# The candidate whose validation MAE is lowest.
lowest_mae_candidate = min(trees_depth, key=trees_depth.get)
print(lowest_mae_candidate)

{5: 1065059.2247578695, 25: 960603.8492883168, 50: 926163.9924369061, 75: 1002276.3043828676, 100: 1019723.0810827027, 500: 1102189.3430656935, 1000: 1102189.3430656935, 1500: 1102189.3430656935, 2000: 1102189.3430656935}
50


In [62]:
# Re-fit with the leaf limit that achieved the lowest validation MAE in the
# sweep above. The value is looked up from trees_depth instead of being
# hard-coded, so this cell stays correct if the candidates or data change.
best_tree_depth(min(trees_depth, key=trees_depth.get), train_X, val_X, train_y, val_y)

[926163.9924369061,
 array([7050615.38461538, 6088250.        , 7050615.38461538,
        5899250.        , 4300578.94736842, 3822700.        ,
        2884456.52173913, 6088250.        , 2856000.        ,
        4473466.66666667, 3699888.88888889, 3374233.33333333,
        3374233.33333333, 3822700.        , 8680000.        ,
        7794420.        , 3698692.30769231, 7313833.33333333,
        5624062.5       , 2884456.52173913, 3822700.        ,
        4662000.        , 4217500.        , 3668913.04347826,
        4982600.        , 5899250.        , 2884456.52173913,
        7182000.        , 5939500.        , 3668913.04347826,
        7050615.38461538, 7050615.38461538, 4217500.        ,
        4606350.        , 6088250.        , 3668913.04347826,
        4982600.        , 2884456.52173913, 4473466.66666667,
        4594333.33333333, 3140666.66666667, 3822700.        ,
        3699888.88888889, 4473466.66666667, 8771000.        ,
        4606350.        , 4606350.        , 314066

In [63]:
from sklearn.ensemble import RandomForestRegressor

In [64]:
# Random forest regressor; random_state=1 fixes the seed so results are
# repeatable. All other hyperparameters are left at the library defaults.
house_model_rf = RandomForestRegressor(random_state=1)

In [65]:
# Train the forest on the training split and predict the validation split;
# fit() returns the estimator itself, so the two calls can be chained.
predictions1 = house_model_rf.fit(train_X, train_y).predict(val_X)
print(predictions1)

[7459159.4        5725580.         7825790.         6182050.
 4278610.         3396575.         2833600.         5659319.4
 2890534.5        4646250.         3589880.         3111710.
 4520390.         4069345.         6944309.4        7983455.2
 4300450.         4371255.         6019335.         2628430.
 3965220.         4864790.         4698680.         3386600.
 5265365.         6967135.         2630600.         7762924.4
 6901300.         3570248.5        6804688.8        6796522.6
 3932040.         4391415.         5333084.4        3949820.
 6258560.         2711030.         5115600.         5595450.
 4827445.         3452540.         3422440.         4708305.
 6641460.         3475010.         5219480.         3039960.
 2764324.5        2281300.         4405065.         4017895.
 2503445.         3836490.         4674530.         4471460.
 3090080.         4424070.         4512480.         4910920.
 6724900.         5698589.4        2511950.         4476360.
 4443635.         22

In [60]:
# Validation MAE of the random forest predictions, scored against the same
# val_y split that was used for the decision-tree candidates.
mean_absolute_error(val_y, predictions1)

870579.2462287105