1. Prepare dataset

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn import metrics

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedStratifiedKFold

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
# helper: read a header-less CSV and separate inputs from the output column
def load_dataset(filename):
    """Read a header-less CSV file and split it into features and target.

    Parameters
    ----------
    filename
        Anything ``pd.read_csv`` accepts. The file is read with
        ``header=None``, so the first row is treated as data.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is a 2-D array holding every column except
        the last and ``y`` is a 1-D array holding the last column.
    """
    # the whole table as a single 2-D numpy array
    table = pd.read_csv(filename, header=None).to_numpy()

    # every column but the last is an input; the last column is the output
    return table[:, :-1], table[:, -1]

In [4]:
# read the housing data into a feature matrix (X) and a target vector (y)
X, y = load_dataset(filename='housing.csv')
# quick sanity check: (rows, columns) of the feature matrix
print(X.shape)

(506, 13)


In [5]:
# hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

2. Train model

2.1 DecisionTreeRegressor Model

In [6]:

# declare model object
# random_state is fixed so the fitted tree, and every score computed from it,
# is the same after a kernel restart. scikit-learn permutes the candidate
# features at each split, so an unseeded tree can differ from run to run.
dtr_ml = DecisionTreeRegressor(random_state=1)

# train model on the training split only
dtr_ml.fit(X_train, y_train)


In [7]:
# Evaluate the fitted decision tree.

# predictions for the held-out rows
y_pred = dtr_ml.predict(X_test)

# error metrics on the test split
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# score() on the test split; for scikit-learn regressors this is R^2,
# even though the printed label says "Accuracy"
accuracy = dtr_ml.score(X_test, y_test)

# report: training score first (computed inline), then the test-set figures
print("Training Accuracy:", dtr_ml.score(X_train, y_train))
print("Testing Accuracy:", accuracy)
print(f"Model MSE score: {mse}")
print(f"Model MAE score: {mae}")

Training Accuracy: 1.0
Testing Accuracy: 0.8502578552598262
Model MSE score: 13.724539473684212
Model MAE score: 2.6861842105263154


In [18]:
# Use cross-validation to get a score that does not hinge on one train/test split.

# A fresh, unfitted estimator for cross_val_score. random_state is fixed so
# the reported mean is the same on every run; seeding only the fold
# assignment below is not enough, because tree construction is itself randomised.
dtr_ml = DecisionTreeRegressor(random_state=1)

# define the model evaluation procedure: 3 shuffled folds with a fixed seed
cv = KFold(n_splits=3, shuffle=True, random_state=1)

# evaluate the model on the full data set: one score per fold
result = cross_val_score(dtr_ml, X, y, cv=cv)

# summarise the per-fold scores by their mean
print(f"Cross validation score: {result.mean()}")

Cross validation score: 0.7006795442912863


2.2 RandomForestRegressor Model

In [19]:
# Random forest: 10 trees, depth capped at 6, absolute-error split criterion,
# n_jobs=-1 for parallel work, fixed seed for repeatable results.
rdf_ml = RandomForestRegressor(
    n_estimators=10,
    criterion='absolute_error',
    max_depth=6,
    min_samples_leaf=1,
    n_jobs=-1,
    random_state=3,
)

In [21]:
# fit the forest on the training split only
rdf_ml.fit(X=X_train, y=y_train)

In [22]:
# Evaluate the fitted random forest on the held-out rows.
y_pred = rdf_ml.predict(X_test)

# error metrics from sklearn.metrics, measured against the test targets
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# score() on the TEST split (R^2 for scikit-learn regressors)
accuracy = rdf_ml.score(X_test, y_test)

# print the final results; the training score is computed inline
print("Training Accuracy:", rdf_ml.score(X_train, y_train))
print("Testing Accuracy:", accuracy)
print(f"Model MSE score: {mse}")
print(f"Model MAE score: {mae}")

Training Accuracy: 0.924433473562793
Testing Accuracy: 0.8931270757281005
Model MSE score: 9.795383059210529
Model MAE score: 2.329111842105263


In [23]:
# Cross-validate a random forest with the same hyper-parameters as the one
# fitted above (a fresh, unfitted estimator is bound to the same name).
rdf_ml = RandomForestRegressor(
    n_estimators=10,
    criterion='absolute_error',
    max_depth=6,
    min_samples_leaf=1,
    n_jobs=-1,
    random_state=3,
)

# evaluation procedure: 3 shuffled folds with a fixed seed
cv = KFold(random_state=3, shuffle=True, n_splits=3)

# evaluate the model on the full data set: one score per fold
result = cross_val_score(rdf_ml, X, y, cv=cv)

# summarise the per-fold scores by their mean
print(f"Cross validation score: {result.mean()}")

Cross validation score: 0.8352786744628776
