In [25]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import Pipeline

import pandas as pd
import numpy as np

In [35]:
def train_gradient_boosting(X, y, n):
    pipeline = []
    pipeline.append(('estimator', GradientBoostingRegressor(n_estimators = 300)))
    pipeline = Pipeline(pipeline)
    
    adjusted_r2_train = []
    adjusted_r2_test = []
    
    MSE_train = []
    MSE_test = []
    
    MAE_train = []
    MAE_test = []
    
    kfold = KFold(n_splits=n, shuffle=True)
    
    for train_index, test_index in kfold.split(X):
        
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        
        pipeline.fit(X_train, y_train)
        
        y_train_predict = pipeline.predict(X_train)
        y_test_predict = pipeline.predict(X_test)
        
        MSE_train.append(mean_squared_error(y_train, y_train_predict))
        MSE_test.append(mean_squared_error(y_test, y_test_predict))
        
        y_train_score = pipeline.score(X_train, y_train)
        y_test_score = pipeline.score(X_test, y_test)
        
        adjusted_r2_train.append(1 - (1-y_train_score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1))
        adjusted_r2_test.append(1 - (1-y_test_score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
        
        MAE_train.append(mean_absolute_error(y_train, y_train_predict))
        MAE_test.append(mean_absolute_error(y_test, y_test_predict))
    
    print("Gradient Boosted Regressor Results:")
    print("Adjusted R^2 Train: {0}".format(np.mean(adjusted_r2_train)))
    print("Adjusted R^2 Test: {0}".format(np.mean(adjusted_r2_test)))
    print("RMSE Train: {0}".format(np.sqrt(np.mean(MSE_train))))
    print("RMSE Test: {0}".format(np.sqrt(np.mean(MSE_test))))
    print("MAE Train: {0}".format(np.mean(MAE_train)))
    print("MAE Test: {0}".format(np.mean(MAE_test)))
    
def train_elastic_net(X, y, n):
    pipeline = []
    pipeline.append(('estimator', ElasticNet(alpha=0.1)))
    pipeline = Pipeline(pipeline)
    
    adjusted_r2_train = []
    adjusted_r2_test = []
    
    MSE_train = []
    MSE_test = []
    
    MAE_train = []
    MAE_test = []
    
    kfold = KFold(n_splits=n, shuffle=True)
    
    for train_index, test_index in kfold.split(X):
        
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        
        pipeline.fit(X_train, y_train)
        
        y_train_predict = pipeline.predict(X_train)
        y_test_predict = pipeline.predict(X_test)
        
        MSE_train.append(mean_squared_error(y_train, y_train_predict))
        MSE_test.append(mean_squared_error(y_test, y_test_predict))
        
        y_train_score = pipeline.score(X_train, y_train)
        y_test_score = pipeline.score(X_test, y_test)
        
        adjusted_r2_train.append(1 - (1-y_train_score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1))
        adjusted_r2_test.append(1 - (1-y_test_score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
        
        MAE_train.append(mean_absolute_error(y_train, y_train_predict))
        MAE_test.append(mean_absolute_error(y_test, y_test_predict))
    
    print("Elastic Net Results:")
    print("Adjusted R^2 Train: {0}".format(np.mean(adjusted_r2_train)))
    print("Adjusted R^2 Test: {0}".format(np.mean(adjusted_r2_test)))
    print("RMSE Train: {0}".format(np.sqrt(np.mean(MSE_train))))
    print("RMSE Test: {0}".format(np.sqrt(np.mean(MSE_test))))
    print("MAE Train: {0}".format(np.mean(MAE_train)))
    print("MAE Test: {0}".format(np.mean(MAE_test)))
    
def train_neural_net(X, y, n):
    pipeline = []
    pipeline.append(('estimator', MLPRegressor(
        hidden_layer_sizes=(500,300, 100),
        solver='lbfgs',
        verbose=True,
        activation='relu',
        learning_rate='constant',
        early_stopping=True,
        max_iter=250,
        batch_size = 300)))
    pipeline = Pipeline(pipeline)
    
    adjusted_r2_train = []
    adjusted_r2_test = []
    
    MSE_train = []
    MSE_test = []
    
    MAE_train = []
    MAE_test = []
    
    kfold = KFold(n_splits=n, shuffle=True)
    
    for train_index, test_index in kfold.split(X):
        
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        
        pipeline.fit(X_train, y_train)
        
        y_train_predict = pipeline.predict(X_train)
        y_test_predict = pipeline.predict(X_test)
        
        MSE_train.append(mean_squared_error(y_train, y_train_predict))
        MSE_test.append(mean_squared_error(y_test, y_test_predict))
        
        y_train_score = pipeline.score(X_train, y_train)
        y_test_score = pipeline.score(X_test, y_test)
        
        adjusted_r2_train.append(1 - (1-y_train_score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1))
        adjusted_r2_test.append(1 - (1-y_test_score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
        
        MAE_train.append(mean_absolute_error(y_train, y_train_predict))
        MAE_test.append(mean_absolute_error(y_test, y_test_predict))
    
    print("Neural Net Results:")
    print("RMSE Train: {0}".format(np.sqrt(np.mean(MSE_train))))
    print("RMSE Test: {0}".format(np.sqrt(np.mean(MSE_test))))
    print("MAE Train: {0}".format(np.mean(MAE_train)))
    print("MAE Test: {0}".format(np.mean(MAE_test)))

In [3]:
data = pd.DataFrame(np.load('image_process_data.npy', allow_pickle=True))

In [4]:
data.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,461,462,463,464,465,466,467,468,469,470
0,575000.0,0,1,1,0,0,0,1,0,0,...,0,0.0,0,0,0,43.4633,0,0.0,0,0
1,1175000.0,1,1,0,1,0,0,1,0,0,...,0,0.0,0,0,0,0.0,0,0.0,0,0
2,2100000.0,2,1,1,1,0,0,1,0,0,...,0,0.0,0,0,0,0.0,0,0.0,0,0
3,1450000.0,3,1,0,1,0,0,0,0,0,...,0,0.0,0,0,0,0.0,0,0.0,0,0
4,450000.0,4,1,0,0,0,0,1,0,0,...,0,17.588,0,0,0,0.0,0,4.90104,0,0


In [16]:
prices = data[0]
house_info = data.drop(columns=[0, 2])

In [17]:
prices.head(5)

0       575000
1    1.175e+06
2      2.1e+06
3     1.45e+06
4       450000
Name: 0, dtype: object

In [18]:
house_info.head(5)

Unnamed: 0,1,3,4,5,6,7,8,9,10,11,...,461,462,463,464,465,466,467,468,469,470
0,0,1,0,0,0,1,0,0,0,0,...,0,0.0,0,0,0,43.4633,0,0.0,0,0
1,1,0,1,0,0,1,0,0,0,0,...,0,0.0,0,0,0,0.0,0,0.0,0,0
2,2,1,1,0,0,1,0,0,0,0,...,0,0.0,0,0,0,0.0,0,0.0,0,0
3,3,0,1,0,0,0,0,0,0,0,...,0,0.0,0,0,0,0.0,0,0.0,0,0
4,4,0,0,0,0,1,0,0,0,0,...,0,17.588,0,0,0,0.0,0,4.90104,0,0


In [24]:
train_gradient_boosting(house_info, prices, 5)

Gradient Boosted Regressor Results:
Adjusted R^2 Train: 0.7465568446427813
Adjusted R^2 Test: 0.6104627584444341
RMSE Train: 100808.53979588763
RMSE Test: 121414.15443202626
MAE Train: 58778.543382326294
MAE Test: 64858.52847110598


In [29]:
train_elastic_net(house_info, prices, 5)

Elastic Net Results:
Adjusted R^2 Train: 0.578953785125816
Adjusted R^2 Test: 0.5401758244657529
RMSE Train: 129928.08033434201
RMSE Test: 131905.78923254408
MAE Train: 73099.62446344952
MAE Test: 74253.25627655715


In [36]:
train_neural_net(house_info, prices, 5)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

Neural Net Results:
RMSE Train: 235758.80615387222
RMSE Test: 236413.85158283808
MAE Train: 157705.7656348957
MAE Test: 158046.9201820454
