In [41]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')
# quoting=3 corresponds to csv.QUOTE_NONE: quote characters in the file are
# read as ordinary text rather than as field delimiters.
dataset = pd.read_csv(filepath_or_buffer='Housing.csv', quoting=3)

In [42]:
# Feature matrix: every column except the target, as a NumPy array.
X = dataset.drop('price', axis=1).values
# Target vector: the 'price' column as a NumPy array.
y = dataset.loc[:, 'price'].values

In [43]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler




# Step label -> positional column index in X for the yes/no columns.
binary_columns = {
    'mainroad': 4,
    'guestroom': 5,
    'basement': 6,
    'hotwater': 7,
    'airconditioning': 8,
    'prefarea': 10,
}

# One independent encoder per yes/no column: "no" -> 0, "yes" -> 1.
steps = [
    (label, OrdinalEncoder(categories=[["no", "yes"]], dtype=np.int8), [column])
    for label, column in binary_columns.items()
]

# Furnishing status is ordered: unfurnished (0) < semi-furnished (1) < furnished (2).
steps.append(
    (
        'furnishing',
        OrdinalEncoder(
            categories=[["unfurnished", "semi-furnished", "furnished"]],
            dtype=np.int8,
        ),
        [11],
    )
)

# Columns not listed in `steps` are passed through unchanged.
transformer = ColumnTransformer(transformers=steps, remainder='passthrough')


In [44]:
from sklearn.linear_model import ElasticNet

# Elastic-net linear model; alpha and l1_ratio are scikit-learn's documented
# defaults, written out so the penalty in use is visible to the reader.
regressor = ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=100)


In [45]:
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score

# Shuffled 10-fold cross-validation of `regressor`.
# All preprocessing (encoding and scaling) is fit on the training rows of each
# fold only and then applied to that fold's held-out rows.
folds = 10
kf = KFold(n_splits=folds, shuffle=True, random_state=100)

r2s = []
# NOTE: despite its name, `mapes` collects the per-fold RMSE values. The name is
# kept unchanged because the results cell further down reads it.
mapes = []

for fold_no, (train, test) in enumerate(kf.split(X), start=1):
    X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]

    # Encode the categorical columns.
    X_train = transformer.fit_transform(X_train)
    X_test = transformer.transform(X_test)

    # Use one scaler per quantity. Re-fitting a single scaler on the target
    # would overwrite the feature statistics and make the code depend on the
    # exact order of the statements below.
    x_scaler = StandardScaler()
    y_scaler = StandardScaler()
    X_train = x_scaler.fit_transform(X_train)
    X_test = x_scaler.transform(X_test)

    # StandardScaler requires 2-D input; flatten back to 1-D afterwards so the
    # target has the same shape as the regressor's predictions.
    y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
    y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    # RMSE is expressed in standardized target units (the target was scaled
    # with the training-fold mean/std), not in original price units.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    r2s.append(r2)
    mapes.append(rmse)

    print(f"FOLD: {fold_no}")
    print(f"R squared: {r2}")
    print(f"Root Mean Squared Error :  {rmse}")
    print()

FOLD: 1
R squared: 0.03669064839927638
Root Mean Squared Error :  1.020573187721559

FOLD: 2
R squared: 0.015751795400773094
Root Mean Squared Error :  0.9974486581273214

FOLD: 3
R squared: 0.017905046712131445
Root Mean Squared Error :  1.1284330406004148

FOLD: 4
R squared: 0.019427004860909203
Root Mean Squared Error :  1.030811061469578

FOLD: 5
R squared: 0.000903741963211746
Root Mean Squared Error :  0.9458017446735189

FOLD: 6
R squared: 0.033986115498691016
Root Mean Squared Error :  0.7494512882347261

FOLD: 7
R squared: -0.029851070813603053
Root Mean Squared Error :  0.8388699065647447

FOLD: 8
R squared: 0.0027900091471786403
Root Mean Squared Error :  0.9308178311344145

FOLD: 9
R squared: 0.03108618069635949
Root Mean Squared Error :  0.7427632352934569

FOLD: 10
R squared: 0.010637082534359132
Root Mean Squared Error :  1.3875515725902825



In [46]:
# Summary row: model name, fold count, then mean and standard deviation
# (rounded to 4 decimals) of the per-fold R^2 scores followed by the same two
# statistics for the per-fold values stored in `mapes`.
results = [
    type(regressor).__name__,
    folds,
    *(
        statistic(scores).round(4)
        for scores in (r2s, mapes)
        for statistic in (np.mean, np.std)
    ),
]

In [47]:
# Labels are listed in the same order as the entries of `results`.
summary_labels = (
    "Regressor: ",
    "Number of folds: ",
    "Mean R squared: ",
    "STD R squared: ",
    "Mean RMSE: ",
    "STD RMSE: ",
)
# Print one "label value" line per summary entry.
for label, value in zip(summary_labels, results):
    print(label + str(value))

Regressor: ElasticNet
Number of folds: 10
Mean R squared: 0.0139
STD R squared: 0.0187
Mean RMSE: 0.9773
STD RMSE: 0.1804


In [48]:
from csv import writer

# Append this run's summary as a single row to results.csv.
# newline='' hands line-ending control to the csv writer, as the csv module
# requires; without it, platforms that translate '\n' on write (e.g. Windows)
# end up with a blank line after every row.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('results.csv', 'a', newline='') as f_object:
    writer(f_object).writerow(results)