In [1]:
from sklearn.datasets import fetch_openml
import pandas as pd 
import numpy as np

data = fetch_openml(name="house_prices", as_frame=True)
X, y = data.data, data.target

In [2]:
house_prices_data = pd.concat([X, y], axis = 1)

house_prices_data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


### Cleaning Data

In [3]:
print(f'NA Data:')
print(house_prices_data.isna().sum())
print()
print(f'Data shape    : {house_prices_data.shape}')
print()
print(f'Duplicate Data: {house_prices_data.duplicated().sum()}')


NA Data:
Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64

Data shape    : (1460, 81)

Duplicate Data: 0


Hasil print Data NA tidak dapat menampilkan gambaran data NA yang lebih jelas. Pakai cara berikut. 

In [4]:
columns_with_na = []

for i in house_prices_data.columns: 
    if house_prices_data[i].isna().any(): 
        columns_with_na.append(i)

print(columns_with_na)
print()
print(f'Jumlah kolom yang memiliki data NA: {len(columns_with_na)}')

['LotFrontage', 'Alley', 'MasVnrType', 'MasVnrArea', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Electrical', 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence', 'MiscFeature']

Jumlah kolom yang memiliki data NA: 19


Karena jumlah features ada 81, maka 19 features yang memiliki jumlah NA, dihapus. Mungkin yang terhapus adalah feature-feature yang penting. Mengingat ini adalah test code, maka aspek performansi diabaikan. Yang terpenting adalah apakah algoritma yang dibangun secara scratch dapat berfungsi atau tidak. 

In [5]:
house_prices_data = house_prices_data.drop_duplicates(keep='last')
house_prices_data = house_prices_data.drop(columns = columns_with_na)

print(f'Data setelah 19 feature di-drop: {house_prices_data.shape}')

Data setelah 19 feature di-drop: (1460, 62)


In [6]:
X = house_prices_data.drop('SalePrice', axis=1)
y = house_prices_data['SalePrice']

### Splitting

In [7]:
from sklearn.model_selection import train_test_split 

X_train, X_not_train, y_train, y_not_train = train_test_split(X, y, test_size=0.2, random_state=123)

X_valid, X_test, y_valid, y_test           = train_test_split(X_not_train, y_not_train, test_size=0.5, random_state=123)

print('X train shape:', X_train.shape)
print('y train shape:', y_train.shape)
print('X test shape :', X_test.shape)
print('y test shape :', y_test.shape)
print('X valid shape:', X_valid.shape)
print('y valid shape:', y_valid.shape)

X train shape: (1168, 61)
y train shape: (1168,)
X test shape : (146, 61)
y test shape : (146,)
X valid shape: (146, 61)
y valid shape: (146,)


In [8]:
# Check if the column is numeric

num_features = []
cat_features = []

for i in X_train.columns:
    if pd.api.types.is_numeric_dtype(X_train[i]):
        num_features.append(i)
    else:
        cat_features.append(i)


In [9]:
cat_features

['MSZoning',
 'Street',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'KitchenQual',
 'Functional',
 'PavedDrive',
 'SaleType',
 'SaleCondition']

In [10]:
num_features

['Id',
 'MSSubClass',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'MiscVal',
 'MoSold',
 'YrSold']

### Scaling & One Hot Encoder

In [11]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler

y_scaler = StandardScaler()

y_train_scaled = y_scaler.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_valid_scaled = y_scaler.transform(y_valid.values.reshape(-1, 1)).flatten()
y_test_scaled  = y_scaler.transform(y_test.values.reshape(-1, 1)).flatten()

In [12]:
# OHE for categorical features
cat_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False, drop='first') 
X_train_cat = cat_encoder.fit_transform(X_train[cat_features])
X_valid_cat = cat_encoder.transform(X_valid[cat_features])
X_test_cat  = cat_encoder.transform(X_test[cat_features])

# Take the column names 
cat_feature_names = cat_encoder.get_feature_names_out(cat_features)

# Scale numerical features
num_scaler = MinMaxScaler()
X_train_num= num_scaler.fit_transform(X_train[num_features])
X_valid_num= num_scaler.transform(X_valid[num_features])
X_test_num = num_scaler.transform(X_test[num_features])

# combine cat and num into df
X_train_processed = pd.DataFrame(np.hstack([X_train_cat, X_train_num]), 
                                 columns = list(cat_feature_names) + num_features,
                                 index = X_train.index)

X_valid_processed = pd.DataFrame(np.hstack([X_valid_cat, X_valid_num]), 
                                 columns = list(cat_feature_names) + num_features,
                                 index = X_valid.index)


X_test_processed = pd.DataFrame(np.hstack([X_test_cat, X_test_num]), 
                                 columns = list(cat_feature_names) + num_features,
                                 index = X_test.index)

                            





In [13]:
X_train_processed.describe()

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
count,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,...,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0
mean,0.04024,0.011986,0.784247,0.154966,0.994863,0.025685,0.006849,0.646404,0.03339,0.024829,...,0.330909,0.113289,0.083624,0.038234,0.007148,0.029716,0.004673,0.003064,0.481164,0.464683
std,0.196605,0.10887,0.41152,0.362027,0.071519,0.158261,0.082512,0.47829,0.179731,0.15567,...,0.14979,0.149245,0.120064,0.108819,0.060636,0.114725,0.060836,0.035345,0.244391,0.333255
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.23237,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,0.25
50%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.337094,0.0,0.043876,0.0,0.0,0.0,0.0,0.0,0.454545,0.5
75%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.406206,0.196033,0.121115,0.0,0.0,0.0,0.0,0.0,0.636364,0.75
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
X_valid_processed.describe()

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
count,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,...,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0
mean,0.082192,0.006849,0.773973,0.136986,1.0,0.041096,0.006849,0.589041,0.020548,0.020548,...,0.348784,0.093269,0.089567,0.052462,0.002427,0.03994,0.0,0.001025,0.503113,0.388699
std,0.275602,0.082761,0.419697,0.345016,0.0,0.199195,0.082761,0.493701,0.142354,0.142354,...,0.157743,0.133381,0.119466,0.127699,0.029325,0.118101,0.0,0.006196,0.2383,0.330922
min,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.231135,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,0.0
50%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.336389,0.0,0.050274,0.0,0.0,0.0,0.0,0.0,0.454545,0.375
75%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.418371,0.186698,0.128885,0.0,0.0,0.0,0.0,0.0,0.636364,0.75
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.956276,0.669778,0.6234,0.576087,0.354331,0.597917,0.0,0.045161,1.0,1.0


In [15]:
X_test_processed.describe()

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
count,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,...,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0
mean,0.041096,0.006849,0.835616,0.116438,1.0,0.034247,0.006849,0.575342,0.054795,0.027397,...,0.33949,0.100118,0.094463,0.039384,0.00751,0.036102,0.0,0.002519,0.485679,0.433219
std,0.199195,0.082761,0.371899,0.321854,0.0,0.182488,0.082761,0.495992,0.228362,0.1638,...,0.151552,0.13305,0.131182,0.107536,0.05514,0.125599,0.0,0.014562,0.264438,0.316624
min,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.258992,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.272727,0.25
50%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.342031,0.0,0.051188,0.0,0.0,0.0,0.0,0.0,0.454545,0.5
75%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.422426,0.186114,0.144424,0.0,0.0,0.0,0.0,0.0,0.636364,0.75
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.74189,0.497083,0.665448,0.478261,0.482283,0.854167,0.0,0.129032,1.0,1.0


In [16]:
X_train_processed[cat_feature_names]

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,...,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
318,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
580,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
961,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
78,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1041,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1122,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1346,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1406,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [17]:
X_valid_processed[cat_feature_names]

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,...,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
995,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
680,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
694,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
388,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1296,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
985,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
167,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
154,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


In [18]:
X_test_processed[cat_feature_names]

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,...,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
141,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
384,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
973,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
784,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1126,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1011,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
734,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
529,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
1108,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# convert data from data frame to numpy array 

X_train_processed = X_train_processed.values
X_valid_processed = X_valid_processed.values
X_test_processed  = X_test_processed.values

### Modeling

In [20]:
# custom model
from Linear_Regression import linear_regression
from KNN_Regressor import KNearest_Neighbors_Regressor
from SVR_Regressor import SVr_

from DecTree_Regressor import Decision_Tree_Regressor
from RandomForest_Regressor import Random_Forest_Regressor
from AdaBoost_Regressor import Adaboost_Regressor

# sklearn model
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor

# metrics
from sklearn.metrics import mean_squared_error

##### Linear Regression

In [21]:
# Custom Linear Regression
lre_cus = linear_regression(learning_rate=0.001, n_iterations=1000)
lre_cus.fit(X_train_processed, y_train_scaled)

# Sklearn Linear Regression
lre_skl = LinearRegression()
lre_skl.fit(X_train_processed, y_train_scaled)

# Predictions for validation and test sets
cus_y_pred_valid = lre_cus.predict(X_valid_processed)
cus_y_pred_test  = lre_cus.predict(X_test_processed)

skl_y_pred_valid = lre_skl.predict(X_valid_processed)
skl_y_pred_test  = lre_skl.predict(X_test_processed)

# Calculate Mean Squared Error (MSE) for validation and test sets
cus_err_valid = mean_squared_error(y_valid_scaled, cus_y_pred_valid)
cus_err_test  = mean_squared_error(y_test_scaled, cus_y_pred_test)

skl_err_valid = mean_squared_error(y_valid_scaled, skl_y_pred_valid)
skl_err_test  = mean_squared_error(y_test_scaled, skl_y_pred_test)

# Print Results
print('Custom Linear Regression MS Error Valid  :', cus_err_valid)
print('Custom Linear Regression MS Error Test   :', cus_err_test)
print()
print('SKLearn Linear Regression MS Error Valid :', skl_err_valid)
print('SKLearn Linear Regression MS Error Test  :', skl_err_test)


Custom Linear Regression MS Error Valid  : 0.3584134389463394
Custom Linear Regression MS Error Test   : 0.6151909362153132

SKLearn Linear Regression MS Error Valid : 0.08546733732770194
SKLearn Linear Regression MS Error Test  : 0.13995697739673346


##### KNN Regressor

In [22]:
# Custom KNN Regressor
knn_cus = KNearest_Neighbors_Regressor(k_value=7, distance_metric='manhattan', average_metric='weighted mean')
knn_cus.fit(X_train_processed, y_train_scaled)

# Sklearn KNN Regressor
knn_skl = KNeighborsRegressor(n_neighbors=5) 
knn_skl.fit(X_train_processed, y_train_scaled)

# Predictions for validation and test sets
cus_y_pred_valid = knn_cus.predict(X_valid_processed)
cus_y_pred_test  = knn_cus.predict(X_test_processed)

skl_y_pred_valid = knn_skl.predict(X_valid_processed)
skl_y_pred_test  = knn_skl.predict(X_test_processed)

# Calculate Mean Squared Error (MSE) for validation and test sets
cus_err_valid = mean_squared_error(y_valid_scaled, cus_y_pred_valid)
cus_err_test  = mean_squared_error(y_test_scaled, cus_y_pred_test)

skl_err_valid = mean_squared_error(y_valid_scaled, skl_y_pred_valid)
skl_err_test  = mean_squared_error(y_test_scaled, skl_y_pred_test)

# Print Results
print('Custom KNN Regressor MSE Valid :', cus_err_valid)
print('Custom KNN Regressor MSE Test  :', cus_err_test)
print()
print('SKLearn KNN Regressor MSE Valid:', skl_err_valid)
print('SKLearn KNN Regressor MSE Test :', skl_err_test)

Custom KNN Regressor MSE Valid : 0.19210221674129116
Custom KNN Regressor MSE Test  : 0.32192013477391795

SKLearn KNN Regressor MSE Valid: 0.2275365756071374
SKLearn KNN Regressor MSE Test : 0.46342819031431737


##### SV Regressor

In [23]:
# Custom SV Regressor
svr_cus = SVr_(lambda_param=0.001, learning_rate=0.001, num_of_iters=1000, epsilon=0.1) 
svr_cus.fit(X_train_processed, y_train_scaled)

# Sklearn SV Regressor
svr_skl = SVR()
svr_skl.fit(X_train_processed, y_train_scaled)

# Predictions for validation and test sets
cus_y_pred_valid = svr_cus.predict(X_valid_processed)
cus_y_pred_test  = svr_cus.predict(X_test_processed)

skl_y_pred_valid = svr_skl.predict(X_valid_processed)
skl_y_pred_test  = svr_skl.predict(X_test_processed)

# Calculate Mean Squared Error (MSE) for validation and test sets
cus_err_valid = mean_squared_error(y_valid_scaled, cus_y_pred_valid)
cus_err_test  = mean_squared_error(y_test_scaled, cus_y_pred_test)

skl_err_valid = mean_squared_error(y_valid_scaled, skl_y_pred_valid)
skl_err_test  = mean_squared_error(y_test_scaled, skl_y_pred_test)

# Print Results
print('Custom SV Regressor MSE Valid :', cus_err_valid)
print('Custom SV Regressor MSE Test  :', cus_err_test)
print()
print('SKLearn SV Regressor MSE Valid:', skl_err_valid)
print('SKLearn SV Regressor MSE Test :', skl_err_test)


Custom SV Regressor MSE Valid : 0.08618370172879644
Custom SV Regressor MSE Test  : 0.17147148444441604

SKLearn SV Regressor MSE Valid: 0.08365721898603286
SKLearn SV Regressor MSE Test : 0.21245351993110312


##### Decision Tree

In [24]:
# Custom Decision Tree Regressor
dtr_cus = Decision_Tree_Regressor(max_depth=100)
dtr_cus.fit(X_train_processed, y_train_scaled)

# Sklearn Decision Tree Regressor
dtr_skl = DecisionTreeRegressor(max_depth=5)
dtr_skl.fit(X_train_processed, y_train_scaled)

# Predictions for validation and test sets
cus_y_pred_valid = dtr_cus.predict(X_valid_processed)
cus_y_pred_test  = dtr_cus.predict(X_test_processed)

skl_y_pred_valid = dtr_skl.predict(X_valid_processed)
skl_y_pred_test  = dtr_skl.predict(X_test_processed)

# Calculate Mean Squared Error (MSE) for validation and test sets
cus_err_valid = mean_squared_error(y_valid_scaled, cus_y_pred_valid)
cus_err_test  = mean_squared_error(y_test_scaled, cus_y_pred_test)

skl_err_valid = mean_squared_error(y_valid_scaled, skl_y_pred_valid)
skl_err_test  = mean_squared_error(y_test_scaled, skl_y_pred_test)

# Print Results
print('Custom Decision Tree Regressor MSE Valid :', cus_err_valid)
print('Custom Decision Tree Regressor MSE Test  :', cus_err_test)
print()
print('SKLearn Decision Tree Regressor MSE Valid:', skl_err_valid)
print('SKLearn Decision Tree Regressor MSE Test :', skl_err_test)


Custom Decision Tree Regressor MSE Valid : 0.15945952995369123
Custom Decision Tree Regressor MSE Test  : 0.27847324098689896

SKLearn Decision Tree Regressor MSE Valid: 0.2296303475769851
SKLearn Decision Tree Regressor MSE Test : 0.27305882380428764


##### Random Forest

In [25]:
# Custom Random Forest Regressor
rfo_cus = Random_Forest_Regressor(n_trees=10, max_depth=5)
rfo_cus.fit(X_train_processed, y_train_scaled)

# Sklearn Random Forest Regressor
rfo_skl = RandomForestRegressor(n_estimators=10, max_depth=5)
rfo_skl.fit(X_train_processed, y_train_scaled)

# Predictions for validation and test sets
cus_y_pred_valid = rfo_cus.predict(X_valid_processed)
cus_y_pred_test  = rfo_cus.predict(X_test_processed)

skl_y_pred_valid = rfo_skl.predict(X_valid_processed)
skl_y_pred_test  = rfo_skl.predict(X_test_processed)

# Calculate Mean Squared Error (MSE) for validation and test sets
cus_err_valid = mean_squared_error(y_valid_scaled, cus_y_pred_valid)
cus_err_test  = mean_squared_error(y_test_scaled, cus_y_pred_test)

skl_err_valid = mean_squared_error(y_valid_scaled, skl_y_pred_valid)
skl_err_test  = mean_squared_error(y_test_scaled, skl_y_pred_test)

# Print Results
print('Custom Random Forest Regressor MSE Valid :', cus_err_valid)
print('Custom Random Forest Regressor MSE Test  :', cus_err_test)
print()
print('SKLearn Random Forest Regressor MSE Valid:', skl_err_valid)
print('SKLearn Random Forest Regressor MSE Test :', skl_err_test)


Custom Random Forest Regressor MSE Valid : 0.1649024976758325
Custom Random Forest Regressor MSE Test  : 0.18017615404492685

SKLearn Random Forest Regressor MSE Valid: 0.15558307376130154
SKLearn Random Forest Regressor MSE Test : 0.15793751293698582


##### Ada Boost Regressor

In [26]:
# Custom AdaBoost Regressor
ada_cus = Adaboost_Regressor(n_clf=10)
ada_cus.fit(X_train_processed, y_train_scaled)

# Sklearn AdaBoost Regressor
ada_skl = AdaBoostRegressor(n_estimators=10)
ada_skl.fit(X_train_processed, y_train_scaled)

# Predictions for validation and test sets
cus_y_pred_valid = ada_cus.predict(X_valid_processed)
cus_y_pred_test  = ada_cus.predict(X_test_processed)

skl_y_pred_valid = ada_skl.predict(X_valid_processed)
skl_y_pred_test  = ada_skl.predict(X_test_processed)

# Calculate Mean Squared Error (MSE) for validation and test sets
cus_err_valid = mean_squared_error(y_valid_scaled, cus_y_pred_valid)
cus_err_test  = mean_squared_error(y_test_scaled, cus_y_pred_test)

skl_err_valid = mean_squared_error(y_valid_scaled, skl_y_pred_valid)
skl_err_test  = mean_squared_error(y_test_scaled, skl_y_pred_test)

# Print Results
print('Custom AdaBoost Regressor MSE Valid :', cus_err_valid)
print('Custom AdaBoost Regressor MSE Test  :', cus_err_test)
print()
print('SKLearn AdaBoost Regressor MSE Valid:', skl_err_valid)
print('SKLearn AdaBoost Regressor MSE Test :', skl_err_test)


Custom AdaBoost Regressor MSE Valid : 2.249079907845352
Custom AdaBoost Regressor MSE Test  : 15.75304587552068

SKLearn AdaBoost Regressor MSE Valid: 0.23191369088370367
SKLearn AdaBoost Regressor MSE Test : 0.23207143251475693
