In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import  r2_score
import sklearn.metrics as mt
from  sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, LassoCV, Lasso, ElasticNet, ElasticNetCV
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import joblib

In [2]:
df = pd.read_excel('house_ads_kayseri.xlsx')

df['Number Rooms'] = df['Number Rooms'].apply(lambda x: int(float(x.replace('weekend', '').replace('+1', '').strip())))
df['Floor'] = df['Floor'].apply(lambda x: x.replace('layers', '').strip())
df['Floor'] = df['Floor'].apply(lambda x: int(x.split('.')[0]) if x.split('.')[0].isdigit() else 0)
df['Size'] = df['Size'].apply(lambda x: x.replace('texture', '').replace('m2', '').strip())

df['Price'] = df['Price'].apply(lambda x: x.replace('.', '').replace('TL', '').strip())

df['Address'] = df['Address'].apply(lambda x: x.split('-')[1].strip() if len(x.split('-')) > 1 else None)

# Filter lines starting with 'event'
df = df[~df['Size'].str.startswith('event')]

# Save cleaned data to a new Excel file
df.to_excel('cleared_data_kayseri.xlsx', index=False)

In [4]:
data = pd.read_excel('cleared_data_kayseri.xlsx')


# Convert categorical column to dummy variables
data = pd.get_dummies(data, columns=['Address'], drop_first=False)

data.head()


Unnamed: 0,Number Rooms,Floor,Size,Price,Address_Kocasinan,Address_Melikgazi,Address_Talas
0,2,0,120,990000,1,0,0
1,3,11,165,2500000,0,1,0
2,3,9,150,1875000,1,0,0
3,4,7,200,2350000,0,0,1
4,3,9,165,2050000,1,0,0


In [5]:
y = data["Price"]
X = data.drop("Price", axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [6]:
def mse(prediction):
    result = mt.mean_squared_error(y_test, prediction)
    return result

def rmse(prediction):
    result = mt.mean_squared_error(y_test, prediction, squared = False)
    return result

def mae(prediction):
    result = mt.mean_absolute_error(y_test, prediction)
    return result

def confirmation(model):
    scores = cross_val_score(model, X, y, cv = 5)
    return scores.mean()

In [17]:
# Random Forest Regressor
rf = RandomForestRegressor()
rf.fit(X_train, y_train)

predictions = rf.predict(X_test)

# Calculate R-squared for training set
y_train_pred = rf.predict(X_train)
r2_train = r2_score(y_train, y_train_pred)

# Evaluating the model's performance
mse1 = mse(predictions)
rmse1 = rmse(predictions)
mae1 = mae(predictions)
score = confirmation(rf)
r2_test = r2_score(y_test, predictions)

                    
print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\nConfirmation:\t{}".format(r2_train, r2_test,mse1, rmse1, mae1, score))



R-square Train:	0.9893104128886402
R-square Test:	0.9903292397042894
MSE:	3768726335.6035686
RMSE:	61389.95305099661
MAE:	15821.846393017619
Confirmation:	0.989592718015669


In [9]:
# Lineer
lr = LinearRegression()
lr.fit(X_train, y_train)
prediction = lr.predict(X_test)

r2_train = lr.score(X_train, y_train)
r2 = mt.r2_score(y_test, prediction)

mse1 = mse(prediction)
rmse1 = rmse(prediction)
mae1 = mae(prediction)
score = confirmation(lr)

#Retrieving the model
joblib.dump(lr,'house-data-lineer-model.pkl')

print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\nConfirmation:\t{}".format(r2_train, r2,mse1, rmse1, mae1, score))


R-square Train:	0.6084756516911859
R-square Test:	0.6111284041404045
MSE:	151544509394.3938
RMSE:	389287.18113289296
MAE:	317038.8283137231
Confirmation:	0.6112516060896958


In [11]:
#Ridge Regression

lambdalar = 10**np.linspace(10,-2,100)*0.5
ridge_cv = RidgeCV(alphas = lambdalar, scoring = "r2")
ridge_cv.fit(X_train, y_train)


lr_rid = Ridge(alpha = 0.4348745013088917)
lr_rid.fit(X_train, y_train)


prediction2 = lr_rid.predict(X_test)

r2_rid_train = lr_rid.score(X_train,y_train)
r2_rid = mt.r2_score(y_test, prediction2)
mse_rid = mse(prediction2)
rmse_rid = rmse(prediction2)
mae_rid = mae(prediction2)
score_rid = confirmation(lr_rid)

print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\nConfirmation:\t{}".format(r2_rid_train,r2_rid,mse_rid,rmse_rid,mae_rid,score_rid))


R-square Train:	0.608475160473701
R-square Test:	0.6111136659217116
MSE:	151550252925.53604
RMSE:	389294.55804767687
MAE:	317021.83291187015
Confirmation:	0.6112511999809119


In [12]:
#LASSO Regression

lamb = LassoCV(cv = 10, max_iter = 10000).fit(X_train,y_train).alpha_


lr_las = Lasso(alpha = 222.21405622009905)
lr_las.fit(X_train, y_train)
prediction3 = lr_las.predict(X_test)

r2_las_train = lr_las.score(X_train,y_train)
r2_las = mt.r2_score(y_test, prediction3)
mse_las = mse(prediction3)
rmse_las = rmse(prediction3)
mae_las = mae(prediction3)
score_las = confirmation(lr_las)

print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\nConfirmation:\t{}".format(r2_las_train,r2_las,mse_las,rmse_las,rmse_las,score_las))


R-square Train:	0.6084727362029263
R-square Test:	0.6109380518297378
MSE:	151618690300.99435
RMSE:	389382.4473457867
MAE:	389382.4473457867
Confirmation:	0.6112487567186221


In [13]:
#Elastic Net Regression

lr_elas = ElasticNet(alpha = 0.1)
lr_elas.fit(X_train, y_train)
prediction4 = lr_elas.predict(X_test)

r2_elas_train = lr_elas.score(X_train,y_train)
r2_elas = mt.r2_score(y_test, prediction4)
mse_elas = mse(prediction4)
rmse_elas = rmse(prediction4)
mae_elas = mae(prediction4)
score_elas = confirmation(lr_elas)

print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\nConfirmation:\t{}".format(r2_elas_train,r2_elas,mse_elas,rmse_elas,mae_elas,score_elas))


R-square Train:	0.6047394083723714
R-square Test:	0.6065275816762419
MSE:	153337464679.80365
RMSE:	391583.2793669868
MAE:	316835.72972072574
Confirmation:	0.6072823755776929


In [14]:
# Decision Tree Regressor
dt_reg = DecisionTreeRegressor(random_state=0)
dt_reg.fit(X_train, y_train)
prediction_dt = dt_reg.predict(X_test)

r2_dt_train = dt_reg.score(X_train, y_train)
r2_dt = r2_score(y_test, prediction_dt)
mse_dt = mse(prediction_dt)
rmse_dt = rmse(prediction_dt)
mae_dt = mae(prediction_dt)
score_dt = confirmation(dt_reg)

print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\nConfirmation:\t{}".format(r2_dt_train, r2_dt, mse_dt, rmse_dt, mae_dt, score_dt))

R-square Train:	0.9893104233101516
R-square Test:	0.990327022834706
MSE:	3769590256.797583
RMSE:	61396.98898804063
MAE:	15823.262839879155
Confirmation:	0.9895907180916627


In [15]:
# Gradient Boosting Regressor

gb_reg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_reg.fit(X_train, y_train)
prediction_gb = gb_reg.predict(X_test)

r2_gb_train = gb_reg.score(X_train, y_train)
r2_gb = r2_score(y_test, prediction_gb)
mse_gb = mse(prediction_gb)
rmse_gb = rmse(prediction_gb)
mae_gb = mae(prediction_gb)
score_gb = confirmation(gb_reg)


print("R-square Train:\t{}\nR-square Test:\t{}\nMSE:\t{}\nRMSE:\t{}\nMAE:\t{}\ncCnfirmation:\t{}".format(r2_gb_train, r2_gb, mse_gb, rmse_gb, mae_gb, score_gb))

R-square Train:	0.9887504600810885
R-square Test:	0.9897428680603153
MSE:	3997237247.8300495
RMSE:	63223.70795698438
MAE:	28260.97007540884
cCnfirmation:	0.9890531215619909
