In [1]:
import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

data = pd.read_excel("data.xlsx")
# Separating independent variables and target variable
X = data.drop(['_id', 'Fiyat'], axis=1)
y = data['Fiyat']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# XGBoost Regressor
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror', 
                          colsample_bytree = 0.3, 
                          learning_rate = 0.1,
                          max_depth = 4, 
                          n_estimators = 50)

# Training the model
xg_reg.fit(X_train,y_train)

# Making predictions
y_pred = xg_reg.predict(X_test)

errors = abs(y_pred - y_test)
error_percentage = (errors / y_test) * 100

# Displaying the predicted and actual prices
predictions = pd.DataFrame({'Gerçek Fiyat': y_test, 'Tahmin Edilen Fiyat': y_pred, 'Hata Oranı (%)': error_percentage})
pd.set_option('display.max_rows', None)
print(predictions)

# Error measurement
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE: %f" % (rmse))


       Gerçek Fiyat  Tahmin Edilen Fiyat  Hata Oranı (%)
20486        689000         6.662089e+05        3.307856
12320        498000         7.324533e+05       47.078978
8293         535000         7.228206e+05       35.106659
9798         815000         6.181124e+05       24.157983
4976         245000         3.993108e+05       62.984005
16409        445000         3.505907e+05       21.215576
40850       1280000         1.717279e+06       34.162412
37998        940000         1.059978e+06       12.763657
42263        475000         5.404904e+05       13.787447
43513        650000         5.515702e+05       15.143038
20404        435000         4.988507e+05       14.678312
34746        705000         8.776438e+05       24.488484
20513       1100000         1.287815e+06       17.074114
2174         620000         8.339610e+05       34.509839
16180        490000         5.059484e+05        3.254770
32896        485000         8.231851e+05       69.728879
36594        648000         5.5

In [2]:
import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


data = pd.read_csv("output_file.csv")
# Separating independent variables and target variable
X = data.drop(['_id', 'Fiyat'], axis=1)
y = data['Fiyat']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# XGBoost Regressor
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror', 
                          colsample_bytree = 0.3, 
                          learning_rate = 0.1,
                          max_depth = 4, 
                          alpha = 0, 
                          n_estimators = 50)

# Training the model
xg_reg.fit(X_train,y_train)

# Making predictions
y_pred = xg_reg.predict(X_test)

errors = abs(y_pred - y_test)
error_percentage = (errors / y_test) * 100

# Displaying the predicted and actual prices
predictions = pd.DataFrame({'Gerçek Fiyat': y_test, 'Tahmin Edilen Fiyat': y_pred, 'Hata Oranı (%)': error_percentage})
pd.set_option('display.max_rows', None)
print(predictions)

# Error measurement
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE: %f" % (rmse))


       Gerçek Fiyat  Tahmin Edilen Fiyat  Hata Oranı (%)
35043        530000         6.840892e+05       29.073432
34903        580000         5.546708e+05        4.367101
15570        486500         4.031861e+05       17.125167
24864        405000         2.761934e+05       31.804105
38541        638500         3.908018e+05       38.793765
24228        430000         3.046647e+05       29.147740
7094         820000         8.265006e+05        0.792759
39012        350000         2.788178e+05       20.337768
15956        759000         6.467956e+05       14.783193
399          620900         9.558954e+05       53.953193
15699       1058000         1.126929e+06        6.515064
32200        705000         4.371762e+05       37.989198
45661        640000         5.804038e+05        9.311914
38227        639000         6.158962e+05        3.615620
18622        575000         4.328454e+05       24.722543
14522        939000         6.106349e+05       34.969655
9444         529000         5.2

In [4]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score, KFold

data = pd.read_csv("output_file.csv")

# Separating independent variables and target variable
X = data.drop(['_id', 'Fiyat'], axis=1)
y = data['Fiyat']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# XGBoost Regression Model
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror', 
                          colsample_bytree = 0.3, 
                          learning_rate = 0.1,
                          max_depth = 4, 
                          n_estimators = 50)

# Performing K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(xg_reg, X, y, cv=kfold, scoring='neg_mean_squared_error')

# results
rmse_scores = np.sqrt(-scores)
print("RMSE Scores:", rmse_scores)
print("Mean RMSE:", np.mean(rmse_scores))

RMSE Scores: [ 1229530.45722592   892642.98092618   596823.74885915   659543.12798312
   775899.37486133   629071.44244091   792688.99258663 11425362.46632889
  1935393.44107619   880645.04673329]
Mean RMSE: 1981760.107902161


In [12]:
# daha sonra 
import numpy as np
import pandas as pd
import re

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from xgboost import XGBRegressor
useful_df = pd.read_csv("output_file.csv")


# Çeyreklerin hesaplanması
Q1 = useful_df["Fiyat"].quantile(0.25)
Q3 = useful_df["Fiyat"].quantile(0.75)

# IQR'nin hesaplanması
IQR = Q3 - Q1

# Aykırı değerlerin tanımlanması
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Aykırı değerlerin belirlenmesi
outliers = useful_df["Fiyat"][(useful_df["Fiyat"] < lower_bound) | (useful_df["Fiyat"] > upper_bound)]

print("Aykırı Değerler:")
print(outliers)

ready_to_train = useful_df.drop(outliers.index)
ready_to_train.reset_index(drop=True, inplace=True)


X = ready_to_train.drop("Fiyat", axis = 1).drop("_id",axis=1)
y = ready_to_train["Fiyat"]


X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size = 0.3,
                                                    random_state = 12
                                                    )

scaler_X_test = MinMaxScaler() # bu minmaxscaler işlemi sadece yapay sinir ağı eğitiminde kullanmak için var
scaler_X_train = MinMaxScaler() # yapay sinir ağına vermek için verilerin normalize edilmiş versiyonunu saklıyorum

scaled_X_test = pd.DataFrame(scaler_X_test.fit_transform(X_test), columns=X.columns)
scaled_X_train = pd.DataFrame(scaler_X_train.fit_transform(X_train), columns=X.columns)

# LinerRegression modeli
model_lr = LinearRegression()
model_lr.fit(scaled_X_train, y_train)
# Eğitim ve test hatasını hesaplayalım
train_predictions_lr = model_lr.predict(scaled_X_train)
test_predictions_lr = model_lr.predict(scaled_X_test)
train_rmse_lr = mean_squared_error(y_train, train_predictions_lr, squared=False)
test_rmse_lr = mean_squared_error(y_test, test_predictions_lr, squared=False)

print("Eğitim RMSE:", train_rmse_lr)
print("Test RMSE:", test_rmse_lr)

Aykırı Değerler:
33       1865900
41       1670000
57       2350000
83       1650000
88       1485750
          ...   
46078    2450000
46081    1520000
46082    1636000
46171    1679000
46200    1785000
Name: Fiyat, Length: 2632, dtype: int64
Eğitim RMSE: 159838.86014659857
Test RMSE: 217137.3506348087


