## Verilerin Lineer Regresyon ile Tahmin Edilmesi

In [32]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [33]:
df= pd.read_csv("data_cleaned.csv")

In [34]:
df["city"]= df["city"].astype("category")
df["district"]= df["district"].astype("category")
df["neighborhood"]= df["neighborhood"].astype("category")
df["room"]= df["room"].astype("int64")
df["living_room"]= df["living_room"].astype("int64")
df["area"]= df["area"].astype("int64")
df["age"]= df["age"].astype("int64")
df["floor"]= df["floor"].astype("int64")
df["price"]= df["price"].astype("int64")
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211 entries, 0 to 210
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   city          211 non-null    category
 1   district      211 non-null    category
 2   neighborhood  211 non-null    category
 3   room          211 non-null    int64   
 4   living_room   211 non-null    int64   
 5   area          211 non-null    int64   
 6   age           211 non-null    int64   
 7   floor         211 non-null    int64   
 8   price         211 non-null    int64   
dtypes: category(3), int64(6)
memory usage: 16.4 KB
None


In [35]:
categorical_features =["city","district","neighborhood"]
numerical_features =["room","living_room","area","age","floor"]

In [36]:
full_pipeline= ColumnTransformer([
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
])

In [37]:
X= df.drop("price",axis= 1)
y= df["price"]

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [39]:
model = Pipeline([
    ("preparation", full_pipeline),
    ("model",LinearRegression())
])

In [40]:
model.fit(X_train, y_train)

In [42]:
y_pred= model.predict(X_test)
mse=mean_squared_error(y_test,y_pred)
rmse= np.sqrt(mse)
r2=r2_score(y_test,y_pred)

In [43]:
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R^2: {r2}")

MSE: 15294476.161883973
RMSE: 3910.815280971983
R^2: 0.059357157287005635


In [44]:
feature_importances= model.named_steps["model"].coef_
print(feature_importances)

[ 2.02759104e+03  0.00000000e+00  2.97275294e+02 -2.21455585e+02
  6.31428301e+02  4.43788024e+02 -4.43788024e+02 -2.58113021e+02
  2.66962762e+02  5.09952771e+02 -3.91879415e+03 -1.26629694e+03
 -5.36455338e+02  8.71211607e+02  1.46474161e+03 -2.70081919e+03
  2.45917571e+02  6.56568039e+03 -2.51720308e+03  1.84473199e+03
  5.71743183e+00 -4.92716798e+02  3.02497155e+03  1.01439055e+03
 -6.22554616e+02 -2.28720634e+03 -1.21411877e+03  8.48363435e+02
  1.08599637e+03 -2.14677061e+02  2.41636536e+03  5.86609594e+03
  1.46474161e+03 -6.30983515e+03 -2.31706855e+03 -9.82038636e+03
 -8.76341461e+02 -5.24811277e+03 -2.31044368e+03 -5.34739320e+02
 -1.10100702e+03 -3.60852266e+03 -3.44132736e+03  3.97719682e+03
  4.05291347e+03 -6.82188806e+02 -4.44381208e+03 -3.25591211e+03
 -4.88198214e+03 -1.62370064e+03 -1.28611233e+03 -6.07488695e+03
 -1.78912840e+03  1.69201144e+03  4.89139047e+03  7.17024135e+03
 -8.80900857e+02 -4.01855739e+02  3.99953943e+03 -3.35349954e+03
 -4.19840985e+03  2.95372

In [45]:
print("Numerical Features")
for i in range(len(numerical_features)):
    print(numerical_features[i], feature_importances[i])

Numerical Features
room 2027.5910372427973
living_room 0.0
area 297.27529374657684
age -221.45558504585952
floor 631.4283008048562


In [46]:
print("Categorical Features")
for i in range(len(categorical_features)):
    for j in range(len(model.named_steps["preparation"].transformers_[1][1].categories_[i])):
        print(model.named_steps["preparation"].transformers_[1][1].categories_[i][j], feature_importances[len(numerical_features)+j])

Categorical Features
antalya 443.7880244086453
bursa -443.78802440899017
aksu 443.7880244086453
alanya -443.78802440899017
dosemealti -258.11302138212386
elmali 266.9627616240302
finike 509.95277133575024
gazipasa -3918.794151415851
gemlik -1266.2969352140613
kas -536.4553375357124
kemer 871.2116066037659
kepez 1464.7416099913232
konyaalti -2700.819186180705
korkuteli 245.9175708107557
kumluca 6565.680387639014
manavgat -2517.2030801172714
mudanya 1844.7319942082318
muratpasa 5.717431834233607
nilufer -492.7167978343001
osmangazi 3024.971545618299
serik 1014.39055138406
yildirim -622.5546155465987
30_agustos_zafer 443.7880244086453
ahatli -443.78802440899017
aktoprak -258.11302138212386
altinkale 266.9627616240302
altintas 509.95277133575024
andifli -3918.794151415851
arslanbucak -1266.2969352140613
asagipazar -536.4553375357124
ataturk 871.2116066037659
avsallar 1464.7416099913232
aydin -2700.819186180705
baglik 245.9175708107557
bahceyaka 6565.680387639014
baris -2517.2030801172714
b

In [58]:
new_data=pd.DataFrame({
    "city": ["antalya"],
    "district": ["alanya"],
    "neighborhood": ["mahmutlar"],
    "room": [1],
    "living_room": [1],
    "area": [50],
    "age": ["1"],
    "floor": ["3"],
})
print(model.predict(new_data))

[12181.03491465]


In [57]:
print(df[(df["city"]=="antalya")&
         (df["district"]=="alanya")&
          (df["neighborhood"]=="mahmutlar")
         ])

        city district neighborhood  room  living_room  area  age  floor  price
38   antalya   alanya    mahmutlar     1            1    70    5      2  15000
39   antalya   alanya    mahmutlar     1            1    54    1      3  10000
43   antalya   alanya    mahmutlar     2            1    90    5      2  15000
61   antalya   alanya    mahmutlar     2            1   100    3      5  15000
70   antalya   alanya    mahmutlar     2            1   115   15      4  20000
87   antalya   alanya    mahmutlar     1            1    60    0      3  12000
110  antalya   alanya    mahmutlar     1            1    70    5      3  10000
119  antalya   alanya    mahmutlar     2            1   120   15      4  13000
142  antalya   alanya    mahmutlar     3            1   160    1     -1  15000
146  antalya   alanya    mahmutlar     2            1   125   10      5  15000
151  antalya   alanya    mahmutlar     1            1    65    3      3  15000
154  antalya   alanya    mahmutlar     2            