# Regression - Ev Fiyatları Tahmin Etme

In [7]:
import pandas as pd 
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score, mean_squared_error

import warnings #uyarıları kapatmak
warnings.filterwarnings('ignore')

#dosyayı okutma
df=pd.read_csv('kc_house_data.csv')

#outlier çıkartma
df=df[df['bedrooms']<7]
df=df[df['bathrooms']<5]

#type'ını kategori yapma 
df['zipcode']=df['zipcode'].astype('category')

df['age']=2023-df['yr_built'] # yaşını hesaplıyacağız
df['bedrooms']=df['bedrooms']**2 
df['bathrooms']=df['bathrooms']**2 
df['sqft_living']=df['sqft_living']**2 
df['grade']=df['grade']**2
df['condition']=df['condition']**2
df['basement']=np.where(df['sqft_basement']>0,1,0) # bodrum katı varsa 1 yaz
df['renovated']=np.where(df['yr_renovated']>0,1,0) # yenilendiyse 1 yaz

outliers=df.quantile(.97) # 0.97 sağdan %3, 0.03 soldan keser
df=df[df['price']<outliers['price']]
df=df[df['bedrooms']<outliers['bedrooms']]

In [8]:
df.columns

Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long', 'sqft_living15', 'sqft_lot15', 'age', 'basement',
       'renovated'],
      dtype='object')

In [9]:
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,age,basement,renovated
0,7129300520,20141013T000000,221900.0,9,1.0,1392400,5650,1.0,0,0,...,1955,0,98178,47.5112,-122.257,1340,5650,68,0,0
1,6414100192,20141209T000000,538000.0,9,5.0625,6604900,7242,2.0,0,0,...,1951,1991,98125,47.721,-122.319,1690,7639,72,1,1
2,5631500400,20150225T000000,180000.0,4,1.0,592900,10000,1.0,0,0,...,1933,0,98028,47.7379,-122.233,2720,8062,90,0,0
3,2487200875,20141209T000000,604000.0,16,9.0,3841600,5000,1.0,0,0,...,1965,0,98136,47.5208,-122.393,1360,5000,58,1,0
4,1954400510,20150218T000000,510000.0,9,4.0,2822400,8080,1.0,0,0,...,1987,0,98074,47.6168,-122.045,1800,7503,36,0,0


In [10]:
x=df[['bedrooms', 'bathrooms', 'sqft_living', 'grade',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'zipcode', 'sqft_living15', 'sqft_lot15', 'age', 'basement', 'renovated']]
y=df['price']

x=pd.get_dummies(x,drop_first=True) 

x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.20,random_state=42)

lr = LinearRegression()
R=Ridge()
L=Lasso()

## Model 1

In [11]:
model=lr.fit(x_train,y_train) #Fit makinanın öğrenmesini sağlıyor

In [12]:
tahmin=model.predict(x_test) #Tahmin ettik

In [13]:
r2_score(tahmin,y_test) #7983

0.8139365298925918

In [14]:
mean_squared_error(tahmin,y_test)**.5 #86049

89133.55866872334

## Model 2

In [15]:
model2=L.fit(x_train,y_train)

In [16]:
tahmin2=model2.predict(x_test)

In [17]:
r2_score(tahmin2,y_test)

0.8138587361746479

In [18]:
mean_squared_error(tahmin2,y_test)**.5

89131.15822160276

## Model 3

In [19]:
model3=R.fit(x_train,y_train)

In [20]:
tahmin3=model3.predict(x_test)

In [21]:
r2_score(tahmin3,y_test)

0.8121447830678756

In [22]:
mean_squared_error(tahmin3,y_test)**.5

89200.27012685574

### HATA HESAPLAMA

In [23]:
mean_squared_error(tahmin,y_test)**.5  #HATAYI HESAPLIYOR LinearRegression()

89133.55866872334

In [24]:
mean_squared_error(tahmin2,y_test)**.5 #HATAYI HESAPLIYOR Lasso()

89131.15822160276

In [25]:
mean_squared_error(tahmin3,y_test)**.5 #HATAYI HESAPLIYOR Ridge()

89200.27012685574

# HEDEF = R2> 0.80

### SKOR

In [26]:
r2_score(tahmin,y_test)

0.8139365298925918

In [27]:
r2_score(tahmin2,y_test)

0.8138587361746479

In [28]:
r2_score(tahmin3,y_test)

0.8121447830678756