# Türkiye İçin Kira Tahmin Uygulaması

In [1]:
import os 
import numpy as np
import pandas as pd

In [2]:
def drop_columns(df, cols):
    """
    Remove the given columns from a DataFrame in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. It is mutated; nothing is returned.
    cols : str or iterable of column labels
        Column(s) to remove. A single string is treated as one column name
        rather than being iterated character by character.

    Notes
    -----
    A label that is not present in ``df`` is reported on stdout and skipped,
    so the remaining labels are still processed. Only ``KeyError`` (what
    ``DataFrame.drop`` raises for an unknown label) is handled; any other
    error propagates instead of being hidden.
    """
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        try:
            df.drop(columns=[col], inplace=True)
        except KeyError as e:
            print(f"Bir hata oluştu: {e}")

In [3]:
# Load one scraped export to get a first look at the data.
# NOTE: `df` is rebuilt from every file in the 'HepsiEmlak' folder by the
# pd.concat cell further down, so this frame is superseded there.
df = pd.read_csv('HepsiEmlak/hepsiemlak.csv')

In [4]:
# Show which files are present in the data folder.
os.listdir('HepsiEmlak')

['hepsiemlak (1).csv',
 'hepsiemlak (2).csv',
 'hepsiemlak (3).csv',
 'hepsiemlak.csv']

In [5]:
# Keep the directory listing; the names are read one by one when the
# combined frame is built.
files = os.listdir('HepsiEmlak')

In [6]:
# Read every CSV export in the folder and stack them into a single frame
# with a fresh 0..n-1 index.
# - The suffix filter skips directory entries that are not CSV files, which
#   pd.read_csv would otherwise try (and fail) to parse.
# - Sorting makes the row order reproducible: os.listdir returns names in
#   arbitrary order.
csv_names = sorted(name for name in files if name.lower().endswith('.csv'))
df = pd.concat(
    [pd.read_csv(f"HepsiEmlak/{name}") for name in csv_names],
    ignore_index=True,
)

In [7]:
# Discard, in place, the scraped columns that no later cell reads.
df.drop(
    columns=[
        'photo-count',
        'list-view-date',
        'left',
        'eids-badge__description',
        'he-lazy-image src',
        'eids-badge__label',
        'list-view-header',
        'list-view-location',
        'img-wrp href',
        'he-lazy-image src 2',
        'listing-card--owner-info__firm-name',
        'he-lazy-image src 3',
        'wp-btn',
        'listing-card--owner-info__name',
    ],
    inplace=True,
)

In [8]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# it is called directly; wrapping it in print() only appends a "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28671 entries, 0 to 28670
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   img-link href    28671 non-null  object
 1   list-view-price  28671 non-null  object
 2   currency         28671 non-null  object
 3   celly            28671 non-null  object
 4   celly 2          28671 non-null  object
 5   celly 3          28671 non-null  object
 6   celly 4          27234 non-null  object
dtypes: object(7)
memory usage: 1.5+ MB
None


In [9]:
# The currency column is not used by any later cell; remove it in place.
df.drop(columns=['currency'], inplace=True)

In [10]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# it is called directly; wrapping it in print() only appends a "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28671 entries, 0 to 28670
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   img-link href    28671 non-null  object
 1   list-view-price  28671 non-null  object
 2   celly            28671 non-null  object
 3   celly 2          28671 non-null  object
 4   celly 3          28671 non-null  object
 5   celly 4          27234 non-null  object
dtypes: object(6)
memory usage: 1.3+ MB
None


In [11]:
# Report the (rows, columns) size of the frame after the column clean-up.
print(df.shape)

(28671, 6)


In [12]:
# The fourth '/'-separated piece of the listing link is a '-'-separated
# slug: first token -> city, second -> district, everything after that
# except the final token -> neighborhood.
df['location'] = df['img-link href'].map(lambda href: href.split('/')[3])
slug_tokens = df['location'].str.split('-')
df['city'] = slug_tokens.str[0]
df['district'] = slug_tokens.str[1]
# A neighborhood may span several tokens, so the slice is re-joined with '-'.
df['neighborhood'] = slug_tokens.str[2:-1].str.join('-')

In [13]:
# The link and the intermediate slug are no longer needed once city,
# district and neighborhood have been extracted from them.
drop_columns(df, ['img-link href', 'location'])

In [14]:
# Inspect the distinct raw values of 'celly' (parsed below into the room
# and living-room counts) to see which formats must be handled.
print(df['celly'].unique())

['3 +\n                          1' '1 +\n                          1'
 '4 +\n                          1' '2 +\n                          1'
 '5 +\n                          1' '2 +\n                          0'
 '3 +\n                          3' '3 +\n                          0'
 '2 +\n                          12' '4 +\n                          2'
 '7 +\n                          2' '3 +\n                          2'
 '8 +\n                          0' '2 +\n                          2'
 '5 +\n                          2' '6 +\n                          1'
 'Stüdyo' '2 +\n                          10'
 '6 +\n                          0' '1 +\n                          3'
 '7 +\n                          1' '6 +\n                          2'
 '9 +\n                          3' '4 +\n                          0'
 '1 +\n                          10' '8 +\n                          1'
 '12 +\n                          1' '3 +\n                          10'
 '7 +\n                    

In [15]:
# Normalise the room description to "<rooms> + <living rooms>":
# 'Stüdyo' is rewritten as '1 + 0' and the embedded line breaks are removed.
room_text = df['celly'].map(
    lambda raw: raw.replace('Stüdyo', '1 + 0').replace('\n', '')
)
df['celly'] = room_text
# Split once on '+'; the integer conversion tolerates the leftover padding
# spaces around each number.
room_parts = room_text.map(lambda text: text.split('+'))
df['room'] = room_parts.map(lambda parts: parts[0]).astype(int)
df['livingroom'] = room_parts.map(lambda parts: parts[1]).astype(int)

In [16]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# it is called directly; wrapping it in print() only appends a "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28671 entries, 0 to 28670
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  28671 non-null  object
 1   celly            28671 non-null  object
 2   celly 2          28671 non-null  object
 3   celly 3          28671 non-null  object
 4   celly 4          27234 non-null  object
 5   city             28671 non-null  object
 6   district         28671 non-null  object
 7   neighborhood     28671 non-null  object
 8   room             28671 non-null  int32 
 9   livingroom       28671 non-null  int32 
dtypes: int32(2), object(8)
memory usage: 2.0+ MB
None


In [17]:
# The raw text is redundant now that 'room' and 'livingroom' exist.
drop_columns(df,['celly'])

In [18]:
# Inspect the distinct raw values of 'celly 2' (parsed below into 'area').
print(df['celly 2'].unique())

['110 m²' '55 m²' '160 m²' '95 m²' '158 m²' '275 m²' '124 m²' '65 m²'
 '170 m²' '150 m²' '130 m²' '115 m²' '100 m²' '165 m²' '128 m²' '85 m²'
 '74 m²' '175 m²' '200 m²' '192 m²' '94 m²' '48 m²' '135 m²' '106 m²'
 '174 m²' '136 m²' '45 m²' '80 m²' '90 m²' '195 m²' '60 m²' '96 m²'
 '180 m²' '52 m²' '250 m²' '120 m²' '72 m²' '70 m²' '36 m²' '145 m²'
 '129 m²' '185 m²' '39 m²' '235 m²' '125 m²' '190 m²' '105 m²' '50 m²'
 '210 m²' '950 m²' '75 m²' '176 m²' '140 m²' '108 m²' '569 m²' '123 m²'
 '47 m²' '230 m²' '400 m²' '380 m²' '64 m²' '295 m²' '58 m²' '220 m²'
 '280 m²' '155 m²' '300 m²' '20 m²' '240 m²' '25 m²' '82 m²' '119 m²'
 '600 m²' '420 m²' '92 m²' '209 m²' '35 m²' '199 m²' '138 m²' '265 m²'
 '77 m²' '245 m²' '68 m²' '169 m²' '56 m²' '126 m²' '1.350 m²' '117 m²'
 '730 m²' '66 m²' '330 m²' '260 m²' '164 m²' '550 m²' '350 m²' '62 m²'
 '499 m²' '118 m²' '22 m²' '219 m²' '142 m²' '109 m²' '101 m²' '148 m²'
 '57 m²' '54 m²' '98 m²' '42 m²' '161 m²' '238 m²' '188 m²' '167 m²'
 '97 m²' '222

In [19]:
# Values look like '110 m²' or '1.350 m²'. Every '.' is removed first
# ('1.350' -> '1350'), then the text before the first space is kept as the
# integer area.
area_text = df['celly 2'].map(lambda raw: raw.replace('.', ''))
df['celly 2'] = area_text
df['area'] = area_text.map(lambda text: text.partition(' ')[0]).astype(int)

In [20]:
# The raw text is redundant now that 'area' exists.
drop_columns(df, ['celly 2'])

In [21]:
# Inspect the distinct raw values of 'celly 3' (parsed below into 'age').
print(df['celly 3'].unique())

['30\n                        Yaşında'
 '5\n                        Yaşında' 'Sıfır Bina'
 '15\n                        Yaşında'
 '12\n                        Yaşında'
 '9\n                        Yaşında'
 '20\n                        Yaşında'
 '16\n                        Yaşında'
 '21\n                        Yaşında'
 '37\n                        Yaşında'
 '28\n                        Yaşında'
 '6\n                        Yaşında'
 '10\n                        Yaşında'
 '18\n                        Yaşında'
 '2\n                        Yaşında'
 '11\n                        Yaşında'
 '24\n                        Yaşında'
 '1\n                        Yaşında'
 '25\n                        Yaşında'
 '4\n                        Yaşında'
 '23\n                        Yaşında'
 '14\n                        Yaşında'
 '3\n                        Yaşında' '8\n                        Yaşında'
 '22\n                        Yaşında'
 '7\n                        Yaşında'
 '35\n                

In [22]:
# 'Sıfır Bina' is recorded as age 0. Line breaks become spaces so that the
# number is always the text before the first space.
age_text = df['celly 3'].map(
    lambda raw: raw.replace('Sıfır Bina', '0').replace('\n', ' ')
)
df['celly 3'] = age_text
df['age'] = age_text.map(lambda text: text.partition(' ')[0]).astype(int)

In [23]:
# The raw text is redundant now that 'age' exists.
drop_columns(df,['celly 3'])

In [24]:
# Inspect the distinct raw values of 'celly 4' (parsed below into 'floor');
# the column also contains missing values.
print(df['celly 4'].unique())

['1. Kat' '4. Kat' 'Yüksek Giriş' '3. Kat' '2. Kat' '6. Kat' '8. Kat'
 '5. Kat' nan 'Giriş Katı' 'En Üst Kat' '12. Kat' 'Zemin' 'Ara Kat'
 'Bahçe Katı' '7. Kat' 'Kot 3' '9. Kat' 'Villa Katı' 'Çatı Katı' 'Kot 1'
 '10. Kat' 'Tripleks' '11. Kat' 'Teras Katı' '15. Kat' '13. Kat' 'Kot 2'
 '14. Kat' 'Yarı Bodrum' '21 ve üzeri' '18. Kat' '19. Kat' '17. Kat'
 '16. Kat' 'Bodrum' 'Bodrum ve Zemin' '20. Kat' 'Asma Kat']


In [27]:
# Map the textual floor labels onto the "<n>. Kat" pattern so that every
# value can be parsed the same way. The numbers assigned to non-numeric
# labels are this notebook's own approximations, not values from the data.
replace_dict = {
    'Yüksek Giriş': '1. Kat',
    'Giriş Katı': '0. Kat',
    'En Üst Kat': '5. Kat',
    'Zemin': '0. Kat',
    'Ara Kat': '3. Kat',
    'Bahçe Katı': '0. Kat',
    'Kot 3': '-3. Kat',
    'Villa Katı': '0. Kat',
    'Çatı Katı': '5. Kat',
    'Yarı Bodrum': '0. Kat',
    'Kot 1': '-1. Kat',
    'Tripleks': '0. Kat',
    'Teras Katı': '5. Kat',
    'Kot 2': '-2. Kat',
    '21 ve üzeri': '21. Kat',
    'Bodrum': '0. Kat',
    'Bodrum ve Zemin': '0. Kat',
    'Asma Kat': '1. Kat',
    'nan': '2. Kat',
}
# Missing floors take the default stored under the 'nan' key. They are
# filled explicitly because Series.replace does not match a real NaN against
# the string 'nan'; without this step NaN turns into the text 'nan' and the
# integer conversion below raises. The 'nan' key itself is kept so a column
# that was already converted to str is still handled by the replace call.
floor_text = df['celly 4'].fillna(replace_dict['nan']).astype(str)
# A dict passed to replace() swaps exact whole-cell matches only, so
# 'Bodrum' never rewrites part of 'Bodrum ve Zemin'.
df['celly 4'] = floor_text.replace(replace_dict)
# The (possibly negative) integer before the first '.' is the floor number.
# A label that is neither in replace_dict nor of the form "<n>. Kat" still
# raises ValueError here.
df['floor'] = df['celly 4'].str.split('.').str[0].astype(int)

In [28]:
# Check that every floor label was converted to an integer.
print(df['floor'].unique())

[ 1  4  3  2  6  8  5  0 12  7 -3  9 -1 10 11 15 13 -2 14 21 18 19 17 16
 20]


In [29]:
# The raw text is redundant now that 'floor' exists.
drop_columns(df, ['celly 4'])

In [30]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# it is called directly; wrapping it in print() only appends a "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28671 entries, 0 to 28670
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  28671 non-null  object
 1   city             28671 non-null  object
 2   district         28671 non-null  object
 3   neighborhood     28671 non-null  object
 4   room             28671 non-null  int32 
 5   livingroom       28671 non-null  int32 
 6   area             28671 non-null  int32 
 7   age              28671 non-null  int32 
 8   floor            28671 non-null  int32 
dtypes: int32(5), object(4)
memory usage: 1.4+ MB
None


In [31]:
# Remove every '.' from the price text, then store the result as an
# integer in 'price'.
price_text = df['list-view-price'].astype(str).map(
    lambda raw: raw.replace('.', '')
)
df['list-view-price'] = price_text
df['price'] = price_text.astype(int)

In [34]:
# The raw text is redundant now that the integer 'price' column exists.
drop_columns(df, ['list-view-price'])

In [36]:
# Write the cleaned frame to disk.
# NOTE(review): with the default arguments the row index is written as an
# extra first column; confirm that whatever reads data.csv expects that.
df.to_csv('data.csv')