# Ege Bölgesi için bir kira tahmin uygulaması

In [97]:
import os
import numpy as np
import pandas as pd

In [98]:
def drop_columns(df, cols):
    """
    Remove the given columns from a DataFrame in place.

    Args:
        df: DataFrame to modify. It is mutated; nothing is returned.
        cols: A single column label (str) or an iterable of column labels.

    A label that is not present in ``df`` is reported with a printed message
    and skipped, so the remaining labels are still processed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        try:
            df.drop(columns=[col], inplace=True)
        except KeyError as e:
            # Only a missing label is tolerated; other errors should surface.
            print(f"Bir hata oluştu: {e}")

In [99]:
# Read every CSV export in the 'hepsiemlak' folder into one frame.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order reproducible, and skip anything that is not a CSV file.
files = sorted(file for file in os.listdir('hepsiemlak') if file.lower().endswith('.csv'))
df = pd.concat(
    [pd.read_csv(os.path.join('hepsiemlak', file)) for file in files],
    ignore_index=True,
)

In [100]:
# Structural overview of the combined raw frame. DataFrame.info() prints the
# report itself and returns None, so wrapping it in print() only adds "None".
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   img-link href        8136 non-null   object 
 1   photo-count          8130 non-null   float64
 2   list-view-price      8136 non-null   object 
 3   list-view-date       8136 non-null   object 
 4   celly                8136 non-null   object 
 5   celly 2              8136 non-null   object 
 6   celly 3              8136 non-null   object 
 7   celly 4              6858 non-null   object 
 8   list-view-header     8129 non-null   object 
 9   list-view-location   8136 non-null   object 
 10  he-lazy-image src    7649 non-null   object 
 11  left                 8114 non-null   object 
 12  img-wrp href         4555 non-null   object 
 13  he-lazy-image src 2  4067 non-null   object 
 14  branded__text        134 non-null    object 
 15  branded__text 2      134 non-null    o

In [101]:
# List the raw column labels before deciding which ones to discard.
print(df.columns)

Index(['img-link href', 'photo-count', 'list-view-price', 'list-view-date',
       'celly', 'celly 2', 'celly 3', 'celly 4', 'list-view-header',
       'list-view-location', 'he-lazy-image src', 'left', 'img-wrp href',
       'he-lazy-image src 2', 'branded__text', 'branded__text 2',
       'img-wrp href 2', 'he-lazy-image src 3'],
      dtype='object')


In [102]:
# Columns to discard; only the listing link, the price and the four 'celly'
# fields are kept for feature extraction.
cols = [
    'photo-count',
    'list-view-date',
    'list-view-header',
    'list-view-location',
    'he-lazy-image src',
    'left',
    'img-wrp href',
    'he-lazy-image src 2',
    'branded__text',
    'branded__text 2',
    'img-wrp href 2',
    'he-lazy-image src 3',
]
drop_columns(df, cols)

In [103]:
# Confirm that only the six retained columns are left. DataFrame.info() prints
# its own report and returns None, so it is not wrapped in print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   img-link href    8136 non-null   object
 1   list-view-price  8136 non-null   object
 2   celly            8136 non-null   object
 3   celly 2          8136 non-null   object
 4   celly 3          8136 non-null   object
 5   celly 4          6858 non-null   object
dtypes: object(6)
memory usage: 381.5+ KB
None


In [104]:
# The fourth '/'-separated piece of the listing link is a '-'-joined slug:
# first token = city, second = district, last token is dropped, and whatever
# lies in between is the (possibly multi-word) neighborhood.
df['location'] = df['img-link href'].map(lambda link: link.split('/')[3])
location_tokens = df['location'].str.split('-')
df['city'] = location_tokens.str[0]
df['district'] = location_tokens.str[1]
# Multi-word neighborhoods are re-joined with '_' (e.g. 'kucuk_cigli').
df['neighborhood'] = location_tokens.str[2:-1].map('_'.join)

In [105]:
# The slug and the listing link were only needed to derive city/district/neighborhood.
drop_columns(df, ['location', 'img-link href'])

In [106]:
# Check that city, district and neighborhood were added without nulls.
# DataFrame.info() prints its own report and returns None, so no print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  8136 non-null   object
 1   celly            8136 non-null   object
 2   celly 2          8136 non-null   object
 3   celly 3          8136 non-null   object
 4   celly 4          6858 non-null   object
 5   city             8136 non-null   object
 6   district         8136 non-null   object
 7   neighborhood     8136 non-null   object
dtypes: object(8)
memory usage: 508.6+ KB
None


In [107]:
# Spot-check the parsed location parts on a random sample. The fixed seed keeps
# the displayed rows identical across re-runs of the notebook.
print(df[['city', 'district', 'neighborhood']].sample(50, random_state=42))

         city      district           neighborhood
5872    mugla        bodrum                  geris
1243    izmir         cesme               reisdere
2562    izmir      bayrakli                 adalet
1726    izmir         konak                 kultur
6362    mugla        bodrum                 gumbet
7237    mugla       fethiye              pazaryeri
4188  denizli  merkezefendi              muratdede
4169  denizli     pamukkale          dokuzkavaklar
4810    izmir     karsiyaka              donanmaci
148     izmir         cesme                  ilica
5311    izmir         cigli            kucuk_cigli
7217    mugla       dalaman                    ege
654     izmir         konak               alsancak
152     izmir         cesme                alacati
6250    mugla       mentese               orhaniye
4616  denizli     pamukkale              zeytinkoy
7457    mugla         milas                   emek
6326    mugla       fethiye              karaculha
1250    izmir    karabaglar    

In [108]:
# Remaining columns: the price, the four raw 'celly' fields and the location parts.
print(df.columns)

Index(['list-view-price', 'celly', 'celly 2', 'celly 3', 'celly 4', 'city',
       'district', 'neighborhood'],
      dtype='object')


In [109]:
# Inspect the distinct raw room labels ('<n> +\n   <m>' or 'Stüdyo') before parsing.
print(df['celly'].unique())

['3 +\n                        1' '2 +\n                        1'
 '1 +\n                        1' '4 +\n                        1'
 '2 +\n                        0' 'Stüdyo'
 '3 +\n                        0' '1 +\n                        15'
 '2 +\n                        2' '6 +\n                        2'
 '4 +\n                        2' '3 +\n                        2'
 '9 +\n                        1' '6 +\n                        1'
 '8 +\n                        1' '5 +\n                        1'
 '41 +\n                        4' '4 +\n                        0'
 '5 +\n                        2' '1 +\n                        25'
 '7 +\n                        1' '21 +\n                        1'
 '7 +\n                        2' '6 +\n                        0'
 '9 +\n                        4' '4 +\n                        4'
 '3 +\n                        3' '31 +\n                        1'
 '9 +\n                        3' '25 +\n                        5'
 '8 +\n      

In [110]:
# Normalise the raw label to '<rooms> + <living rooms>': a studio counts as
# '1 + 0', and the line break inside the scraped text is removed.
room_labels = df['celly'].str.replace('Stüdyo', '1 + 0', regex=False)
df['celly'] = room_labels.str.replace('\n', '', regex=False)
# Split once on '+'; the int cast tolerates the whitespace around each number.
room_parts = df['celly'].str.split('+')
df['room'] = room_parts.str[0].astype(int)
df['living_room'] = room_parts.str[1].astype(int)

In [111]:
# Check the parsed counts. Values such as 41 rooms or 25 living rooms remain in
# the data -- presumably listing-entry errors; TODO review before modelling.
print(df['room'].unique())
print(df['living_room'].unique())

[ 3  2  1  4  6  9  8  5 41  7 21 31 25 14 10]
[ 1  0 15  2  4 25  3  5  9  6]


In [112]:
# The raw room label is no longer needed once 'room' and 'living_room' exist.
drop_columns(df, ['celly'])

In [113]:
# Verify that 'room' and 'living_room' are integer columns without nulls.
# DataFrame.info() prints its own report and returns None, so no print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  8136 non-null   object
 1   celly 2          8136 non-null   object
 2   celly 3          8136 non-null   object
 3   celly 4          6858 non-null   object
 4   city             8136 non-null   object
 5   district         8136 non-null   object
 6   neighborhood     8136 non-null   object
 7   room             8136 non-null   int64 
 8   living_room      8136 non-null   int64 
dtypes: int64(2), object(7)
memory usage: 572.2+ KB
None


In [114]:
# Inspect the raw area labels ('<number> m²'); note that '.' is used as a
# thousands separator in values such as '3.200 m²'.
print(df['celly 2'].unique())

['150 m²' '110 m²' '60 m²' '50 m²' '300 m²' '117 m²' '70 m²' '140 m²'
 '130 m²' '75 m²' '160 m²' '165 m²' '135 m²' '85 m²' '120 m²' '48 m²'
 '125 m²' '80 m²' '95 m²' '100 m²' '105 m²' '180 m²' '3.200 m²' '90 m²'
 '76 m²' '55 m²' '65 m²' '145 m²' '45 m²' '115 m²' '35 m²' '53 m²'
 '155 m²' '148 m²' '78 m²' '151 m²' '30 m²' '270 m²' '440 m²' '132 m²'
 '121 m²' '141 m²' '170 m²' '122 m²' '200 m²' '87 m²' '146 m²' '220 m²'
 '108 m²' '370 m²' '86 m²' '380 m²' '47 m²' '43 m²' '63 m²' '245 m²'
 '204 m²' '183 m²' '190 m²' '600 m²' '119 m²' '72 m²' '350 m²' '142 m²'
 '520 m²' '166 m²' '870 m²' '330 m²' '96 m²' '41 m²' '143 m²' '280 m²'
 '64 m²' '57 m²' '286 m²' '340 m²' '81 m²' '185 m²' '40 m²' '168 m²'
 '116 m²' '93 m²' '209 m²' '59 m²' '77 m²' '400 m²' '98 m²' '106 m²'
 '174 m²' '205 m²' '74 m²' '500 m²' '68 m²' '1.301 m²' '275 m²' '52 m²'
 '137 m²' '167 m²' '217 m²' '25 m²' '250 m²' '290 m²' '112 m²' '196 m²'
 '184 m²' '450 m²' '159 m²' '195 m²' '640 m²' '230 m²' '260 m²' '285 m²'
 '235 m²' '

In [115]:
# Strip the '.' thousands separator ('3.200 m²' -> '3200 m²'); regex=False keeps
# the dot literal. The number before the first space is the area.
df['celly 2'] = df['celly 2'].str.replace('.', '', regex=False)
area_tokens = df['celly 2'].str.split(' ')
df['area'] = area_tokens.str[0].astype(int)

In [116]:
# Check the parsed areas; both very small (e.g. 11) and very large (e.g. 4500)
# values are present -- TODO decide how to treat these outliers.
print(df['area'].unique())

[ 150  110   60   50  300  117   70  140  130   75  160  165  135   85
  120   48  125   80   95  100  105  180 3200   90   76   55   65  145
   45  115   35   53  155  148   78  151   30  270  440  132  121  141
  170  122  200   87  146  220  108  370   86  380   47   43   63  245
  204  183  190  600  119   72  350  142  520  166  870  330   96   41
  143  280   64   57  286  340   81  185   40  168  116   93  209   59
   77  400   98  106  174  205   74  500   68 1301  275   52  137  167
  217   25  250  290  112  196  184  450  159  195  640  230  260  285
  235  337  211  240  134  832 1000   38  147  152   88  213  126  255
  199  127   11  154   22   66  399  187   67  210   89  355   97   84
   42   51   73  325   71 3000  175  320  156  162   18   56  136  131
  580  375  161  163  149  206  138  475  359  124   91  194 4500   20
  103   58  302  505  800  368  178  750   92  181  460  390  360  550
 4000  295  128  341  745  118  144   15   82   49  176  182  981  256
   62 

In [117]:
# The raw area label is no longer needed once 'area' exists.
drop_columns(df, ['celly 2'])

In [118]:
# Verify that 'area' is an integer column without nulls.
# DataFrame.info() prints its own report and returns None, so no print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  8136 non-null   object
 1   celly 3          8136 non-null   object
 2   celly 4          6858 non-null   object
 3   city             8136 non-null   object
 4   district         8136 non-null   object
 5   neighborhood     8136 non-null   object
 6   room             8136 non-null   int64 
 7   living_room      8136 non-null   int64 
 8   area             8136 non-null   int64 
dtypes: int64(3), object(6)
memory usage: 572.2+ KB
None


In [119]:
# Inspect the raw building-age labels ('<n>\n   Yaşında' or 'Sıfır Bina') before parsing.
print(df['celly 3'].unique())

['20\n                      Yaşında' '8\n                      Yaşında'
 'Sıfır Bina' '10\n                      Yaşında'
 '11\n                      Yaşında' '5\n                      Yaşında'
 '4\n                      Yaşında' '3\n                      Yaşında'
 '15\n                      Yaşında' '1\n                      Yaşında'
 '24\n                      Yaşında' '2\n                      Yaşında'
 '17\n                      Yaşında' '30\n                      Yaşında'
 '28\n                      Yaşında' '25\n                      Yaşında'
 '16\n                      Yaşında' '21\n                      Yaşında'
 '18\n                      Yaşında' '31\n                      Yaşında'
 '22\n                      Yaşında' '7\n                      Yaşında'
 '13\n                      Yaşında' '9\n                      Yaşında'
 '6\n                      Yaşında' '35\n                      Yaşında'
 '39\n                      Yaşında' '14\n                      Yaşında'
 '19\n    

In [120]:
# 'Sıfır Bina' is rewritten as age 0 so every label has the form
# '<n> ... Yaşında'; the line break becomes a space so the number is the first token.
age_labels = df['celly 3'].str.replace('Sıfır Bina', '0 Yaşında', regex=False)
df['celly 3'] = age_labels.str.replace('\n', ' ', regex=False)
df['age'] = df['celly 3'].str.split(' ').str[0].astype(int)

In [121]:
# Check the parsed ages; values such as 199, 300 and 600 remain in the data --
# presumably listing-entry errors; TODO review before modelling.
print(df['age'].unique())

[ 20   8   0  10  11   5   4   3  15   1  24   2  17  30  28  25  16  21
  18  31  22   7  13   9   6  35  39  14  19  33  26  12  29  49  27  32
  52  40  38  44  50  34  23  45  42  55  36  43  37  47 300  48 199 100
  60  61 600]


In [122]:
# The raw age label is no longer needed once 'age' exists.
drop_columns(df, ['celly 3'])

In [123]:
# Verify that 'age' is an integer column; 'celly 4' is the only column with nulls.
# DataFrame.info() prints its own report and returns None, so no print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  8136 non-null   object
 1   celly 4          6858 non-null   object
 2   city             8136 non-null   object
 3   district         8136 non-null   object
 4   neighborhood     8136 non-null   object
 5   room             8136 non-null   int64 
 6   living_room      8136 non-null   int64 
 7   area             8136 non-null   int64 
 8   age              8136 non-null   int64 
dtypes: int64(4), object(5)
memory usage: 572.2+ KB
None


In [124]:
# Inspect the raw floor labels: numbered floors ('<n>. Kat'), named floors and NaN.
print(df['celly 4'].unique())

['Kot 2' '1. Kat' '4. Kat' '5. Kat' '7. Kat' '3. Kat' '2. Kat' 'Kot 1'
 'Yüksek Giriş' '9. Kat' 'Ara Kat' 'En Üst Kat' 'Bahçe Katı' 'Yarı Bodrum'
 nan 'Bodrum' 'Kot 3' 'Çatı Katı' 'Zemin' '8. Kat' 'Giriş Katı' '6. Kat'
 '16. Kat' '17. Kat' 'Villa Katı' '10. Kat' '13. Kat' '12. Kat' '11. Kat'
 '14. Kat' '21 ve üzeri' 'Bodrum ve Zemin' 'Asma Kat' '18. Kat' 'Tripleks'
 'Teras Katı' '15. Kat' '20. Kat' '19. Kat']


In [128]:
# Map every named floor label to the '<n>. Kat' form so the floor number can be
# parsed uniformly. Labels without an exact floor (e.g. 'Ara Kat', 'En Üst Kat')
# get a fixed representative value -- these are heuristic choices.
replace_dict = {
    'Kot 2': '-2. Kat',
    'Kot 1': '-1. Kat',
    'Yüksek Giriş': '1. Kat',
    'Ara Kat': '3. Kat',
    'En Üst Kat': '5. Kat',
    'Bahçe Katı': '0. Kat',
    'Yarı Bodrum': '0. Kat',
    'Bodrum': '0. Kat',
    'Kot 3': '-3. Kat',
    'Çatı Katı': '5. Kat',
    'Zemin': '0. Kat',
    'Giriş Katı': '0. Kat',
    'Villa Katı': '0. Kat',
    '21 ve üzeri': '21. Kat',
    'Bodrum ve Zemin': '0. Kat',
    'Asma Kat': '1. Kat',
    'Tripleks': '0. Kat',
    'Teras Katı': '5. Kat',
}
# Label assigned to listings that have no floor information.
MISSING_FLOOR_LABEL = '2. Kat'

# Fill missing values BEFORE the string cast: astype(str) turns NaN into the
# text 'nan', which the int conversion below cannot parse, so on a fresh kernel
# the cell would fail on its first run.
df['celly 4'] = (
    df['celly 4']
    .fillna(MISSING_FLOOR_LABEL)
    .replace(replace_dict)
    .astype(str)
)
# The floor number is everything before the first '.', e.g. '-2. Kat' -> -2.
df['floor'] = df['celly 4'].str.split('.').str[0].astype(int)

In [129]:
# Check the parsed floor numbers (negative values come from the 'Kot <n>' labels).
print(df['floor'].unique())

[-2  1  4  5  7  3  2 -1  9  0 -3  8  6 16 17 10 13 12 11 14 21 18 15 20
 19]


In [130]:
# The raw floor label is no longer needed once 'floor' exists.
drop_columns(df, ['celly 4'])

In [131]:
# Final check for this stage: five integer features plus price and location columns.
# DataFrame.info() prints its own report and returns None, so no print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   list-view-price  8136 non-null   object
 1   city             8136 non-null   object
 2   district         8136 non-null   object
 3   neighborhood     8136 non-null   object
 4   room             8136 non-null   int64 
 5   living_room      8136 non-null   int64 
 6   area             8136 non-null   int64 
 7   age              8136 non-null   int64 
 8   floor            8136 non-null   int64 
dtypes: int64(5), object(4)
memory usage: 572.2+ KB
None
