## Verilerin İstatistiksel Olarak İncelenmesi ve Aykırı Değerlerin Ayıklanması

Gerekli paket ve modüllerin yüklenmesi

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Verilerin yüklenmesi

In [2]:
# Load the listings dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='hepsiemlak_cleaned.csv')

In [3]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9452 entries, 0 to 9451
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   city          9452 non-null   object
 1   district      9452 non-null   object
 2   neighborhood  9452 non-null   object
 3   room          9452 non-null   int64 
 4   livingroom    9452 non-null   int64 
 5   area          9452 non-null   int64 
 6   age           9452 non-null   int64 
 7   floor         9452 non-null   int64 
 8   price         9452 non-null   int64 
dtypes: int64(6), object(3)
memory usage: 664.7+ KB
None


In [4]:
# Cast every column in a single pass: the three location fields become
# categoricals and the six numeric fields are cast with 'int'.
# NOTE(review): 'int' is not a fixed-width dtype (the recorded run shows int32);
# spell out 'int32'/'int64' if the width matters.
df = df.astype({
    **dict.fromkeys(['city', 'district', 'neighborhood'], 'category'),
    **dict.fromkeys(['room', 'livingroom', 'area', 'age', 'floor', 'price'], 'int'),
})

In [5]:
# Show the dtypes after the casts. info() prints on its own and returns None,
# so it is called directly instead of being passed to print().
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9452 entries, 0 to 9451
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   city          9452 non-null   category
 1   district      9452 non-null   category
 2   neighborhood  9452 non-null   category
 3   room          9452 non-null   int32   
 4   livingroom    9452 non-null   int32   
 5   area          9452 non-null   int32   
 6   age           9452 non-null   int32   
 7   floor         9452 non-null   int32   
 8   price         9452 non-null   int32   
dtypes: category(3), int32(6)
memory usage: 359.8 KB
None


Nümerik değişkenler için çeyrekliklere (IQR) dayalı alt ve üst aykırı değer sınırlarının bulunması

In [6]:
# For every numeric column compute the IQR fences
#   lower = Q1 - 1.5 * IQR,  upper = Q3 + 1.5 * IQR
# and collect them in min_values / max_values (same order as `columns`).
# The printed "Min"/"Max" labels are these fences, not the observed extremes.
columns = df.select_dtypes(include=[np.number]).columns
min_values, max_values = [], []
for column in columns:
    values = df[column]
    q1, q3 = values.quantile(0.25), values.quantile(0.75)
    whisker = 1.5 * (q3 - q1)
    lower_bound, upper_bound = q1 - whisker, q3 + whisker
    min_values.append(lower_bound)
    max_values.append(upper_bound)
    print(f"{column} - Min: {lower_bound}, Max: {upper_bound}")


room - Min: 0.5, Max: 4.5
livingroom - Min: 1.0, Max: 1.0
area - Min: -7.5, Max: 220.5
age - Min: -26.5, Max: 57.5
floor - Min: -3.5, Max: 8.5
price - Min: -9000.0, Max: 63000.0


Aykırı Değerlerin Temizlenmesi

In [7]:
# Echo each numeric column next to its lower and upper fence before filtering.
for bounds_row in zip(columns, min_values, max_values):
    print(*bounds_row)



room 0.5 4.5
livingroom 1.0 1.0
area -7.5 220.5
age -26.5 57.5
floor -3.5 8.5
price -9000.0 63000.0


In [8]:
# Keep only the rows that fall inside every column's fence (both ends inclusive).
# NOTE(review): the recorded bounds for 'livingroom' are [1.0, 1.0], so this step
# keeps only rows with livingroom == 1 — confirm that is intended.
for column, lower, upper in zip(columns, min_values, max_values):
    df = df[df[column].between(lower, upper)]

In [9]:
# Row count and dtypes after outlier removal. info() prints on its own and
# returns None, so print() around it only added a trailing "None".
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7652 entries, 0 to 9451
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   city          7652 non-null   category
 1   district      7652 non-null   category
 2   neighborhood  7652 non-null   category
 3   room          7652 non-null   int32   
 4   livingroom    7652 non-null   int32   
 5   area          7652 non-null   int32   
 6   age           7652 non-null   int32   
 7   floor         7652 non-null   int32   
 8   price         7652 non-null   int32   
dtypes: category(3), int32(6)
memory usage: 368.5 KB
None


In [10]:
# Summary statistics of the filtered data. Left as the cell's last expression so
# the notebook renders the table with rich display instead of wrapped plain text.
df.describe()

              room  livingroom        area          age        floor  \
count  7652.000000      7652.0  7652.00000  7652.000000  7652.000000   
mean      2.202039         1.0   104.44276    16.690016     2.545217   
std       0.847212         0.0    36.73992    12.895895     1.839791   
min       1.000000         1.0    20.00000     0.000000    -3.000000   
25%       2.000000         1.0    75.00000     5.000000     1.000000   
50%       2.000000         1.0   100.00000    15.000000     2.000000   
75%       3.000000         1.0   130.00000    27.000000     4.000000   
max       4.000000         1.0   220.00000    55.000000     8.000000   

              price  
count   7652.000000  
mean   25580.608991  
std    11028.641160  
min      625.000000  
25%    17500.000000  
50%    23900.000000  
75%    30625.000000  
max    63000.000000  


In [None]:
# Save the cleaned data; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='hepsiemlak_cleaned_final.csv', index=False)