# Real Estate Price Analysis - EDA
Exploratory data analysis (EDA) of a real-world dataset of Istanbul real-estate listings from Emlakjet.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Apply seaborn's white-grid theme to every figure drawn in this notebook.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias for it.
sns.set_theme(style="whitegrid")

# Load the Emlakjet listings and build the cleaned frame `df` used by every
# later cell. The mapping below both selects the source columns and gives them
# English names, so selection and renaming cannot drift out of step.
column_map = {
    "İl": "city",
    "İlçe": "district",
    "Mahalle": "neighborhood",
    "Oda_Sayısı": "number_of_rooms",
    "Brüt_Metrekare": "area_sqm",
    "Binanın_Yaşı": "building_age",
    "Bulunduğu_Kat": "floor",
    "Banyo_Sayısı": "number_of_bathrooms",
    "Fiyatı": "price",
}

# Built as one chain (no in-place mutation) so re-running the cell always
# starts from the CSV and yields the same frame.
df = (
    pd.read_csv("Real Estate in ISTANBUL (Emlakjet).csv")
    .loc[:, list(column_map)]
    .dropna()  # drop listings missing any of the selected fields
    .rename(columns=column_map)
    .assign(
        # First run of digits in the room label, as a float (NaN when none).
        # NOTE(review): presumably labels look like "3+1", in which case only
        # the leading number is kept - confirm that is the intended count.
        room_numeric=lambda d: d["number_of_rooms"]
        .str.extract(r"(\d+)", expand=False)
        .astype(float),
        # Non-numeric bathroom entries become NaN instead of raising.
        number_of_bathrooms=lambda d: pd.to_numeric(
            d["number_of_bathrooms"], errors="coerce"
        ),
    )
    .dropna()  # remove rows where either conversion above produced NaN
)

## Descriptive Statistics

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the cleaned
# frame; with default arguments pandas reports the numeric columns only.
df.describe()

## Correlation Matrix

In [None]:
# Pairwise correlations between the numeric columns, drawn as an annotated heatmap.
corr_matrix = df.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Matrix")
plt.show()

## Price Distribution

In [None]:
# Histogram of listing prices (50 bins) with a kernel-density curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=df, x="price", bins=50, kde=True, ax=ax)
ax.set(title="Price Distribution", xlabel="Price (TL)")
plt.show()

## Average Price by Number of Rooms

In [None]:
# Mean listing price for each room-count label, one bar per label.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    data=df,
    x="number_of_rooms",
    y="price",
    estimator=np.mean,
    errorbar=None,  # no error bars; replaces the deprecated `ci=None`
    ax=ax,
)
ax.set_title("Average Price by Number of Rooms")
# Room labels are strings and can collide when horizontal, so tilt them.
ax.tick_params(axis="x", labelrotation=45)
plt.show()

## Bonus: Average Price by City

In [None]:
# Mean listing price per city.
# NOTE(review): the notebook describes the data as Istanbul listings, so this
# may render a single bar - consider grouping by "district" if so.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    data=df,
    x="city",
    y="price",
    estimator=np.mean,
    errorbar=None,  # no error bars; replaces the deprecated `ci=None`
    ax=ax,
)
ax.set(title="Average Price by City", xlabel="City", ylabel="Average Price (TL)")
fig.tight_layout()
plt.show()

## Bonus: Area vs Price (Scatter Plot)

In [None]:
# One point per listing: gross area on x, price on y. Points are drawn
# semi-transparent so dense regions remain readable.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=df, x="area_sqm", y="price", alpha=0.6, ax=ax)
ax.set(title="Area vs Price", xlabel="Area (sqm)", ylabel="Price (TL)")
fig.tight_layout()
plt.show()

## Bonus: Price by Number of Bathrooms (Boxplot)

In [None]:
# Price spread (median, quartiles, outliers) for each bathroom count.
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=df, x="number_of_bathrooms", y="price", ax=ax)
ax.set(
    title="Price by Number of Bathrooms",
    xlabel="Number of Bathrooms",
    ylabel="Price (TL)",
)
fig.tight_layout()
plt.show()

## Bonus: Building Age Distribution

In [None]:
# Collapse the raw building-age buckets into four coarser groups and count
# the listings in each.
# NOTE(review): the "0-5" group is built from raw buckets up to "3-4", so its
# label nominally overlaps with "5-10" - consider renaming it.
age_group_by_bucket = {
    "0-1": "0-5",
    "2": "0-5",
    "3-4": "0-5",
    "5-10": "5-10",
    "11-15": "11-20",
    "16-20": "11-20",
    "21 Ve Üzeri": "21+",
}
age_group_order = ["0-5", "5-10", "11-20", "21+"]

# `replace` leaves any raw value missing from the mapping unchanged; such
# values are not drawn below because they are absent from `age_group_order`.
df["building_age_grouped"] = df["building_age"].replace(age_group_by_bucket)

fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x="building_age_grouped", order=age_group_order, ax=ax)
ax.set(
    title="Building Age Distribution",
    xlabel="Building Age Group",
    ylabel="Number of Listings",
)
fig.tight_layout()
plt.show()