In [None]:
# Import necessary libraries for data manipulation and visualization
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Location of the dataset. The default is the original local path; set the
# ISTANBUL_RENTAL_CSV environment variable to point at another copy of the
# file instead of editing this cell.
DATA_PATH = os.environ.get(
    "ISTANBUL_RENTAL_CSV",
    "/Users/anilyigitsel/Documents/personal/dev/istanbul-rental-analysis/data/istanbul_rental_apartments.csv",
)

# Load the dataset from the CSV file
df = pd.read_csv(DATA_PATH)

# Display general information about the dataset (column types, non-null counts).
# info() prints its report directly, so no display() call is needed.
df.info()

# Jupyter renders only the last expression of a cell, and this cell ends with
# an assignment, so bare `df.describe()` / `df.head()` calls would show nothing.
# display() (injected into the namespace by IPython/Jupyter) renders them.

# Descriptive statistics (e.g., mean, std, min, max) for numerical columns
display(df.describe())

# Preview the first 5 rows of the dataset
display(df.head())

# Copy of the dataset without rows that contain missing values.
# NOTE(review): the analysis cells shown in this notebook keep reading `df`,
# not `data` — confirm whether they were meant to use the cleaned frame.
data = df.dropna()

In [None]:
# Mean rental price per district, ordered from the most to the least expensive.
average_price_by_district = (
    df.groupby('district')['price']
    .mean()
    .sort_values(ascending=False)
)

# Draw on an explicit Axes rather than pyplot's implicit "current" figure.
fig, ax = plt.subplots(figsize=(12, 8))
average_price_by_district.plot(kind='bar', ax=ax, color='skyblue', edgecolor='black')

ax.set_title('Average Rental Prices by District in Istanbul', fontsize=16)
ax.set_xlabel('Districts', fontsize=12)
ax.set_ylabel('Average Rental Price (TRY)', fontsize=12)

# District names are long; keep them vertical so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)
ax.grid(True)
plt.show()

In [None]:
# Mean rental price for every distinct value of the `floor` column,
# ordered from the highest to the lowest average.
price_by_floor_group = df.groupby('floor')['price']
average_price_by_floor = price_by_floor_group.mean().sort_values(ascending=False)

# One bar per floor value, drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(12, 8))
average_price_by_floor.plot(kind='bar', ax=ax, color='skyblue', edgecolor='black')

ax.set_title('Average Rental Prices by Floor in Istanbul', fontsize=16)
ax.set_xlabel('Floor', fontsize=12)
ax.set_ylabel('Average Rental Price (TRY)', fontsize=12)

ax.tick_params(axis='x', labelrotation=90)  # vertical tick labels
ax.grid(True)
plt.show()

In [None]:
# Calculate and display the average price per square meter by district, sorted from lowest to highest.

# A listing whose area is zero would produce inf when divided, and a single
# inf makes the whole district mean inf. Mask non-positive (and missing) areas
# to NaN so those listings are skipped by mean() instead.
valid_area = df['area (m2)'].where(df['area (m2)'] > 0)

# Adds a `price_per_m2` column to `df`; the region comparison further down
# in the notebook reads this column as well.
df['price_per_m2'] = df['price'] / valid_area

avg_price_per_m2_by_district = (
    df.groupby('district')['price_per_m2']
    .mean()
    .sort_values(ascending=True)
)

# Horizontal bars keep the district names readable.
fig, ax = plt.subplots(figsize=(12, 8))
avg_price_per_m2_by_district.plot(kind='barh', ax=ax, color='skyblue', edgecolor='black')

ax.set_title('Average Price Per Square Meter by District in Istanbul', fontsize=16)
ax.set_xlabel('Average Price Per Square Meter (TRY)', fontsize=12)
ax.set_ylabel('District', fontsize=12)
ax.grid(True)
plt.show()

In [None]:
# Mean building age per district; the resulting Series lists the district
# with the oldest buildings first.
avg_age_by_district = (
    df.groupby('district')['age']
    .mean()
    .sort_values(ascending=False)
)

# Horizontal bar chart on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(12, 8))
avg_age_by_district.plot(kind='barh', ax=ax, color='skyblue', edgecolor='black')

ax.set_title('Average Building Age by District', fontsize=16)
ax.set_xlabel('Average Age of Buildings (Years)', fontsize=12)
ax.set_ylabel('District', fontsize=12)

ax.grid(True)
plt.show()

In [None]:
# Remove unnecessary spaces from district names so they match the lists below
df['district'] = df['district'].str.strip()

# Districts on the Anatolian (Asian) side of Istanbul
anatolia = [
    'Kadıköy', 'Maltepe', 'Kartal', 'Pendik', 'Tuzla',
    'Üsküdar', 'Ataşehir', 'Ümraniye', 'Sancaktepe',
    'Sultanbeyli', 'Çekmeköy', 'Adalar', 'Şile', 'Beykoz'
]

# Districts on the European side of Istanbul.
# 'Çatalca' was missing from this list, which sent its listings to 'Unknown'.
# 'Eyüp' is the former name of 'Eyüpsultan' and is accepted as an alias.
europe = [
    'Bakırköy', 'Fatih', 'Beyoğlu', 'Bahçelievler', 'Arnavutköy',
    'Beylikdüzü', 'Avcılar', 'Esenyurt', 'Eyüpsultan', 'Beşiktaş',
    'Şişli', 'Başakşehir', 'Kağıthane', 'Küçükçekmece', 'Sarıyer',
    'Zeytinburnu', 'Büyükçekmece', 'Bağcılar', 'Güngören',
    'Gaziosmanpaşa', 'Bayrampaşa', 'Silivri', 'Sultangazi', 'Esenler',
    'Çatalca', 'Eyüp'
]

# Single district -> region lookup. Anatolia is merged last so it wins if a
# name ever appears in both lists, matching the order of the original checks.
region_by_district = {
    **dict.fromkeys(europe, 'Europe'),
    **dict.fromkeys(anatolia, 'Anatolia'),
}

# map() yields NaN for districts absent from the lookup (and for missing
# district values); those rows are labelled 'Unknown'.
df['region'] = df['district'].map(region_by_district).fillna('Unknown')

# Surface unmatched district names instead of silently bucketing them, so
# spelling variants in the data can be spotted and added to the lists above.
unmatched_districts = sorted(
    df.loc[df['region'] == 'Unknown', 'district'].dropna().unique()
)
if unmatched_districts:
    print(f"Districts not assigned to a region: {unmatched_districts}")

In [None]:
# Mean rent on each side of the city, taken from the rows whose `region`
# label is 'Anatolia' or 'Europe' respectively.
is_anatolia = df['region'] == 'Anatolia'
is_europe = df['region'] == 'Europe'

avg_rent_anatolia = df.loc[is_anatolia, 'price'].mean()
avg_rent_europe = df.loc[is_europe, 'price'].mean()

# Two-bar comparison drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    ['Anatolia', 'Europe'],
    [avg_rent_anatolia, avg_rent_europe],
    color=['blue', 'green'],
)

ax.set_title('Average Rent Price Comparison', fontsize=14)
ax.set_xlabel('Region', fontsize=12)
ax.set_ylabel('Average Rent Price', fontsize=12)

plt.show()

In [None]:
# Calculate and display the average building age by region, comparing Anatolia and Europe

# Grouping on `region` alone would also return the 'Unknown' bucket (districts
# that matched neither list), adding a third bar to a chart that supplies only
# two colours. reindex() keeps exactly the two regions being compared, in a
# fixed order; a region with no rows shows up as NaN (no bar).
compared_regions = ['Anatolia', 'Europe']
avg_age_by_region = df.groupby('region')['age'].mean().reindex(compared_regions)

fig, ax = plt.subplots(figsize=(8, 6))
avg_age_by_region.plot(kind='bar', ax=ax, color=['lightblue', 'salmon'])

ax.set_title('Average Building Age by Region', fontsize=16)
ax.set_xlabel('Region', fontsize=12)
ax.set_ylabel('Average Building Age', fontsize=12)

# Light horizontal guide lines only.
ax.grid(True, axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

In [None]:
# Compare the price per m² between Anatolia and Europe regions.
# Relies on the `region` and `price_per_m2` columns added to `df` by earlier cells.

df_anatolia = df[df['region'] == 'Anatolia']
df_europe = df[df['region'] == 'Europe']

avg_price_per_m2_anatolia = df_anatolia['price_per_m2'].mean()
avg_price_per_m2_europe = df_europe['price_per_m2'].mean()

fig, ax = plt.subplots(figsize=(8, 6))
# Same colour per region as the average rent comparison chart
# (Anatolia = blue, Europe = green); the colours used to be swapped here,
# so a region changed colour from one figure to the next.
ax.bar(
    ['Anatolia', 'Europe'],
    [avg_price_per_m2_anatolia, avg_price_per_m2_europe],
    color=['blue', 'green'],
)
ax.set_title('Price per m² Comparison between Anatolia and Europe')
ax.set_xlabel('Region')
ax.set_ylabel('Average Price per m²')
plt.show()