# Exploratory Data Analysis of Istanbul Airbnb Data

## Import Required Packages

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib as plt
import geopandas as gpd

## Load Data

In [2]:
# Each input is read from a path relative to the notebook's working directory.
# `import_fp` is rebound before every read and ends up pointing at the
# districts shapefile.

# Processed listings table; "utf-8-sig" drops a leading BOM if the file has one.
import_fp = Path("../data/processed", "istanbul_airbnb_processed.csv")
airbnb_df = pd.read_csv(import_fp, encoding="utf-8-sig")

# Processed listings shapefile, loaded as a GeoDataFrame.
import_fp = Path("../data/processed", "istanbul_airbnb_processed_shapefile.shp")
airbnb_gdf = gpd.read_file(import_fp)

# External district-level workbook; only the "main" sheet is read.
import_fp = Path("../data/external", "district_income.xlsx")
extra_stats_df = pd.read_excel(import_fp, sheet_name="main")

# External shapefile of the Istanbul districts.
import_fp = Path("../data/external", "istanbul_districts.shp")
istanbul_districts_gdf = gpd.read_file(import_fp)

## Summary Statistics

### Get column names

In [3]:
# Keep the column labels of the listings table and display them as the cell output.
column_names = airbnb_df.columns
column_names

Index(['listing_id', 'name', 'host_id', 'host_name', 'district_eng',
       'latitude', 'longitude', 'room_type', 'price', 'district_tr'],
      dtype='object')

### Average Price


In [4]:
# Mean of the "price" column across every row of the listings table.
airbnb_avg_price = airbnb_df["price"].mean()
airbnb_avg_price

379.7643146796431

### Minimum and Maximum Price

In [5]:
# Lowest and highest value of the "price" column, kept together as (min, max).
airbnb_min_max_price = (
    airbnb_df["price"].min(),
    airbnb_df["price"].max(),
)
airbnb_min_max_price

(18, 67609)

### Average rental price per district

In [6]:
# Group the listings by their "district_eng" value, then rank the districts
# from the highest mean price to the lowest.
airbnb_grouped_by_district = airbnb_df.groupby("district_eng")
airbnb_avg_price_per_district = (
    airbnb_grouped_by_district["price"]
    .mean()
    .sort_values(ascending=False)
)
airbnb_avg_price_per_district

district_eng
Catalca          976.000000
Silivri          891.542857
Beykoz           871.010417
Sariyer          818.111455
Kucukcekmece     722.720588
Buyukcekmece     647.426357
Sile             570.688312
Fatih            546.076466
Bayrampasa       509.833333
Basaksehir       507.928962
Arnavutkoy       473.600000
Adalar           461.964706
Bagcilar         416.900826
Esenyurt         395.024123
Beyoglu          389.503997
Sisli            371.013299
Zeytinburnu      364.734043
Besiktas         339.360078
Eyup             337.804054
Gaziosmanpasa    322.750000
Bakirkoy         318.975510
Kartal           306.297101
Tuzla            299.301587
Sultanbeyli      295.222222
Uskudar          278.786687
Beylikduzu       277.855422
Atasehir         276.695205
Gungoren         274.875000
Maltepe          244.020690
Esenler          238.947368
Cekmekoy         236.681818
Kagithane        235.238979
Avcilar          212.664062
Kadikoy          212.068602
Umraniye         200.012195
Bahceli

### Average rental price per room type 

In [7]:
# Group the listings by their "room_type" value, then rank the room types
# from the highest mean price to the lowest.
airbnb_grouped_by_room_type = airbnb_df.groupby("room_type")
airbnb_avg_price_per_room_type = (
    airbnb_grouped_by_room_type["price"]
    .mean()
    .sort_values(ascending=False)
)
airbnb_avg_price_per_room_type

room_type
Entire home/apt    473.327796
Private room       303.886878
Shared room        163.501859
Name: price, dtype: float64

## Spatial Distribution

### How many rentals in each district?

In [8]:
# Number of rows per "district_eng" value; value_counts() orders the result
# from the most frequent district to the least frequent.
airbnb_listing_count_per_district = airbnb_df["district_eng"].value_counts()
airbnb_listing_count_per_district

Beyoglu          4879
Sisli            2707
Fatih            2354
Kadikoy          1895
Besiktas         1533
Uskudar           661
Esenyurt          456
Kagithane         431
Sariyer           323
Atasehir          292
Maltepe           290
Bakirkoy          245
Bahcelievler      203
Basaksehir        183
Adalar            170
Umraniye          164
Pendik            163
Eyup              148
Kartal            138
Kucukcekmece      136
Buyukcekmece      129
Avcilar           128
Bagcilar          121
Beykoz             96
Zeytinburnu        94
Beylikduzu         83
Sile               77
Gungoren           64
Tuzla              63
Gaziosmanpasa      60
Cekmekoy           44
Silivri            35
Sancaktepe         34
Bayrampasa         24
Esenler            19
Sultanbeyli        18
Sultangazi         17
Arnavutkoy         10
Catalca             8
Name: district_eng, dtype: int64

## Bivariate Analysis