# 🏠 Airbnb Price Prediction - EDA
This notebook performs initial exploratory data analysis on the Airbnb dataset.

In [None]:
# Setup: Clone repo and set working directory
# This allows anyone opening this notebook in Colab to access all project files
!git clone https://github.com/babakbaradaran/ml-projects.git
%cd ml-projects/01_airbnb_price_regression/

In [None]:
# Step 1: Import libraries
# The libraries imported here are used for:
# - Reading and working with data (`pandas`, `numpy`)
# - Plotting (`matplotlib`, `seaborn`)
# After the imports, pandas is configured to display more columns.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Show up to 100 columns when a wide DataFrame is displayed.
pd.set_option('display.max_columns', 100)
# Use seaborn's white-grid style for all plots. `set_theme` is the preferred
# interface; `sns.set` is only an alias for it.
sns.set_theme(style='whitegrid')

In [None]:
# Step 2: Load the dataset
# Read the listings CSV (path is relative to the current working directory)
# into `df`, then preview the first rows.
listings_path = 'data/raw/listings.csv'
df = pd.read_csv(listings_path)
df.head()

In [None]:
# Step 3: Data overview
# Report the (rows, columns) shape of the frame, followed by each column's dtype.
frame_shape = df.shape
print("Shape:", frame_shape)

column_dtypes = df.dtypes
print("\nData Types:\n", column_dtypes)

In [None]:
# Step 4: Check missing values
# Count nulls per column, rank the columns from most to fewest missing values,
# and display the 15 columns with the most missing values.
null_counts = df.isnull().sum()
null_counts_ranked = null_counts.sort_values(ascending=False)
null_counts_ranked.head(15)

In [None]:
# Step 5: Price distribution
# Histogram of the raw `price` column (100 bins) with a KDE curve overlaid,
# drawn on an explicitly created 10x5 figure.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(df['price'], bins=100, kde=True, ax=ax)
ax.set_title('Distribution of Price (Raw)')
ax.set_xlabel('Price')
plt.show()

In [None]:
# Step 6: Log-transformed price (optional)
# Store log1p(price) = log(1 + price) in a new `price_log` column and plot its
# distribution. log1p keeps a price of 0 finite (it maps to 0).
price = df['price']
if not pd.api.types.is_numeric_dtype(price):
    # NOTE(review): if `price` was read as text (e.g. "$1,234.00" - confirm
    # against the CSV), np.log1p would raise. Strip currency symbols and
    # thousands separators first; unparseable values become NaN.
    price = pd.to_numeric(
        price.astype(str).str.replace(r'[$,]', '', regex=True),
        errors='coerce',
    )
df['price_log'] = np.log1p(price)

# Same figure size as the raw price plot so the two are directly comparable.
plt.figure(figsize=(10, 5))
sns.histplot(df['price_log'], bins=50)
plt.title('Distribution of Log-Transformed Price')
plt.xlabel('log(1 + Price)')
plt.show()