In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Load Dataset
# NOTE: 'listings.csv' is a relative path, so the file must be in the
# current working directory (the folder the script/notebook runs from).
df = pd.read_csv('listings.csv')

# 3. Quick Look at Data
# Print, in order: (rows, columns) shape, the column labels, the first rows.
for overview in (df.shape, df.columns, df.head()):
    print(overview)

# 4. Data Cleaning

# Clean 'price' column if it exists: strip the '$' sign and the thousands
# separators, then convert to float so it can be aggregated numerically.
if 'price' in df.columns:
    # Raw string for the regex: '\$' in a plain string literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    df['price'] = df['price'].replace({r'\$': '', ',': ''}, regex=True).astype(float)

# Label missing values only in non-numeric columns. Writing the string
# 'Missing' into numeric columns (such as 'price') would turn them into
# mixed-type object columns and break numeric aggregations like mean();
# numeric gaps are therefore left as NaN, which pandas skips by default.
non_numeric_cols = df.select_dtypes(exclude='number').columns
df.fillna({col: 'Missing' for col in non_numeric_cols}, inplace=True)

# 5. Basic Analysis

# Top 10 neighborhoods by listing count
# value_counts() orders by descending frequency, so head(10) keeps the
# ten most frequent values of 'neighbourhood_cleansed'.
listings_per_neighborhood = df['neighbourhood_cleansed'].value_counts()
top_neighborhoods = listings_per_neighborhood.head(10)
print(top_neighborhoods)

# Average price by neighborhood
# Mean 'price' per neighborhood, sorted highest first, top ten kept.
mean_price_by_neighborhood = df.groupby('neighbourhood_cleansed')['price'].mean()
avg_price_neighborhood = mean_price_by_neighborhood.sort_values(ascending=False).head(10)
print(avg_price_neighborhood)

# 6. Visualization

# Plot top neighborhoods: listing counts on the x-axis, neighborhood
# names (the index of top_neighborhoods) on the y-axis.
plt.figure(figsize=(10, 6))
count_ax = sns.barplot(
    x=top_neighborhoods.values,
    y=top_neighborhoods.index,
    palette='viridis',
)
# Set the labels after plotting so they override any labels seaborn applied.
count_ax.set_title('Top 10 Neighborhoods by Number of Listings')
count_ax.set_xlabel('Number of Listings')
count_ax.set_ylabel('Neighborhood')
plt.show()

# Plot average prices: mean price on the x-axis, neighborhood names
# (the index of avg_price_neighborhood) on the y-axis.
plt.figure(figsize=(10, 6))
price_ax = sns.barplot(
    x=avg_price_neighborhood.values,
    y=avg_price_neighborhood.index,
    palette='magma',
)
# One call sets the title and both axis labels on the Axes seaborn drew on.
price_ax.set(
    title='Top 10 Neighborhoods by Average Price',
    xlabel='Average Price ($)',
    ylabel='Neighborhood',
)
plt.show()

# 7. Save Cleaned Data (Optional)
# index=False keeps the DataFrame's row index out of the written CSV.
cleaned_csv_path = 'airbnb_cleaned.csv'
df.to_csv(cleaned_csv_path, index=False)
