In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from folium.plugins import HeatMap
import folium

# Load the datasets
# All three CSV files are read from the same local directory.
DATA_DIR = '/Users/harshapatel/downloads/Seattle_Airbnb'
listings_df = pd.read_csv(f'{DATA_DIR}/listings.csv')
reviews_df = pd.read_csv(f'{DATA_DIR}/reviews.csv')
calendar_df = pd.read_csv(f'{DATA_DIR}/calendar.csv')

# Preprocess data
def _price_to_float(prices):
    """Convert a Series of price strings such as '$1,250.00' to floats.

    Every dollar sign and comma is removed before the cast; entries that
    are missing stay NaN.
    """
    # Raw string on purpose: in a normal literal '\$' is an invalid escape
    # sequence, which newer Python versions report with a SyntaxWarning.
    return prices.str.replace(r'[\$,]', '', regex=True).astype(float)


## Convert price to numeric
calendar_df['price'] = _price_to_float(calendar_df['price'])
listings_df['price'] = _price_to_float(listings_df['price'])

## Handle missing values and convert data types as necessary
# Missing ratings are imputed with the column mean. NOTE: every imputed row
# carries exactly that mean value, so distribution plots of this column will
# show an extra concentration at the mean.
mean_rating = listings_df['review_scores_rating'].mean()
listings_df['review_scores_rating'] = listings_df['review_scores_rating'].fillna(mean_rating)

# Visualizations
## Price Distribution by Property Type
# Property types are listed from most to least frequent.
property_types_by_count = listings_df['property_type'].value_counts().index
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(
    data=listings_df,
    x='price',
    y='property_type',
    order=property_types_by_count,
    ax=ax,
)
ax.set_title('Price Distribution by Property Type')
ax.set_xlabel('Price ($)')
ax.set_ylabel('Property Type')
# Only the view is clipped at $600; listings priced above that are still in
# the data, they just fall outside the visible x-range.
ax.set_xlim(0, 600)
plt.show()

## Geographic Distribution of Listings (Heatmap)
# Keep only rows that have both coordinates: folium's HeatMap rejects data
# containing NaN values.
listing_coords = listings_df[['latitude', 'longitude']].dropna()

# Named `listings_map` (not `map`) so the builtin map() is not shadowed for
# the remainder of the script.
listings_map = folium.Map(
    # Centre the map on the mean listing position.
    location=[listing_coords['latitude'].mean(), listing_coords['longitude'].mean()],
    zoom_start=12,
)
# Pass a plain list of [lat, lng] points, the form HeatMap documents.
HeatMap(listing_coords.values.tolist(), radius=10).add_to(listings_map)
# Written relative to the current working directory.
listings_map.save('listings_heatmap.html')

## Average Price by Property Type
# Mean listing price per property type, most expensive type first.
mean_price_per_type = listings_df.groupby('property_type')['price'].mean()
average_price_by_property_type = mean_price_per_type.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(12, 8))
average_price_by_property_type.plot(kind='bar', color='cadetblue', ax=ax)
ax.set_title('Average Price by Property Type')
ax.set_xlabel('Property Type')
ax.set_ylabel('Average Price ($)')
# Tilt the category labels and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()

## Distribution of Review Scores
# Histogram (20 bins) with a KDE overlay of the non-null review ratings.
review_scores = listings_df['review_scores_rating'].dropna()
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(review_scores, bins=20, kde=True, color='skyblue', ax=ax)
ax.set_title('Distribution of Review Scores Rating')
ax.set_xlabel('Review Score Rating')
ax.set_ylabel('Number of Listings')
# Dashed horizontal grid lines only, to help read the counts.
ax.grid(axis='y', linestyle='--')
plt.show()

