In [None]:
# NYC Airbnb Analysis

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" look for every figure in this notebook
sns.set(style="whitegrid")

# Read the listings CSV; the path is relative to the notebook's working directory
csv_path = 'Data/AB_NYC_2019.csv'
data = pd.read_csv(csv_path)

# Preview the first rows to confirm the file loaded as expected
data.head()


## Data Overview and Cleaning

Before diving into the analysis, let's check for any missing values, review the data types, and ensure the data is ready for analysis.


In [None]:
# Count missing values per column. Only the last expression of a cell is
# rendered automatically, so this has to be printed explicitly to be seen.
print(data.isnull().sum())

# Drop columns not needed for the analysis. Reassigning (instead of
# inplace=True) together with errors='ignore' keeps the cell re-runnable:
# a second run no longer fails because 'name' is already gone.
data = data.drop(columns=['name'], errors='ignore')

# Fill missing values by assigning the result back to the column.
# `data[col].fillna(..., inplace=True)` operates on an intermediate Series;
# it is deprecated in pandas 2.x and does not update `data` under Copy-on-Write.
data['reviews_per_month'] = data['reviews_per_month'].fillna(0)
data['last_review'] = data['last_review'].fillna('No Review')

# Check data types and non-null counts after cleaning
data.info()


### Summary Statistics for Price, Minimum Nights, and Availability

This section provides summary statistics for the main metrics of interest: `price`, `minimum_nights`, and `availability_365`.


In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the key metrics
summary_columns = ['price', 'minimum_nights', 'availability_365']
data[summary_columns].describe()


### Average Price by Neighborhood

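Visualizing the average price by neighbourhood group (the `neighbourhood_group` column, i.e. the borough-level areas such as Manhattan and the Bronx) helps identify which areas command the highest and lowest prices, providing insights into potential profitability.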


In [None]:
# Mean listing price per neighbourhood group, ordered from lowest to highest
price_by_group = data.groupby('neighbourhood_group')['price']
avg_price_neighborhood = price_by_group.mean().sort_values()

# Bar chart of the averages, drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=avg_price_neighborhood.index,
    y=avg_price_neighborhood.values,
    palette="viridis",
    ax=ax,
)
ax.set_title('Average Price by Neighborhood')
ax.set_xlabel('Neighborhood')
ax.set_ylabel('Average Price ($)')
plt.show()


### Distribution of Room Types

Examining the distribution of room types shows what types of accommodations are most common on Airbnb in NYC.


In [None]:
# Count of listings for each room type, drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=data, x='room_type', palette="coolwarm", ax=ax)
ax.set_title('Room Type Distribution')
ax.set_xlabel('Room Type')
ax.set_ylabel('Number of Listings')
plt.show()


### Availability Analysis by Price Range

Breaking down availability by price range helps us understand if more expensive listings are available as often as budget-friendly ones.


In [None]:
# Define price ranges and categorize.
# The top edge is open-ended (infinity) rather than data['price'].max(): if the
# maximum price were <= 1000 the edges would no longer be strictly increasing
# and pd.cut would raise. Every price above 1000 still lands in 'Luxury'.
price_bins = [0, 50, 150, 500, 1000, float('inf')]
price_labels = ['Low', 'Medium', 'High', 'Very High', 'Luxury']

# Bins are right-closed by default, so the first one would be (0, 50] and any
# listing priced at exactly 0 would silently become NaN. include_lowest=True
# makes the first bin include its left edge.
data['price_range'] = pd.cut(
    data['price'],
    bins=price_bins,
    labels=price_labels,
    include_lowest=True,
)

# Average availability by price range.
# observed=False keeps every price range in the result (an empty range shows
# as NaN) and states explicitly the behaviour that recent pandas versions warn
# about when `observed` is omitted for a categorical grouper.
availability_by_price = data.groupby('price_range', observed=False)['availability_365'].mean()

# Plot
plt.figure(figsize=(8, 5))
sns.barplot(x=availability_by_price.index, y=availability_by_price.values, palette="magma")
plt.title('Average Availability by Price Range')
plt.xlabel('Price Range')
plt.ylabel('Average Availability (days)')
plt.show()


### Seasonal Review Trends

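Analyzing the month of each listing's most recent review (`last_review`) can hint at seasonality, indicating peak tourist seasons for NYC Airbnb listings. Note that this counts listings by the month they were last reviewed, not the total number of reviews written in each month.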


In [None]:
# Convert last_review to datetime. Anything that is not a parseable date
# (e.g. a non-date placeholder string or a missing value) becomes NaT.
data['last_review'] = pd.to_datetime(data['last_review'], errors='coerce')

# Extract the month from last_review (NaN where there is no review date)
data['review_month'] = data['last_review'].dt.month

# Number of listings whose most recent review falls in each calendar month.
# Each row contributes at most one date, so this counts listings, not reviews.
# Rows without a date are dropped, months are cast to int (the column is float
# because of NaN), and the result is reindexed to 1..12 so a month with no
# listings is plotted as 0 instead of being skipped by the line.
monthly_reviews = (
    data['review_month']
    .dropna()
    .astype(int)
    .value_counts()
    .reindex(range(1, 13), fill_value=0)
)

# Plot; labels describe what is actually counted (listings by last-review month)
plt.figure(figsize=(10, 6))
sns.lineplot(x=monthly_reviews.index, y=monthly_reviews.values, marker='o')
plt.title('Listings by Month of Most Recent Review')
plt.xlabel('Month')
plt.ylabel('Number of Listings')
plt.xticks(range(1, 13))
plt.show()


### Top Hosts by Number of Listings

Identifying hosts with the most listings helps reveal larger property managers or more active hosts in the market.


In [None]:
# Top 10 hosts by number of listings (value_counts sorts by count, descending)
top_hosts = data['host_id'].value_counts().head(10)

# Plot.
# Host ids are identifiers, not quantities. Passing them as strings makes the
# axis categorical in order of appearance, so the bars keep the ranking from
# value_counts (largest first) instead of being re-sorted by id value.
host_labels = top_hosts.index.astype(str)

plt.figure(figsize=(10, 6))
sns.barplot(x=host_labels, y=top_hosts.values, palette="inferno")
plt.title('Top 10 Hosts by Number of Listings')
plt.xlabel('Host ID')
plt.ylabel('Number of Listings')
# Long ids overlap when drawn horizontally; rotate them to keep them legible
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


## Conclusion

This analysis revealed several key insights into the NYC Airbnb market:
- **Average Price by Neighborhood**: Manhattan has the highest average price, while the Bronx is more budget-friendly.
- **Room Type Distribution**: Entire homes are the most common room type among listings, followed by private rooms.
- **Availability Trends by Price**: Luxury listings have lower availability, indicating demand, while budget listings are generally more available.
- **Seasonal Demand**: Monthly review trends suggest peak demand in summer months.

These insights can guide Airbnb hosts, investors, and guests in understanding the dynamics of NYC’s short-term rental market.
