In [None]:
# Banner announcing what this notebook covers.
notebook_title = "Data Visualization with Airbnb NYC Dataset"
print(notebook_title)

In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Global plotting defaults: seaborn's white grid theme first, then the
# default figure size (set afterwards so the theme call does not override it).
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 6)})

In [None]:
# Location of the dataset. By default the CSV is looked up relative to the
# current working directory; set the AIRBNB_CSV environment variable to load
# it from somewhere else. (Avoids a machine-specific absolute path.)
data_path = os.environ.get("AIRBNB_CSV", "AB_NYC_2019.csv")
df = pd.read_csv(data_path)
df.head()

In [None]:
# Count missing values in each column of the freshly loaded data.
missing_per_column = df.isna().sum()
missing_per_column

In [None]:
# Discard any row that lacks a listing name or a price.
required_columns = ["name", "price"]
df = df.dropna(subset=required_columns)

In [None]:
# Re-count missing values per column now that rows without name/price are gone.
missing_after_drop = df.isna().sum()
missing_after_drop

In [None]:
# Replace missing reviews_per_month values with 0.
df = df.assign(reviews_per_month=df["reviews_per_month"].fillna(0))

In [None]:
# Show the first 20 reviews_per_month values after filling.
reviews_preview = df["reviews_per_month"].head(20)
print(reviews_preview)

In [None]:
# Mean listing price per neighbourhood group, sorted ascending.
avg_price = df.groupby("neighbourhood_group")["price"].mean().sort_values()

# Ensure the output directory exists before anything is saved into it.
os.makedirs("images", exist_ok=True)

# Draw, save and show the figure within a single cell. With the inline
# backend a figure is closed once its cell finishes, so calling savefig from
# a later cell would write an empty image instead of this plot.
fig, ax = plt.subplots()
sns.lineplot(x=avg_price.index, y=avg_price.values, marker='o', ax=ax)
ax.set_title("Average Airbnb Price by Neighborhood Group")
ax.set_ylabel("Average Price ($)")
ax.set_xlabel("Neighborhood Group")
fig.tight_layout()

# Save the figure
fig.savefig("images/lineplot_avg_price.png")
plt.show()

In [None]:
# Keep only listings priced under $500 so outliers do not stretch the axis.
under_500 = df['price'] < 500
df_price = df.loc[under_500]

# Histogram of the filtered prices (40 bins) with a KDE curve overlaid.
fig, ax = plt.subplots()
sns.histplot(df_price['price'], bins=40, kde=True, ax=ax)
ax.set_title("Distribution of Airbnb Prices (Under $500)")
ax.set_xlabel("Price ($)")
fig.tight_layout()
fig.savefig("images/histogram_price_distribution.png")
plt.show()


In [None]:
# Box plot of price for each room type, using the price-filtered listings.
fig, ax = plt.subplots()
sns.boxplot(data=df_price, x="room_type", y="price", ax=ax)
ax.set(
    title="Price Distribution by Room Type",
    xlabel="Room Type",
    ylabel="Price ($)",
)
fig.tight_layout()
fig.savefig("images/boxplot_room_type.png")
plt.show()


In [None]:
# Restrict to listings under $500 with fewer than 200 reviews so extreme
# values do not dominate the scatter plot.
price_ok = df['price'] < 500
reviews_ok = df['number_of_reviews'] < 200
df_scatter = df[price_ok & reviews_ok]

In [None]:
# Scatter of price against review count, coloured by room type.
fig, ax = plt.subplots()
sns.scatterplot(
    data=df_scatter,
    x='number_of_reviews',
    y='price',
    hue='room_type',
    ax=ax,
)
ax.set_title("Price vs. Number of Reviews by Room Type")
ax.set_xlabel("Number of Reviews")
ax.set_ylabel("Price ($)")
fig.tight_layout()
fig.savefig("images/scatter_price_reviews.png")
plt.show()

In [None]:
# Columns included in the correlation matrix.
numeric_features = [
    "price",
    "minimum_nights",
    "number_of_reviews",
    "reviews_per_month",
    "availability_365",
]
num_cols = df[numeric_features]
corr = num_cols.corr()

# Annotated heatmap of the pairwise correlations.
fig, ax = plt.subplots()
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap of Numerical Features")
fig.tight_layout()
fig.savefig("images/heatmap_correlation.png")
plt.show()


In [None]:
# Final confirmation once every plotting cell has run.
done_message = "✅ All plots generated and saved in 'images/' folder!"
print(done_message)
