In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Suppress every warning category from this point on.
# NOTE(review): this also hides warnings that could point at real data issues.
warnings.filterwarnings("ignore")
sns.set(style="whitegrid")

# ---------- LOAD DATA ----------
# Read the three input tables (paths are relative to the working directory),
# in the order calendar -> listings -> reviews.
calendar, listings, reviews = (
    pd.read_csv(csv_path)
    for csv_path in ("calendar.csv", "listings.csv", "reviews.csv")
)

# ---------- CLEAN ----------
# Unparseable dates become NaT instead of raising.
calendar["date"] = pd.to_datetime(calendar["date"], errors="coerce")

# Strip "$" and thousands separators, then convert to float.
# The pattern is a raw string: "\$" is not a valid escape sequence in a normal
# string literal and triggers a SyntaxWarning on Python 3.12+.
calendar["price"] = (
    calendar["price"]
    .replace(r"[\$,]", "", regex=True)
    .astype(float)
)

# Convert the "t"/"f" flags to booleans. Series.map leaves every value that is
# not a key of the mapping (including missing values) as NaN, so missing
# availability stays missing here rather than being filled in.
calendar["available"] = calendar["available"].map({"t": True, "f": False})

# ---------- VIS 1: PRICE DISTRIBUTION ----------
# Histogram (50 bins) with a KDE overlay of every price in the calendar table.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(calendar["price"], bins=50, kde=True, color="steelblue", ax=ax)
ax.set_title("Price Distribution of Listings")
ax.set_xlabel("Price per Night (€)")
ax.set_ylabel("Number of Days")
fig.tight_layout()
fig.savefig("price_distribution.png", dpi=300)
plt.close(fig)  # release the figure once it is on disk

# ---------- VIS 2: AVAILABILITY OVER TIME ----------
# Rows whose availability flag is unknown (NaN after the "t"/"f" mapping) are
# excluded, and the remaining flags are cast to float (True -> 1.0,
# False -> 0.0) so the per-date mean is a well-defined fraction even when the
# column holds Python objects rather than a pure bool dtype.
# groupby() drops NaT dates by default, so unparseable dates are excluded too.
known_availability = calendar.loc[
    calendar["available"].notna(), ["date", "available"]
]
availability_trend = (
    known_availability["available"]
    .astype(float)
    .groupby(known_availability["date"])
    .mean()
    .reset_index()
)

plt.figure(figsize=(12, 5))
sns.lineplot(data=availability_trend, x="date", y="available", color="darkgreen")
plt.title("Average Availability Over Time")
plt.ylabel("Fraction of Available Listings")
plt.xlabel("Date")
plt.tight_layout()
plt.savefig("availability_trend.png", dpi=300)
plt.close()

# ---------- VIS 3: ROOM TYPE SHARE ----------
# Share of listings per room type, expressed in percent.
room_type_share = listings["room_type"].value_counts(normalize=True).mul(100)

fig, ax = plt.subplots(figsize=(6, 6))
room_type_share.plot.pie(
    ax=ax,
    autopct="%.1f%%",
    startangle=90,
    colors=sns.color_palette("Set2"),
)
ax.set_title("Room Type Distribution")
ax.set_ylabel("")  # blank out the y-axis label next to the pie
fig.tight_layout()
fig.savefig("room_type_share.png", dpi=300)
plt.close(fig)

# ---------- VIS 4: HOST CONCENTRATION ----------
# Counting by host_name merges different hosts that happen to share a name, so
# count by host_id whenever that column exists and use the name only as label.
# NOTE(review): assumes host_id uniquely identifies a host -- confirm against
# the listings schema.
if "host_id" in listings.columns:
    listings_by_host = listings.groupby("host_id")["host_name"]
    host_counts = listings_by_host.size().nlargest(10)

    # First non-null name recorded for each of the top hosts.
    host_names = [
        "Unknown" if pd.isna(name) else str(name)
        for name in listings_by_host.first().reindex(host_counts.index)
    ]
    # Append the id only where two top hosts share a name, so bars stay
    # distinguishable without cluttering every label.
    host_labels = [
        f"{name} ({host_id})" if host_names.count(name) > 1 else name
        for name, host_id in zip(host_names, host_counts.index)
    ]
    top_hosts = (
        host_counts.set_axis(host_labels)
        .rename_axis("host_name")
        .sort_values(ascending=True)
    )
else:
    # No host_id available: fall back to counting by name.
    top_hosts = (
        listings["host_name"]
        .value_counts()
        .head(10)
        .sort_values(ascending=True)
    )

plt.figure(figsize=(8, 5))
# Ascending order puts the largest host at the top of the horizontal bar chart.
top_hosts.plot.barh(color="teal")
plt.title("Top 10 Hosts by Number of Listings")
plt.xlabel("Number of Listings")
plt.tight_layout()
plt.savefig("top_hosts.png", dpi=300)
plt.close()

# ---------- VIS 5: AVG PRICE BY NEIGHBOURHOOD ----------
# Files written unconditionally by the earlier sections; the neighbourhood
# chart is appended only if it is actually produced, so the final message
# never reports a file that was not saved.
saved_files = [
    "price_distribution.png",
    "availability_trend.png",
    "room_type_share.png",
    "top_hosts.png",
]

# Both columns are needed; skip the chart if either is absent.
if {"neighbourhood", "price"}.issubset(listings.columns):
    # Apply the same currency cleanup used for calendar["price"]; it leaves an
    # already-numeric column unchanged apart from the float cast.
    listing_price = (
        listings["price"]
        .replace(r"[\$,]", "", regex=True)
        .astype(float)
    )
    price_by_neigh = (
        listing_price.groupby(listings["neighbourhood"])
        .mean()
        .sort_values(ascending=False)
        .head(15)
    )

    plt.figure(figsize=(10, 6))
    price_by_neigh.plot(kind="bar", color="salmon")
    plt.title("Top 15 Neighbourhoods by Average Price")
    plt.ylabel("Average Price (€)")
    plt.tight_layout()
    plt.savefig("avg_price_by_neighbourhood.png", dpi=300)
    plt.close()
    saved_files.append("avg_price_by_neighbourhood.png")

print(" Visuals saved: " + ", ".join(saved_files))
