In [9]:
import pandas as pd
import matplotlib.pyplot as plt

df_bcn = pd.read_csv("../data/bcn_listings.csv", encoding="latin1")
df_mad = pd.read_csv("../data/mad_listings.csv", encoding="latin1")

In [10]:
def clean_for_host_analysis(df, city_name):
    df = df.copy()
    # Relevant columns to answer question 1
    cols = ["host_id", "id"]
    df = df[cols]
    # Eliminate rows with
    df = df.dropna(subset=["host_id", "id"])
    # Convert host_id to string type to avoid problems
    df["host_id"] = df["host_id"].astype(str)
    # Aggregate a city column to compare both dataframes
    df["city"] = city_name
    return df


df_bcn_clean = clean_for_host_analysis(df_bcn, "Barcelona")
df_mad_clean = clean_for_host_analysis(df_mad, "Madrid")


In [11]:
df_all = pd.concat([df_bcn_clean, df_mad_clean], ignore_index=True)


In [12]:
# Number of listing per host in each city
listings_per_host = df_all.groupby(["city", "host_id"]).size().reset_index(name="n_listings")

In [13]:
# Sort hosts from more properties to less
top_hosts = listings_per_host.sort_values(by=["city", "n_listings"], ascending=[True, False])
top_hosts.head(10)

Unnamed: 0,city,host_id,n_listings
3244,Barcelona,346367515,522
3333,Barcelona,357946540,423
805,Barcelona,1447144,355
1888,Barcelona,221480824,237
2989,Barcelona,32037490,228
4031,Barcelona,4459553,205
2396,Barcelona,265193861,164
4045,Barcelona,447375626,162
2765,Barcelona,299462,158
4530,Barcelona,504121118,139


In [14]:
# Proportion of the market controlled by top hosts
def top_host_concentration(df, top_n=10):
    df_city = df.copy()
    total = len(df_city)
    
    top_total = df_city.nlargest(top_n, "n_listings")["n_listings"].sum()
    
    return round((top_total / total) * 100, 2)


print("Barcelona:", top_host_concentration(listings_per_host[listings_per_host["city"]=="Barcelona"]), "%")
print("Madrid:", top_host_concentration(listings_per_host[listings_per_host["city"]=="Madrid"]), "%")


Barcelona: 39.07 %
Madrid: 22.73 %


In [15]:
# Create graph 3

# Data for each city
bcn_hosts = listings_per_host[listings_per_host["city"] == "Barcelona"]["n_listings"]
mad_hosts = listings_per_host[listings_per_host["city"] == "Madrid"]["n_listings"]

plt.figure(figsize=(10, 6))

# Overlapping histograms
plt.hist(bcn_hosts, bins=30, alpha=0.6, label="Barcelona")
plt.hist(mad_hosts, bins=30, alpha=0.6, label="Madrid")

plt.title("Distribution of the number of properties per host: Barcelona vs Madrid")
plt.xlabel("Number of properties per host")
plt.ylabel("Number of hosts")
plt.legend()

# Save in en reports/graph3.png
plt.savefig("../reports/graph3.png", dpi=300, bbox_inches="tight")

plt.close()



In [16]:
# Generate a conclusion based on the distribution of listings per host

# Average listings per host
bcn_mean = bcn_hosts.mean()
mad_mean = mad_hosts.mean()

# Max listings per host
bcn_max = bcn_hosts.max()
mad_max = mad_hosts.max()

# Number of hosts with more than 5 properties
bcn_big = (bcn_hosts > 5).sum()
mad_big = (mad_hosts > 5).sum()

conclusion = f"""
Conclusion on Host Concentration in Barcelona vs. Madrid

The analysis compares how listings are distributed among hosts in both cities.
Barcelona hosts have an average of {bcn_mean:.2f} listings, while Madrid hosts have an
average of {mad_mean:.2f}. The most active host in Barcelona manages {bcn_max} listings,
compared to {mad_max} in Madrid.

In terms of high-volume hosts (those with more than 5 properties), Barcelona has
{bcn_big} such hosts, whereas Madrid has {mad_big}. This suggests that {"Barcelona"
if bcn_mean > mad_mean else "Madrid"} tends to have a higher concentration of listings
in the hands of fewer hosts.

Overall, the distribution indicates that the market in {"Barcelona" if bcn_max > mad_max else "Madrid"}
shows a slightly stronger tendency toward host concentration.
"""

print(conclusion)



Conclusion on Host Concentration in Barcelona vs. Madrid

The analysis compares how listings are distributed among hosts in both cities.
Barcelona hosts have an average of 2.85 listings, while Madrid hosts have an
average of 2.28. The most active host in Barcelona manages 522 listings,
compared to 377 in Madrid.

In terms of high-volume hosts (those with more than 5 properties), Barcelona has
498 such hosts, whereas Madrid has 564. This suggests that Barcelona tends to have a higher concentration of listings
in the hands of fewer hosts.

Overall, the distribution indicates that the market in Barcelona
shows a slightly stronger tendency toward host concentration.

