In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Impact of Regulation on AirBnB business in Paris
</div>


![Paris Airbnb View](https://media.cntraveller.com/photos/675aff845b9d1f653f9dadab/master/w_1920%2Cc_limit/paris-airbnb%2520paris-dec24.jpeg)

This analysis explores the evolution of Airbnb in Paris, examining how regulations, locality trends, and pricing dynamics have shaped host participation and customer demand.

## Executive Summary

- **Rapid Growth Pre-2017**: Airbnb in Paris expanded quickly, with a surge of new hosts entering the market until 2016.  
- **Regulatory Shift in 2017**: The introduction of stricter rental regulations slowed new host entries, reducing supply growth.  
- **Resilient Demand**: Traveler demand remained strong, leading to a supply-demand gap and upward pressure on prices.  
- **Key Opportunities**: Success now depends on focusing on premium localities, implementing dynamic pricing, and adapting to regulatory constraints.  


In [None]:
# Load Reviews_data_dictionary.csv and display it in full.
review_dict = pd.read_csv(r"/kaggle/input/airbnb-listings-reviews/Airbnb Data/Reviews_data_dictionary.csv")
review_dict

In [None]:
# Load Reviews.csv and preview its first rows.
# NOTE(review): `review` is not referenced again in the visible cells.
review = pd.read_csv(r"/kaggle/input/airbnb-listings-reviews/Airbnb Data/Reviews.csv")
review.head()

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Importing Data and Exploring Column Information
</div>


In [None]:
# Load Listings_data_dictionary.csv and display it in full.
listings_dict = pd.read_csv(r"/kaggle/input/airbnb-listings-reviews/Airbnb Data/Listings_data_dictionary.csv")
listings_dict

In [None]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides any pandas/seaborn deprecation
# or copy-related warnings that later cells may raise — consider narrowing it.
warnings.filterwarnings("ignore")

In [None]:
# Load the full listings table (all cities) and preview it.
# encoding="ISO-8859-1": the file is decoded as Latin-1
#   (presumably it is not valid UTF-8 — TODO confirm).
# low_memory=False: parse the file in one pass so each column's dtype is
#   inferred from the whole column rather than chunk by chunk.
listings = pd.read_csv(r"/kaggle/input/airbnb-listings-reviews/Airbnb Data/Listings.csv",
                      encoding = "ISO-8859-1",low_memory=False)
listings.head()

In [None]:
# Column names, non-null counts and dtypes for the full listings table.
listings.info()

In [None]:
# Number of rows per city, to see which cities the dataset covers.
listings["city"].value_counts()

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Filtering rows to show only Paris data along with relevant columns
</div>


In [None]:
# Keep only the Paris rows and the six columns used in this analysis.
# Row and column selection happen in a single .loc call, and .copy() makes
# paris_listings an independent frame: later cells drop rows and assign new
# columns on it, which should not act on a slice of `listings`.
paris_columns = ["listing_id","host_since","neighbourhood","city","accommodates","price"]
paris_listings = listings.loc[listings["city"]=="Paris", paris_columns].copy()
paris_listings.info()

In [None]:
# Count missing values in each retained column.
paris_listings.isna().sum()

In [None]:
# Preview the first rows of the Paris subset.
paris_listings.head()

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Removing nulls
</div>


In [None]:
# Drop every row that has a missing value in any of the retained columns.
# Reassigning (instead of inplace=True) keeps the cell free of in-place
# mutation on a frame that was derived from `listings`.
paris_listings = paris_listings.dropna(how="any")

In [None]:
# Re-check the row count and non-null counts after dropping incomplete rows.
paris_listings.info()

In [None]:
# Parse host_since into datetimes so the .dt accessor can be used on it later.
paris_listings["host_since"] = pd.to_datetime(paris_listings["host_since"])

In [None]:
# Confirm the dtype of host_since after the datetime conversion.
paris_listings.info()

In [None]:
# Summary statistics; the min/max rows help spot implausible values such as zeros.
paris_listings.describe()

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Removing zeros since they indicate missing data
</div>


In [None]:
# Drop rows where accommodates or price is 0 (zeros are treated as missing data).
# A row is kept only if BOTH values are non-zero, hence `&`; with `|` a row
# survived whenever just one of the two values was non-zero.
# NOTE(review): re-check the row counts quoted in the cleaning summary after this change.
paris_listings = paris_listings[(paris_listings["accommodates"]!=0) & (paris_listings["price"]!=0)]

In [None]:
# Row count after the zero filter.
paris_listings.info()

In [None]:
# Re-check the minimum values after the zero filter.
paris_listings.describe()

In [None]:
# Keep only listings whose price is non-zero.
has_price = paris_listings["price"].ne(0)
paris_listings = paris_listings.loc[has_price]

In [None]:
# Summary statistics after removing zero-priced listings.
paris_listings.describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Detecting Outliers
</div>


In [None]:
# Box plot of nightly prices to visualise outliers.
# NOTE(review): the Series is passed positionally; how seaborn interprets the
# first positional argument may differ between versions — consider a keyword.
sns.boxplot(paris_listings["price"])

In [None]:
# Box plot of guest capacity to visualise outliers.
# NOTE(review): the Series is passed positionally; how seaborn interprets the
# first positional argument may differ between versions — consider a keyword.
sns.boxplot(paris_listings["accommodates"])

In [None]:
# Histogram of prices with a kernel-density overlay (kde=True).
sns.histplot(paris_listings["price"],kde = True)

In [None]:
# Outlier cutoff: mean price plus three population standard deviations (ddof=0).
price_mean = paris_listings["price"].mean()
price_std = paris_listings["price"].std(ddof=0)
upper_limit = price_mean + 3*price_std
upper_limit

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Removing Properties with Extremely High Prices (More Than 3 Standard Deviations Above the Mean)
</div>


In [None]:
# Keep only listings priced at or below the cutoff.
within_limit = paris_listings["price"].le(upper_limit)
paris_listings = paris_listings.loc[within_limit]

In [None]:
# Summary statistics after removing price outliers.
paris_listings.describe()

### Data Cleaning Summary

- **Initial entries:** 64,690  
- **Final entries after cleaning:** 64,192  
- **Entries removed:** 498  

This means that a total of **498 records** were removed during the cleaning process,  
which accounts for only **~0.77% of the dataset**.  

The cleaning process therefore preserved the vast majority of data,  
while ensuring that inconsistent or invalid records were excluded.


In [None]:
# Summary of the object-typed (string) columns only.
paris_listings.describe(include = "object")

In [None]:
# Number of cleaned listings in each neighbourhood.
paris_listings["neighbourhood"].value_counts()

In [None]:
# Mean price per neighbourhood, most expensive first.
paris_listings_avg_price = (
    paris_listings
    .groupby("neighbourhood")[["price"]]
    .mean()
    .sort_values(by="price", ascending=False)
)

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Looking at the Most relevant Localities in Paris
</div>


In [None]:
# Display the per-neighbourhood mean prices (sorted from highest to lowest).
paris_listings_avg_price

In [None]:
# Horizontal bar chart of mean nightly price per neighbourhood. The frame is
# already sorted by price (descending), so bars follow that order.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(
    data=paris_listings_avg_price,
    x="price",
    y=paris_listings_avg_price.index,
    color="#00A699",
    ax=ax,
)
ax.set_title("Neighbourhoods in Paris (Premium Localities at the Top!)")
ax.set_xlabel("How Expensive? (Euros)")
ax.set_ylabel("Prominent Localities with AirBnBs")
plt.show()

### Insight on Average Nightly Prices by Locality in Paris

- The **most expensive areas** are central and prestigious districts such as **Elysee (€170/night)**, **Louvre (€150/night)**, and **Luxembourg (€144/night)**. These neighborhoods are close to iconic landmarks, luxury shopping, and government or cultural institutions, which drives up demand and prices.  

- Mid-range prices are seen in areas like **Temple (€126/night)**, **Pantheon (€118/night)**, and **Opera (€114/night)**. These are still popular, but slightly less exclusive compared to the top-tier localities.  

- The **more affordable neighborhoods** include **Menilmontant (€72/night)**, **Buttes-Chaumont (€73/night)**, and **Buttes-Montmartre (€81/night)**. These districts are further from the tourist-heavy city center but may appeal to budget-conscious travelers or those seeking a more local experience.  

**Overall:** The price gradient reflects proximity to cultural hotspots and affluence of the locality. Central, historic, and tourist-heavy areas command higher prices, while outer neighborhoods offer budget-friendly options.


<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Does Price Change with the Number of Guests?
</div>


In [None]:
# Mean price per guest capacity, most expensive capacity first.
paris_listings_accomodates = (
    paris_listings
    .groupby("accommodates")[["price"]]
    .mean()
    .sort_values(by="price", ascending=False)
)

In [None]:
# Display the mean price for each guest capacity (sorted from highest to lowest).
paris_listings_accomodates

### Insight on Accommodation Capacity and Average Prices

- As expected, listings that accommodate **more guests** tend to have **higher nightly prices**.  
  - Small units for **1–2 guests** are the cheapest, averaging around **€56–79/night**.  
  - Medium-sized listings for **4–6 guests** range from **€118–198/night**.  
  - Large properties hosting **10+ guests** are much more expensive, often exceeding **€300/night**.

- The **highest average prices** are for large group accommodations:  
  - **13 guests: €475/night**  
  - **14 guests: €438/night**  
  - **11 guests: €420/night**  

- Interestingly, beyond a certain point (e.g., 15–16 guests), the average price does not continue to rise proportionally, suggesting a niche but limited demand for **very large properties**.

**Overall:** Prices scale with accommodation size, but the relationship is not perfectly linear. Small apartments dominate budget travel, while large listings cater to premium or group stays, commanding much higher rates.


In [None]:
# Average price per guest capacity: scatter of the group means plus a fitted trendline.
fig, ax = plt.subplots(figsize=(12, 8))

# "accommodates" is the index of the grouped frame, not a column.
sns.scatterplot(
    data=paris_listings_accomodates,
    x="accommodates",
    y="price",
    s=100,
    color="blue",
    ax=ax,
)

# Add a trendline (the index values are handed to regplot directly as x)
sns.regplot(
    data=paris_listings_accomodates,
    x=paris_listings_accomodates.index,
    y="price",
    scatter=False,
    color="red",
    line_kws={"linewidth": 2},
    ax=ax,
)

ax.set_title("Average Airbnb Price vs. Number of Guests Accommodated", fontsize=14, fontweight="bold")
ax.set_xlabel("Number of Guests (Accommodates)")
ax.set_ylabel("Average Price (€)")
ax.grid(alpha=0.3)
plt.show()

In [None]:
# Horizontal bars of mean price per guest capacity; `order` pins the bars to
# the frame's own (price-sorted) index order.
fig, ax = plt.subplots(figsize=(12, 8))
capacity_order = paris_listings_accomodates.index
sns.barplot(
    data=paris_listings_accomodates,
    x="price",
    y=capacity_order,
    color="#00A699",
    orient="h",
    order=capacity_order,
    ax=ax,
)
ax.set_title("Average Price as per Accomodates allowed")
ax.set_xlabel("How Expensive? (Euros)")
ax.set_ylabel("Number of Guests")
plt.show()

<div style="
    background-color: #FFA500; 
    color: purple; 
    padding: 15px; 
    border-radius: 10px; 
    text-align: center; 
    font-size: 28px; 
    font-weight: bold;
">
    Extracting the Year for a Time-Series Analysis of Prices and New Hosts Joining
</div>


In [None]:
# Extract the calendar year from host_since into a new column.
# NOTE(review): the year comes from host_since, so "year_of_listing" is really
# the year recorded for the host — presumably a proxy for listing date; confirm
# against the data dictionary.
paris_listings["year_of_listing"] = paris_listings["host_since"].dt.year

### Why Use the *host_since* Column?

The **`host_since`** column records the date when a host joined Airbnb; this analysis uses it as a proxy for when the host's property entered the market.  
Analyzing this column can provide valuable business insights because:

- **Market Entry Timing**: It shows when new hosts are joining the platform, which reflects Airbnb’s growth phases.  
- **Host Experience**: Longer tenure may correlate with more reliable hosts and better reviews, whereas recent hosts might need onboarding support.  
- **Business Cycles**: Peaks in new host sign-ups can indicate favorable periods for the short-term rental business (e.g., tourism booms, policy changes, or high-demand seasons).  
- **Impact of Regulations**: Sudden drops in new host listings may highlight external factors such as regulations or economic conditions affecting supply.  

In short, **tracking the `host_since` column helps identify the best times when the Airbnb business environment was most attractive for new entrants**, which can act as a proxy for "good times" in the market.


In [None]:
# Preview the frame with the new year_of_listing column.
paris_listings.head()

In [None]:
# Per-year summary: number of listings and their mean price.
# The result is indexed by year_of_listing.
listings_by_year = paris_listings.groupby("year_of_listing")
paris_listings_final_analysis = listings_by_year.agg(
    listing_id=("listing_id", "count"),
    price=("price", "mean"),
)
paris_listings_final_analysis

In [None]:
# Line chart of the number of listings in each year_of_listing group.
fig, ax = plt.subplots(figsize=(12, 6))

listing_years = paris_listings_final_analysis.index
ax.plot(
    listing_years,
    paris_listings_final_analysis['listing_id'],
    marker='o', linestyle='-', linewidth=2, color='teal', alpha=0.8,
)

# Titles and labels
ax.set_title("Number of New Airbnb Listings in Paris Over Time", fontsize=16, fontweight="bold", pad=15)
ax.set_xlabel("Year of Listing", fontsize=13)
ax.set_ylabel("Number of New Listings", fontsize=13)

# Grid and ticks: one tick per year, rotated; same font size on both axes
ax.grid(alpha=0.3, linestyle="--")
plt.xticks(listing_years, rotation=45, fontsize=11)
plt.yticks(fontsize=11)

# Highlight regulation year (2017)
ax.axvline(x=2017, color='red', linestyle='--', linewidth=2, alpha=0.7, label="Regulation (2017)")
ax.legend(fontsize=11)

fig.tight_layout()
plt.show()


### Insight on Airbnb Listings Over Time

- From **2008 to 2015**, the number of new Airbnb listings in Paris grew rapidly, peaking around **2015–2016** with more than 12,000 new hosts entering the market. This indicates a period of **high attractiveness and strong growth** for Airbnb, with both **supply (hosts)** and **demand (customers)** expanding significantly.  

- Starting in **2017**, there is a sharp decline in the number of new listings. This coincides with the **regulation imposed in Paris in 2017** that aimed to control short-term rentals due to housing shortages. The regulation clearly discouraged many new hosts from entering the market, leading to a **reduction in supply growth**.  

- The continued decline after 2017 suggests that the **barriers to entry remained high**, keeping potential new hosts from joining. On the demand side, while tourists and customers likely still sought Airbnbs, the **limited supply growth** would have constrained options and potentially increased prices.  

**Overall:**  
- **Pre-2017:** Strong growth in both supply (hosts) and demand (customers).  
- **Post-2017:** Regulation led to a slowdown in host entry, reducing supply expansion, even though customer demand likely remained steady or growing.


In [None]:
# Line chart of the mean price in each year_of_listing group.
fig, ax = plt.subplots(figsize=(12, 8))

# Line chart with markers; year_of_listing is the frame's index (the groupby key)
sns.lineplot(
    data=paris_listings_final_analysis,
    x="year_of_listing",
    y="price",
    marker="o",
    linewidth=2.5,
    color="teal",
    ax=ax,
)

# Titles and labels
ax.set_title("Average Airbnb Prices in Paris Over Time", fontsize=18, fontweight="bold", pad=15)
ax.set_xlabel("Year of Listing", fontsize=14)
ax.set_ylabel("Average Price (€)", fontsize=14)

# Grid styling
ax.grid(alpha=0.3, linestyle="--")

# Highlight regulation year (2017)
ax.axvline(x=2017, color="red", linestyle="--", linewidth=2, alpha=0.7, label="Regulation (2017)")

# Improve ticks
plt.xticks(fontsize=12, rotation=45)
plt.yticks(fontsize=12)

# Legend
ax.legend(fontsize=12)

fig.tight_layout()
plt.show()


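### Insight on Average Airbnb Prices Over Time

> **Note:** The dataset holds a single `price` per listing, and the chart groups those prices by the year taken from `host_since`. It therefore compares the prices of listings across host cohorts and is a proxy for, not a direct record of, the prices charged in each year.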

- **2008–2009:** Prices started relatively low (~€78) but spiked sharply in 2009 to over **€140**, possibly reflecting early market scarcity and premium pricing when Airbnb first entered Paris.  

- **2010–2014:** A consistent **downward trend** is visible, with average prices falling from ~€121 to ~€96. This suggests **increasing competition and supply** as more hosts entered the market, leading to price normalization.  

- **2015–2017:** Prices stabilized at just under €100–€101/night, marking a period of equilibrium between supply growth and customer demand.  

- **2018–2020:** Prices rose again to around **€115/night**, despite stricter **2017 regulations**. With supply growth slowing, fewer new listings may have pushed average prices upward as demand remained strong.  

- **2021 onwards:** A decline back toward ~€93 is visible, possibly reflecting **pandemic-related demand shocks** and a temporary oversupply compared to lower travel activity.  

**Overall:**  
- In the **early years**, scarcity kept prices high.  
- As supply expanded, **competition drove prices down**.  
- **Regulation (2017)** limited supply growth, causing a **price rebound**.  
- **Pandemic effects** likely caused the post-2020 decline.  


In [None]:
# Combined view: number of listings (left axis) and average price (right axis)
# per year_of_listing, with the regulation year and the period after it marked.

REGULATION_YEAR = 2017               # regulation year discussed throughout this notebook
REBOUND_YEAR = REGULATION_YEAR + 1   # first year after the regulation; target of the "rebound" arrow

yearly = paris_listings_final_analysis
# Year range for the title is read from the data so it cannot drift out of sync.
first_year = int(yearly.index.min())
last_year = int(yearly.index.max())

fig, ax = plt.subplots(figsize=(12,8))

# Plot number of listings (left y-axis)
ax.plot(
    yearly.index,
    yearly["listing_id"],
    color="teal", linewidth=2.5, marker="o", label="Number of Listings"
)
ax.set_xlabel("Year of Listing", fontsize=14)
ax.set_ylabel("Number of New Listings", fontsize=14, color="teal")
ax.tick_params(axis="y", labelcolor="teal")

# Secondary y-axis for price
ax2 = ax.twinx()
ax2.plot(
    yearly.index,
    yearly["price"],
    color="darkorange", linewidth=2.5, marker="s", label="Average Price (€)"
)
ax2.set_ylabel("Average Price (€)", fontsize=14, color="darkorange")
ax2.tick_params(axis="y", labelcolor="darkorange")
ax2.set_ylim(0)  # start the price axis at zero

# Titles and grid
plt.title(
    f"Airbnb Listings vs. Average Price in Paris ({first_year}–{last_year})",
    fontsize=18, fontweight="bold", pad=15
)
ax.grid(alpha=0.3, linestyle="--")

# Highlight regulation year
ax.axvline(
    x=REGULATION_YEAR, color="red", linestyle="--", linewidth=2, alpha=0.8,
    label=f"Regulation ({REGULATION_YEAR})"
)

# Shade the period from the regulation year to the last year in the data
ax.axvspan(REGULATION_YEAR, last_year, color="red", alpha=0.1)

# Add text inside shaded area
ax.text(
    REGULATION_YEAR + 0.5,
    ax.get_ylim()[1]*0.9,   # position text near the top of shaded area
    "Post-regulation era",
    color="red", fontsize=13, fontweight="bold"
)

# ---- Annotations ----
# Year with the most listings (computed from the data)
peak_year = yearly["listing_id"].idxmax()
peak_val = yearly["listing_id"].max()
ax.annotate(
    f"Peak Listings\n({peak_year}: {peak_val:,})",
    xy=(peak_year, peak_val),
    xytext=(peak_year-2, peak_val+2000),
    arrowprops=dict(arrowstyle="->", color="teal"),
    fontsize=12, color="teal"
)

# Year with the lowest average price (computed from the data)
low_year = yearly["price"].idxmin()
low_val = yearly["price"].min()
ax2.annotate(
    f"Lowest Price\n({low_year}: €{low_val:.0f})",
    xy=(low_year, low_val),
    xytext=(low_year+1, low_val-15),
    arrowprops=dict(arrowstyle="->", color="darkorange"),
    fontsize=12, color="darkorange"
)

# Price rebound arrow; skipped when the data has no row for REBOUND_YEAR,
# since the .loc lookup would otherwise raise KeyError.
if REBOUND_YEAR in yearly.index:
    ax2.annotate(
        "Price rebound\npost-regulation",
        xy=(REBOUND_YEAR, yearly.loc[REBOUND_YEAR, "price"]),
        xytext=(REGULATION_YEAR - 2.5, 130),
        arrowprops=dict(arrowstyle="->", color="darkorange"),
        fontsize=12, color="darkorange"
    )

# Legends: merge the handles of both axes into a single legend
lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax.legend(lines + lines2, labels + labels2, fontsize=12, loc="upper left")

plt.tight_layout()
plt.show()


## Final Insights and Recommendations

### Key Insights
1. **Strong Growth Pre-2017**  
   - Paris experienced rapid growth in Airbnb listings between 2008–2016, peaking in 2015–2016.  
   - This reflects high demand and strong attractiveness of the short-term rental market.  

2. **Impact of 2017 Regulations**  
   - The number of new hosts dropped sharply after the 2017 regulations that restricted short-term rentals.  
   - While demand from travelers remained steady, the reduced inflow of hosts slowed supply growth.  

3. **Price Dynamics**  
   - Average prices decreased between 2010–2014 due to increased competition from new hosts.  
   - Post-2017, prices rebounded as supply growth slowed, suggesting a supply-demand imbalance.  
   - During the pandemic (2020–2021), prices declined again due to demand shocks.  

4. **Locality-Based Trends**  
   - Central, prestigious areas (Louvre, Elysee, Luxembourg) command premium prices.  
   - Outer neighborhoods (Menilmontant, Buttes-Chaumont) remain more affordable, appealing to budget travelers.  

5. **Accommodation Capacity**  
   - Prices scale with the number of guests a listing can accommodate, but not proportionally.  
   - Large properties (10+ guests) cater to a niche segment, with very high average nightly prices.  

---

### Recommendations for the Business
1. **Focus on High-Demand Localities**  
   - Prioritize investments and partnerships in central, premium areas where demand is strongest and prices are resilient.  
   - Simultaneously, target budget-friendly neighborhoods to attract cost-sensitive travelers.  

2. **Leverage Supply-Demand Gap Post-2017**  
   - With stricter entry for new hosts, existing hosts can capture greater market share.  
   - Encourage quality improvements and unique value-adds (professional photos, amenities, flexible check-in) to stand out.  

3. **Diversify Accommodation Sizes**  
   - Promote medium-capacity listings (3–6 guests) that balance affordability and profitability.  
   - Large properties should be marketed as group or family stays, potentially bundling with experiences.  

4. **Dynamic Pricing Strategy**  
   - Implement data-driven pricing models that account for seasonality, locality, and capacity.  
   - Post-pandemic recovery presents an opportunity to adjust prices as travel demand rebounds.  

5. **Customer-Centric Growth**  
   - Enhance guest experience through verified reviews, safety measures, and customer support.  
   - Focus on repeat customers and long-term stays, especially in regulatory-constrained markets.  

6. **Policy Adaptation**  
   - Stay agile in response to local regulations.  
   - Explore partnerships or compliance-based strategies (e.g., registered professional hosting) to maintain competitiveness.  

---

✅ **Overall:** Paris Airbnb experienced strong growth until 2017, after which regulations constrained supply. Demand has remained steady, pushing prices higher post-regulation. Businesses should focus on **balancing premium and budget offerings, adopting dynamic pricing, and aligning with regulatory frameworks** to remain competitive in this evolving market.


<div style="background-color:#FFA500; padding:25px; border-radius:12px; text-align:center;">
  <h1 style="color:purple;">✅ Thank You</h1>
  <p style="color:purple; font-size:18px;">
    We appreciate your time reviewing this analysis.<br>
    For any questions or discussions, feel free to connect.
  </p>
</div>

<p style="text-align:center; color:gray; font-size:14px; margin-top:10px;">
  Airbnb Paris Analysis | 2025
</p>
