<a href="https://colab.research.google.com/github/enkhe/CS506-TP/blob/main/TP01_002_collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 01 - Download the Data


In [None]:
# Download the Data
print("Downloading data.")

import os
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse


class DataLoader:
    """
    Handles downloading and loading of Airbnb datasets for all cities and states in the United States.
    """
    def __init__(self, data_dir: str = 'data'):
        self.data_dir = data_dir
        os.makedirs(self.data_dir, exist_ok=True)

    def get_us_cities(self):
        """
        Scrapes the Inside Airbnb website to get all US cities and states with available data.
        """
        base_url = 'https://insideairbnb.com/get-the-data.html'
        response = requests.get(base_url)
        if response.status_code != 200:
            raise Exception(f"Failed to fetch data from {base_url}")

        soup = BeautifulSoup(response.text, 'html.parser')
        links = soup.find_all('a', href=True)

        us_cities = []
        for link in links:
            href = link['href']
            if 'http' in href and 'united-states' in href:
                # Extract city, state, and country from the link
                city, state, _ = self.extract_city_state_from_url(href)  # Ignore the country value
                if city and state:
                    us_cities.append((city, state, href))

        return us_cities

    def extract_city_state_from_url(self, url):
        """
        Extracts city, state, and country from the given URL.
        Assumes the URL contains country, state, and city information in the format:
        https://data.insideairbnb.com/{Country}/{State}/{City}/{Date}/{DataType}/{DataFile}
        """
        # Parse the URL path
        path = urlparse(url).path
        # Split the path into segments
        segments = path.strip('/').split('/')
        if len(segments) >= 3:
            country = segments[0].replace('-', '').title()  # Convert to "UnitedStates" format
            state = segments[1].upper()  # Convert to "OR" format
            city = segments[2].replace('-', ' ').title()  # Convert to "Portland" format
            return city, state, country
        return None, None, None

    def save_file_to_city_state_folder(self, url, file_content):
        """
        Saves the file content to a folder named after the city, state, and country in the format {CityName}-{State}-{Country}.
        """
        # Extract city, state, and country from the URL
        city, state, country = self.extract_city_state_from_url(url)
        if not city or not state or not country:
            raise ValueError("City, state, or country could not be extracted from the URL.")

        # Create folder name in the format {CityName}-{State}-{Country}
        folder_name = f"{city.replace(' ', '')}-{state}-{country}"
        folder_path = os.path.join(self.data_dir, folder_name)
        os.makedirs(folder_path, exist_ok=True)  # Create the folder if it doesn't exist

        # Save the file in the folder
        file_name = os.path.basename(urlparse(url).path)  # Extract the file name from the URL
        file_path = os.path.join(folder_path, file_name)
        with open(file_path, 'wb') as file:
            file.write(file_content)

        print(f"File saved to: {file_path}")

    def download_data(self, city: str, state: str, file_type: str, download_link: str):
        """
        Downloads the specified type of data file for the city and state.
        """
        print(f"Downloading data for {city}, {state} from {download_link}...")

        response = requests.get(download_link)
        if response.status_code == 200:
            self.save_file_to_city_state_folder(download_link, response.content)
        else:
            print(f"Failed to download data for {city}, {state}")


def main():
    data_loader = DataLoader()

    # Get all US cities and states with available data
    us_cities = data_loader.get_us_cities()
    print(f"Found {len(us_cities)} US cities with available data.")

    # Iterate through all cities and download their data
    for city, state, download_link in us_cities:
        try:
            data_loader.download_data(city, state, 'listings', download_link)
        except Exception as e:
            print(f"Error downloading data for {city}, {state}: {e}")


if __name__ == '__main__':
    main()



print("Done! Downloaded data.")

Downloading data.
Found 238 US cities with available data.
Downloading data for Albany, NY from https://data.insideairbnb.com/united-states/ny/albany/2025-03-02/data/listings.csv.gz...
File saved to: data/Albany-NY-Unitedstates/listings.csv.gz
Downloading data for Albany, NY from https://data.insideairbnb.com/united-states/ny/albany/2025-03-02/data/calendar.csv.gz...
File saved to: data/Albany-NY-Unitedstates/calendar.csv.gz
Downloading data for Albany, NY from https://data.insideairbnb.com/united-states/ny/albany/2025-03-02/data/reviews.csv.gz...
File saved to: data/Albany-NY-Unitedstates/reviews.csv.gz
Downloading data for Albany, NY from https://data.insideairbnb.com/united-states/ny/albany/2025-03-02/visualisations/listings.csv...
File saved to: data/Albany-NY-Unitedstates/listings.csv
Downloading data for Albany, NY from https://data.insideairbnb.com/united-states/ny/albany/2025-03-02/visualisations/reviews.csv...
File saved to: data/Albany-NY-Unitedstates/reviews.csv
Downloading 

# Section 02 - Data Analysis - Price Distributions of Cities

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas
from shapely.geometry import Point
import os
import glob
from geopy.geocoders import Nominatim
from shapely.geometry import box
from datetime import datetime
import time

# Create the report directory if it doesn't exist
def makeReportDirectory():
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    reportDirectoryName = os.path.join("report", f"exports_report_{timestamp}")
    os.makedirs(reportDirectoryName, exist_ok=True)
    return reportDirectoryName

# Base directory containing city folders
base_data_dir = "data"
report_dir = makeReportDirectory()

# Set font for Matplotlib
plt.rcParams['font.family'] = 'Dejavu Sans'  # Changed font family here

def save_plot(city_name, plot_type):
    """Saves the current Matplotlib plot to the city's report folder with a timestamp."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    city_report_dir = os.path.join(report_dir, city_name)
    os.makedirs(city_report_dir, exist_ok=True)
    filename = os.path.join(city_report_dir, f"{city_name}_{plot_type}_{timestamp}.png")
    plt.savefig(filename)
    plt.close()
    print(f"Saved plot: {filename}")

def clean_data(df, city_name):
    """
    Cleans the input DataFrame to handle missing values and data type issues.

    Args:
        df (pd.DataFrame): The input DataFrame.
        city_name (str): The name of the city.

    Returns:
        pd.DataFrame: The cleaned DataFrame, or None if the data is invalid.
    """
    print(f"\n--- Cleaning data for {city_name} ---")

    # Check for empty DataFrame
    if df.empty:
        print(f"Error: DataFrame for {city_name} is empty.")
        return None

    # Basic column existence check
    required_columns = ['latitude', 'longitude', 'price', 'neighbourhood_group', 'room_type', 'neighbourhood', 'host_name', 'id', 'reviews_per_month', 'availability_365', 'minimum_nights', 'license']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Error: DataFrame for {city_name} is missing the following columns: {missing_columns}")
        return None

    # Convert 'price' to numeric, handling errors
    df['price'] = pd.to_numeric(df['price'], errors='coerce')
    print(f"Number of invalid price values in {city_name}: {df['price'].isnull().sum()}")
    df.dropna(subset=['price'], inplace=True)  # Drop rows where price is NaN after conversion

    # Handle missing values in other columns
    df['neighbourhood_group'].fillna('Unknown', inplace=True)
    df['room_type'].fillna('Unknown', inplace=True)
    df['neighbourhood'].fillna('Unknown', inplace=True)
    df['host_name'].fillna('Unknown', inplace=True)
    df['reviews_per_month'].fillna(0, inplace=True)  # Assume 0 for missing reviews
    df['availability_365'].fillna(0, inplace=True)
    df['minimum_nights'].fillna(1, inplace=True)  # Assume 1 night if missing
    df['license'].fillna('Unknown', inplace=True)

    # Check for valid latitude and longitude ranges
    if not ((-90 <= df['latitude'].min() <= 90) and (-180 <= df['longitude'].min() <= 180) and
            (-90 <= df['latitude'].max() <= 90) and (-180 <= df['longitude'].max() <= 180)):
        print(f"Error: Invalid latitude or longitude values in {city_name}")
        return None

    print(f"Data cleaning for {city_name} complete.")
    return df

# Function to perform the visualizations for a given city's DataFrame
def visualize_city_data(df, city_name):
    print(f"\n--- Generating visualizations for {city_name} ---")


    # Data Cleaning for boxplot (price vs. neighbourhood_group & room_type)
    df_cleaned_boxplot = df.dropna(subset=['neighbourhood_group', 'price', 'room_type']).copy()
    df_cleaned_boxplot['price'] = pd.to_numeric(df_cleaned_boxplot['price'], errors='coerce')
    df_cleaned_boxplot.dropna(subset=['price'], inplace=True)

    # 1. Map of Listings by Neighborhood (Price as Color)
    # ----------------------------------------------------
    # try:
    #     geometry = [Point(xy) for xy in zip(df['longitude'].dropna(), df['latitude'].dropna())]
    #     gdf = geopandas.GeoDataFrame(df.dropna(subset=['longitude', 'latitude']).copy(), geometry=geometry, crs="EPSG:4326")

    #     geolocator = Nominatim(user_agent="multi_city_geo_script")
    #     # Corrected geocoding: Extract city name from directory
    #     city_name_for_geocode = city_name.split('-')[0]  # Get the first part before the first hyphen
    #     location_query = city_name_for_geocode
    #     location = geolocator.geocode(location_query)

    #     if location:
    #         city_bounds = box(location.longitude - 0.1, location.latitude - 0.1,
    #                              location.longitude + 0.1, location.latitude + 0.1)
    #         city_map = geopandas.GeoDataFrame({'geometry': [city_bounds]}, crs="EPSG:4326")

    #         fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    #         city_map.plot(ax=ax, color='lightgray', edgecolor='black')
    #         gdf.plot(ax=ax, column='price', cmap='viridis', markersize=20, alpha=0.6, legend=True)
    #         ax.set_title(f'Airbnb Listings in {city_name} by Price')
    #         ax.set_xlabel('Longitude')
    #         ax.set_ylabel('Latitude')
    #         plt.tight_layout()
    #         save_plot(city_name, "map_price")
    #     else:
    #         print(f"Could not retrieve map data for {city_name}.")
    # except Exception as e:
    #     print(f"Error generating map for {city_name}: {e}")

    # 2. Price Distribution by Neighborhood Group and Room Type
    # ---------------------------------------------------------
    try:

        #plt.figure(figsize=(12, 7))
        df = df_cleaned_boxplot

        plt.figure(figsize=(12, 7))
        # Use a more robust method for outlier handling (e.g., Tukey's fences)
        Q1 = df['price'].quantile(0.25)
        Q3 = df['price'].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df_filtered = df[(df['price'] >= lower_bound) & (df['price'] <= upper_bound)]

        # If the filtered data is too small, use the original data
        if len(df_filtered) < 0.8 * len(df):  # Keep 80% of the data
            df_filtered = df

        sns.boxplot(x='neighbourhood_group', y='price', hue='room_type', data=df_filtered)
        plt.title(f'Price Distribution in {city_name} by Neighborhood Group and Room Type')
        plt.xlabel('Neighborhood Group')
        plt.ylabel('Price')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        save_plot(city_name, "price_distribution")

        # plt.figure(figsize=(12, 7))
        # # Use a more robust method for outlier handling (e.g., Tukey's fences)
        # Q1 = df['price'].quantile(0.25)
        # Q3 = df['price'].quantile(0.75)
        # IQR = Q3 - Q1
        # lower_bound = Q1 - 1.5 * IQR
        # upper_bound = Q3 + 1.5 * IQR
        # df_filtered = df[(df['price'] >= lower_bound) & (df['price'] <= upper_bound)]

        # # If the filtered data is too small, use the original data
        # if len(df_filtered) < 0.8 * len(df):  # Keep 80% of the data
        #     df_filtered = df
        # sns.boxplot(x='neighbourhood_group', y='price', hue='room_type', data=df_filtered)
        # plt.title(f'Price Distribution in {city_name} by Neighborhood Group and Room Type')
        # plt.xlabel('Neighborhood Group')
        # plt.ylabel('Price')
        # plt.xticks(rotation=45, ha='right')
        # plt.tight_layout()
        # save_plot(city_name, "price_distribution")
    except Exception as e:
        print(f"Error generating price distribution plot for {city_name}: {e}")

    # 3. Average Price per Night by Neighborhood
    # ------------------------------------------
    # try:
    #     average_price_by_neighborhood = df.groupby('neighbourhood')['price'].mean().sort_values(ascending=False)
    #     plt.figure(figsize=(12, 9))
    #     average_price_by_neighborhood.plot(kind='bar')
    #     plt.title(f'Average Price per Night by Neighborhood in {city_name}')
    #     plt.xlabel('Neighborhood')
    #     plt.ylabel('Average Price')
    #     plt.xticks(rotation=75, ha='right')
    #     plt.tight_layout()
    #     save_plot(city_name, "average_price")
    # except Exception as e:
    #     print(f"Error generating average price plot for {city_name}: {e}")

    # 4. Top Hosts by Number of Listings
    # ------------------------------------
    # try:
    #     host_listings_count = df.groupby('host_name')['id'].count().sort_values(ascending=False).head(10)
    #     plt.figure(figsize=(12, 8))
    #     host_listings_count.plot(kind='bar')
    #     plt.title(f'Top 10 Hosts in {city_name} by Number of Listings')
    #     plt.xlabel('Host Name')
    #     plt.ylabel('Number of Listings')
    #     plt.xticks(rotation=45, ha='right')
    #     plt.tight_layout()
    #     save_plot(city_name, "top_hosts")
    # except Exception as e:
    #     print(f"Error generating top hosts plot for {city_name}: {e}")

    # 5. Relationship Between Reviews per Month and Price
    # ---------------------------------------------------
    # try:
    #     plt.figure(figsize=(12, 8))
    #     sns.scatterplot(x='reviews_per_month', y='price', hue='room_type', data=df, alpha=0.6)
    #     plt.title(f'Relationship Between Reviews per Month and Price in {city_name}')
    #     plt.xlabel('Reviews per Month')
    #     plt.ylabel('Price')
    #     plt.tight_layout()
    #     save_plot(city_name, "reviews_vs_price")
    # except Exception as e:
    #     print(f"Error generating reviews vs. price plot for {city_name}: {e}")

    # 6. Distribution of Reviews per Month
    # --------------------------------------
    # try:
    #     plt.figure(figsize=(12, 8))
    #     sns.histplot(df['reviews_per_month'], kde=True)
    #     plt.title(f'Distribution of Reviews per Month in {city_name}')
    #     plt.xlabel('Reviews per Month')
    #     plt.ylabel('Frequency')
    #     plt.tight_layout()
    #     save_plot(city_name, "reviews_distribution")
    # except Exception as e:
    #     print(f"Error generating reviews distribution plot for {city_name}: {e}")

    # 7. Availability Across the Year
    # ---------------------------------
    # try:
    #     plt.figure(figsize=(12, 8))
    #     sns.histplot(df['availability_365'], bins=30, kde=True)
    #     plt.title(f'Distribution of Availability Across the Year in {city_name}')
    #     plt.xlabel('Availability (Days per Year)')
    #     plt.ylabel('Frequency')
    #     plt.tight_layout()
    #     save_plot(city_name, "availability")
    # except Exception as e:
    #     print(f"Error generating availability plot for {city_name}: {e}")

    # 8. Relationship Between Minimum Nights and Price
    # -------------------------------------------------
    # try:
    #     plt.figure(figsize=(12, 8))
    #     sns.scatterplot(x='minimum_nights', y='price', hue='room_type', data=df, alpha=0.6)
    #     plt.title(f'Relationship Between Minimum Nights and Price in {city_name}')
    #     plt.xlabel('Minimum Nights')
    #     plt.ylabel('Price')
    #     plt.tight_layout()
    #     save_plot(city_name, "min_nights_vs_price")
    # except Exception as e:
    #     print(f"Error generating minimum nights vs. price plot for {city_name}: {e}")

    # 9. Count of Listings by License Status
    # ---------------------------------------
    # try:
    #     plt.figure(figsize=(12, 8))
    #     df['license'].value_counts().plot(kind='bar')
    #     plt.title(f'Count of Listings by License Status in {city_name}')
    #     plt.xlabel('License Status')
    #     plt.ylabel('Number of Listings')
    #     plt.xticks(rotation=45, ha='right')
    #     plt.tight_layout()
    #     save_plot(city_name, "license_status")
    # except Exception as e:
    #     print(f"Error generating license status plot for {city_name}: {e}")

# Find all directories within the base data directory
city_directories = [d for d in os.listdir(base_data_dir) if os.path.isdir(os.path.join(base_data_dir, d))]
total_cities = len(city_directories)
start_time = time.time()

# Loop through each city directory with progress tracking
for i, city_dir in enumerate(city_directories):
    listings_file_path = os.path.join(base_data_dir, city_dir, "listings.csv")
    if os.path.exists(listings_file_path):
        try:
            city_df = pd.read_csv(listings_file_path)
            city_name = city_dir  # Use the directory name as the city name

            # Clean the data
            cleaned_df = clean_data(city_df, city_name)
            if cleaned_df is None:
                print(f"Skipping {city_name} due to data cleaning errors.")
                continue  # Skip to the next city

            print(f"\n--- Processing city: {city_name} ({i+1}/{total_cities}) ---")
            visualize_city_data(cleaned_df, city_name) # Use cleaned df

            elapsed_time = time.time() - start_time
            estimated_time_per_city = elapsed_time / (i + 1) if (i + 1) > 0 else 0
            remaining_cities = total_cities - (i + 1)
            estimated_remaining_time = remaining_cities * estimated_time_per_city
            print(f"Elapsed time: {time.strftime('%M:%S', time.gmtime(elapsed_time))}")
            print(f"Estimated remaining time: {time.strftime('%M:%S', time.gmtime(estimated_remaining_time))}")

        except Exception as e:
            print(f"Error reading or processing {listings_file_path}: {e}")
    else:
        print(f"Warning: {listings_file_path} not found.")

print("\n--- Analysis complete for all cities found. ---")



--- Cleaning data for SanDiego-CA-Unitedstates ---
Number of invalid price values in SanDiego-CA-Unitedstates: 1184
Data cleaning for SanDiego-CA-Unitedstates complete.

--- Processing city: SanDiego-CA-Unitedstates (1/34) ---

--- Generating visualizations for SanDiego-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/SanDiego-CA-Unitedstates/SanDiego-CA-Unitedstates_price_distribution_20250504_225654.png
Elapsed time: 00:00
Estimated remaining time: 00:18

--- Cleaning data for SalemOr-OR-Unitedstates ---
Number of invalid price values in SalemOr-OR-Unitedstates: 57
Data cleaning for SalemOr-OR-Unitedstates complete.

--- Processing city: SalemOr-OR-Unitedstates (2/34) ---

--- Generating visualizations for SalemOr-OR-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/SalemOr-OR-Unitedstates/SalemOr-OR-Unitedstates_price_distribution_20250504_225654.png
Elapsed time: 00:00
Estimated remaining time: 00:13

--- Cleaning data for Oakland-CA-Unitedstates ---
Number of invalid price values in Oakland-CA-Unitedstates: 243
Data cleaning for Oakland-CA-Unitedstates complete.

--- Processing city: Oakland-CA-Unitedstates (3/34) ---

--- Generating visualizations for Oakland-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Oakland-CA-Unitedstates/Oakland-CA-Unitedstates_price_distribution_20250504_225654.png
Elapsed time: 00:01
Estimated remaining time: 00:11

--- Cleaning data for Hawaii-HI-Unitedstates ---
Number of invalid price values in Hawaii-HI-Unitedstates: 4581
Data cleaning for Hawaii-HI-Unitedstates complete.

--- Processing city: Hawaii-HI-Unitedstates (4/34) ---

--- Generating visualizations for Hawaii-HI-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

Saved plot: report/exports_report_20250504_225653/Hawaii-HI-Unitedstates/Hawaii-HI-Unitedstates_price_distribution_20250504_225655.png
Elapsed time: 00:02
Estimated remaining time: 00:15

--- Cleaning data for BrowardCounty-FL-Unitedstates ---
Number of invalid price values in BrowardCounty-FL-Unitedstates: 1184
Data cleaning for BrowardCounty-FL-Unitedstates complete.

--- Processing city: BrowardCounty-FL-Unitedstates (5/34) ---

--- Generating visualizations for BrowardCounty-FL-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/BrowardCounty-FL-Unitedstates/BrowardCounty-FL-Unitedstates_price_distribution_20250504_225656.png
Elapsed time: 00:02
Estimated remaining time: 00:15

--- Cleaning data for Asheville-NC-Unitedstates ---
Number of invalid price values in Asheville-NC-Unitedstates: 336
Data cleaning for Asheville-NC-Unitedstates complete.

--- Processing city: Asheville-NC-Unitedstates (6/34) ---

--- Generating visualizations for Asheville-NC-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Asheville-NC-Unitedstates/Asheville-NC-Unitedstates_price_distribution_20250504_225656.png
Elapsed time: 00:02
Estimated remaining time: 00:13

--- Cleaning data for FortWorth-TX-Unitedstates ---
Number of invalid price values in FortWorth-TX-Unitedstates: 277
Data cleaning for FortWorth-TX-Unitedstates complete.

--- Processing city: FortWorth-TX-Unitedstates (7/34) ---

--- Generating visualizations for FortWorth-TX-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/FortWorth-TX-Unitedstates/FortWorth-TX-Unitedstates_price_distribution_20250504_225656.png
Elapsed time: 00:03
Estimated remaining time: 00:12

--- Cleaning data for SanFrancisco-CA-Unitedstates ---
Number of invalid price values in SanFrancisco-CA-Unitedstates: 1819
Data cleaning for SanFrancisco-CA-Unitedstates complete.

--- Processing city: SanFrancisco-CA-Unitedstates (8/34) ---

--- Generating visualizations for SanFrancisco-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/SanFrancisco-CA-Unitedstates/SanFrancisco-CA-Unitedstates_price_distribution_20250504_225657.png
Elapsed time: 00:03
Estimated remaining time: 00:12

--- Cleaning data for JerseyCity-NJ-Unitedstates ---
Number of invalid price values in JerseyCity-NJ-Unitedstates: 123
Data cleaning for JerseyCity-NJ-Unitedstates complete.

--- Processing city: JerseyCity-NJ-Unitedstates (9/34) ---

--- Generating visualizations for JerseyCity-NJ-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/JerseyCity-NJ-Unitedstates/JerseyCity-NJ-Unitedstates_price_distribution_20250504_225658.png
Elapsed time: 00:04
Estimated remaining time: 00:12

--- Cleaning data for Columbus-OH-Unitedstates ---
Number of invalid price values in Columbus-OH-Unitedstates: 194
Data cleaning for Columbus-OH-Unitedstates complete.

--- Processing city: Columbus-OH-Unitedstates (10/34) ---

--- Generating visualizations for Columbus-OH-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Columbus-OH-Unitedstates/Columbus-OH-Unitedstates_price_distribution_20250504_225658.png
Elapsed time: 00:04
Estimated remaining time: 00:11

--- Cleaning data for SanMateoCounty-CA-Unitedstates ---
Number of invalid price values in SanMateoCounty-CA-Unitedstates: 544
Data cleaning for SanMateoCounty-CA-Unitedstates complete.

--- Processing city: SanMateoCounty-CA-Unitedstates (11/34) ---

--- Generating visualizations for SanMateoCounty-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/SanMateoCounty-CA-Unitedstates/SanMateoCounty-CA-Unitedstates_price_distribution_20250504_225658.png
Elapsed time: 00:05
Estimated remaining time: 00:10

--- Cleaning data for WashingtonDc-DC-Unitedstates ---
Number of invalid price values in WashingtonDc-DC-Unitedstates: 1488
Data cleaning for WashingtonDc-DC-Unitedstates complete.

--- Processing city: WashingtonDc-DC-Unitedstates (12/34) ---

--- Generating visualizations for WashingtonDc-DC-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/WashingtonDc-DC-Unitedstates/WashingtonDc-DC-Unitedstates_price_distribution_20250504_225659.png
Elapsed time: 00:05
Estimated remaining time: 00:10

--- Cleaning data for Austin-TX-Unitedstates ---
Number of invalid price values in Austin-TX-Unitedstates: 4402
Data cleaning for Austin-TX-Unitedstates complete.

--- Processing city: Austin-TX-Unitedstates (13/34) ---

--- Generating visualizations for Austin-TX-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Austin-TX-Unitedstates/Austin-TX-Unitedstates_price_distribution_20250504_225659.png
Elapsed time: 00:06
Estimated remaining time: 00:09

--- Cleaning data for Portland-OR-Unitedstates ---
Number of invalid price values in Portland-OR-Unitedstates: 665
Data cleaning for Portland-OR-Unitedstates complete.

--- Processing city: Portland-OR-Unitedstates (14/34) ---

--- Generating visualizations for Portland-OR-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Portland-OR-Unitedstates/Portland-OR-Unitedstates_price_distribution_20250504_225700.png
Elapsed time: 00:06
Estimated remaining time: 00:09

--- Cleaning data for Bozeman-MT-Unitedstates ---
Number of invalid price values in Bozeman-MT-Unitedstates: 65
Data cleaning for Bozeman-MT-Unitedstates complete.

--- Processing city: Bozeman-MT-Unitedstates (15/34) ---

--- Generating visualizations for Bozeman-MT-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Bozeman-MT-Unitedstates/Bozeman-MT-Unitedstates_price_distribution_20250504_225700.png
Elapsed time: 00:06
Estimated remaining time: 00:08

--- Cleaning data for Boston-MA-Unitedstates ---
Number of invalid price values in Boston-MA-Unitedstates: 965
Data cleaning for Boston-MA-Unitedstates complete.

--- Processing city: Boston-MA-Unitedstates (16/34) ---

--- Generating visualizations for Boston-MA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Boston-MA-Unitedstates/Boston-MA-Unitedstates_price_distribution_20250504_225700.png
Elapsed time: 00:07
Estimated remaining time: 00:07

--- Cleaning data for Cambridge-MA-Unitedstates ---
Number of invalid price values in Cambridge-MA-Unitedstates: 342
Data cleaning for Cambridge-MA-Unitedstates complete.

--- Processing city: Cambridge-MA-Unitedstates (17/34) ---

--- Generating visualizations for Cambridge-MA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Cambridge-MA-Unitedstates/Cambridge-MA-Unitedstates_price_distribution_20250504_225700.png
Elapsed time: 00:07
Estimated remaining time: 00:07

--- Cleaning data for Seattle-WA-Unitedstates ---
Number of invalid price values in Seattle-WA-Unitedstates: 647
Data cleaning for Seattle-WA-Unitedstates complete.

--- Processing city: Seattle-WA-Unitedstates (18/34) ---

--- Generating visualizations for Seattle-WA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

Saved plot: report/exports_report_20250504_225653/Seattle-WA-Unitedstates/Seattle-WA-Unitedstates_price_distribution_20250504_225701.png
Elapsed time: 00:09
Estimated remaining time: 00:08

--- Cleaning data for SantaClaraCounty-CA-Unitedstates ---
Number of invalid price values in SantaClaraCounty-CA-Unitedstates: 1179
Data cleaning for SantaClaraCounty-CA-Unitedstates complete.

--- Processing city: SantaClaraCounty-CA-Unitedstates (19/34) ---

--- Generating visualizations for SantaClaraCounty-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/SantaClaraCounty-CA-Unitedstates/SantaClaraCounty-CA-Unitedstates_price_distribution_20250504_225703.png
Elapsed time: 00:09
Estimated remaining time: 00:07

--- Cleaning data for SantaCruzCounty-CA-Unitedstates ---
Number of invalid price values in SantaCruzCounty-CA-Unitedstates: 132
Data cleaning for SantaCruzCounty-CA-Unitedstates complete.

--- Processing city: SantaCruzCounty-CA-Unitedstates (20/34) ---

--- Generating visualizations for SantaCruzCounty-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/SantaCruzCounty-CA-Unitedstates/SantaCruzCounty-CA-Unitedstates_price_distribution_20250504_225703.png
Elapsed time: 00:10
Estimated remaining time: 00:07

--- Cleaning data for Albany-NY-Unitedstates ---
Number of invalid price values in Albany-NY-Unitedstates: 50
Data cleaning for Albany-NY-Unitedstates complete.

--- Processing city: Albany-NY-Unitedstates (21/34) ---

--- Generating visualizations for Albany-NY-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Albany-NY-Unitedstates/Albany-NY-Unitedstates_price_distribution_20250504_225704.png
Elapsed time: 00:10
Estimated remaining time: 00:06

--- Cleaning data for TwinCitiesMsa-MN-Unitedstates ---
Number of invalid price values in TwinCitiesMsa-MN-Unitedstates: 591
Data cleaning for TwinCitiesMsa-MN-Unitedstates complete.

--- Processing city: TwinCitiesMsa-MN-Unitedstates (22/34) ---

--- Generating visualizations for TwinCitiesMsa-MN-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/TwinCitiesMsa-MN-Unitedstates/TwinCitiesMsa-MN-Unitedstates_price_distribution_20250504_225704.png
Elapsed time: 00:10
Estimated remaining time: 00:05

--- Cleaning data for Denver-CO-Unitedstates ---
Number of invalid price values in Denver-CO-Unitedstates: 573
Data cleaning for Denver-CO-Unitedstates complete.

--- Processing city: Denver-CO-Unitedstates (23/34) ---

--- Generating visualizations for Denver-CO-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Denver-CO-Unitedstates/Denver-CO-Unitedstates_price_distribution_20250504_225705.png
Elapsed time: 00:11
Estimated remaining time: 00:05

--- Cleaning data for Nashville-TN-Unitedstates ---
Number of invalid price values in Nashville-TN-Unitedstates: 3017
Data cleaning for Nashville-TN-Unitedstates complete.

--- Processing city: Nashville-TN-Unitedstates (24/34) ---

--- Generating visualizations for Nashville-TN-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Nashville-TN-Unitedstates/Nashville-TN-Unitedstates_price_distribution_20250504_225705.png
Elapsed time: 00:12
Estimated remaining time: 00:05

--- Cleaning data for Chicago-IL-Unitedstates ---
Number of invalid price values in Chicago-IL-Unitedstates: 1030
Data cleaning for Chicago-IL-Unitedstates complete.

--- Processing city: Chicago-IL-Unitedstates (25/34) ---

--- Generating visualizations for Chicago-IL-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Chicago-IL-Unitedstates/Chicago-IL-Unitedstates_price_distribution_20250504_225706.png
Elapsed time: 00:12
Estimated remaining time: 00:04

--- Cleaning data for RhodeIsland-RI-Unitedstates ---
Number of invalid price values in RhodeIsland-RI-Unitedstates: 726
Data cleaning for RhodeIsland-RI-Unitedstates complete.

--- Processing city: RhodeIsland-RI-Unitedstates (26/34) ---

--- Generating visualizations for RhodeIsland-RI-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

Saved plot: report/exports_report_20250504_225653/RhodeIsland-RI-Unitedstates/RhodeIsland-RI-Unitedstates_price_distribution_20250504_225706.png
Elapsed time: 00:13
Estimated remaining time: 00:04

--- Cleaning data for NewOrleans-LA-Unitedstates ---
Number of invalid price values in NewOrleans-LA-Unitedstates: 1202
Data cleaning for NewOrleans-LA-Unitedstates complete.

--- Processing city: NewOrleans-LA-Unitedstates (27/34) ---

--- Generating visualizations for NewOrleans-LA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/NewOrleans-LA-Unitedstates/NewOrleans-LA-Unitedstates_price_distribution_20250504_225707.png
Elapsed time: 00:13
Estimated remaining time: 00:03

--- Cleaning data for ClarkCountyNv-NV-Unitedstates ---
Number of invalid price values in ClarkCountyNv-NV-Unitedstates: 4954
Data cleaning for ClarkCountyNv-NV-Unitedstates complete.

--- Processing city: ClarkCountyNv-NV-Unitedstates (28/34) ---

--- Generating visualizations for ClarkCountyNv-NV-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/ClarkCountyNv-NV-Unitedstates/ClarkCountyNv-NV-Unitedstates_price_distribution_20250504_225707.png
Elapsed time: 00:14
Estimated remaining time: 00:03

--- Cleaning data for NewYorkCity-NY-Unitedstates ---
Number of invalid price values in NewYorkCity-NY-Unitedstates: 15126


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

Data cleaning for NewYorkCity-NY-Unitedstates complete.

--- Processing city: NewYorkCity-NY-Unitedstates (29/34) ---

--- Generating visualizations for NewYorkCity-NY-Unitedstates ---
Saved plot: report/exports_report_20250504_225653/NewYorkCity-NY-Unitedstates/NewYorkCity-NY-Unitedstates_price_distribution_20250504_225708.png
Elapsed time: 00:15
Estimated remaining time: 00:02

--- Cleaning data for LosAngeles-CA-Unitedstates ---
Number of invalid price values in LosAngeles-CA-Unitedstates: 9164
Data cleaning for LosAngeles-CA-Unitedstates complete.

--- Processing city: LosAngeles-CA-Unitedstates (30/34) ---

--- Generating visualizations for LosAngeles-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

Saved plot: report/exports_report_20250504_225653/LosAngeles-CA-Unitedstates/LosAngeles-CA-Unitedstates_price_distribution_20250504_225709.png
Elapsed time: 00:16
Estimated remaining time: 00:02

--- Cleaning data for Dallas-TX-Unitedstates ---
Number of invalid price values in Dallas-TX-Unitedstates: 418
Data cleaning for Dallas-TX-Unitedstates complete.

--- Processing city: Dallas-TX-Unitedstates (31/34) ---

--- Generating visualizations for Dallas-TX-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Dallas-TX-Unitedstates/Dallas-TX-Unitedstates_price_distribution_20250504_225710.png
Elapsed time: 00:16
Estimated remaining time: 00:01

--- Cleaning data for PacificGrove-CA-Unitedstates ---
Number of invalid price values in PacificGrove-CA-Unitedstates: 21
Data cleaning for PacificGrove-CA-Unitedstates complete.

--- Processing city: PacificGrove-CA-Unitedstates (32/34) ---

--- Generating visualizations for PacificGrove-CA-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/PacificGrove-CA-Unitedstates/PacificGrove-CA-Unitedstates_price_distribution_20250504_225710.png
Elapsed time: 00:17
Estimated remaining time: 00:01

--- Cleaning data for Newark-NJ-Unitedstates ---
Number of invalid price values in Newark-NJ-Unitedstates: 128
Data cleaning for Newark-NJ-Unitedstates complete.

--- Processing city: Newark-NJ-Unitedstates (33/34) ---

--- Generating visualizations for Newark-NJ-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Newark-NJ-Unitedstates/Newark-NJ-Unitedstates_price_distribution_20250504_225710.png
Elapsed time: 00:17
Estimated remaining time: 00:00

--- Cleaning data for Rochester-NY-Unitedstates ---
Number of invalid price values in Rochester-NY-Unitedstates: 76
Data cleaning for Rochester-NY-Unitedstates complete.

--- Processing city: Rochester-NY-Unitedstates (34/34) ---

--- Generating visualizations for Rochester-NY-Unitedstates ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['neighbourhood_group'].fillna('Unknown', inplace=True)
  df['neighbourhood_group'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['room_type'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will n

Saved plot: report/exports_report_20250504_225653/Rochester-NY-Unitedstates/Rochester-NY-Unitedstates_price_distribution_20250504_225711.png
Elapsed time: 00:17
Estimated remaining time: 00:00

--- Analysis complete for all cities found. ---


# Section 03 - Clean up unused directories

In [5]:

import shutil
import os
from datetime import datetime

# 1. List the directories you want to delete
directories_to_delete = [
    "/content/report/",
]

# 2. Function to delete a directory and its contents
def delete_directory(dir_path):
    """
    Deletes a directory and all its contents.

    Args:
        dir_path (str): The path to the directory to delete.
    """
    try:
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
            print(f"Successfully deleted: {dir_path}")
        else:
            print(f"Directory not found: {dir_path}")
    except Exception as e:
        print(f"Error deleting {dir_path}: {e}")

# 3. Delete the directories
for directory in directories_to_delete:
    delete_directory(directory)

# Optional: Function to create report directory (modified to handle existing)
def makeReportDirectory():
    """Creates a report directory with a timestamp, handling existing directories."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_directory_name = os.path.join("report", f"exports_report_{timestamp}")
    try:
        os.makedirs(report_directory_name, exist_ok=True)  # Creates if doesn't exist, doesn't error if exists
        print(f"Report directory created/exists: {report_directory_name}")
    except Exception as e:
        print(f"Error creating report directory: {e}")
        # Consider if you want to raise the error or handle it differently
    return report_directory_name

# Example Usage
# If you want to delete the old ones and create a new one
# report_dir = makeReportDirectory() #<---Uncomment this line AFTER you have deleted the old report directories
# print(f"New report directory: {report_dir}")


Successfully deleted: /content/report/
