### Airbnb in New York City

In [1]:
import pandas as pd
import numpy as np
import panel as pn
import os
pn.extension('tabulator')

import hvplot.pandas

In [2]:
# Load the listings CSV (path is relative to the working directory) and preview the first rows.
airbnb = pd.read_csv("AB_NYC_2019.csv")
airbnb.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


In [3]:
# Summarise each column's dtype and non-null count.
airbnb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48895 entries, 0 to 48894
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              48895 non-null  int64  
 1   name                            48879 non-null  object 
 2   host_id                         48895 non-null  int64  
 3   host_name                       48874 non-null  object 
 4   neighbourhood_group             48895 non-null  object 
 5   neighbourhood                   48895 non-null  object 
 6   latitude                        48895 non-null  float64
 7   longitude                       48895 non-null  float64
 8   room_type                       48895 non-null  object 
 9   price                           48895 non-null  int64  
 10  minimum_nights                  48895 non-null  int64  
 11  number_of_reviews               48895 non-null  int64  
 12  last_review                     

In [4]:
# Count the missing values in each column.
airbnb.isnull().sum()

id                                    0
name                                 16
host_id                               0
host_name                            21
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                       10052
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64

### Data Preprocessing🧹

In [5]:
# NOTE: run this cell once on the freshly loaded frame. It renames
# reviews_per_month, so a second run would no longer find that column.

# Drop rows that lack a listing name or a host name.
airbnb = airbnb.dropna(subset=['name', 'host_name'])

# Parse last_review as datetime so that a median date can be computed.
airbnb = airbnb.assign(last_review=pd.to_datetime(airbnb['last_review']))

# Impute missing values by assigning the filled columns back to the frame.
# Calling fillna(inplace=True) on airbnb[col] is chained assignment: pandas
# deprecates it, and under Copy-on-Write it leaves the frame unchanged.
mean_reviews_per_month = airbnb['reviews_per_month'].mean()
airbnb = airbnb.assign(
    # Missing review dates get the median review date.
    last_review=airbnb['last_review'].fillna(airbnb['last_review'].median()),
    # Missing review rates get the column mean.
    reviews_per_month=airbnb['reviews_per_month'].fillna(mean_reviews_per_month),
)

# Rename reviews_per_month to a more intuitive name: avg_reviews_per_month.
airbnb = airbnb.rename(columns={'reviews_per_month': 'avg_reviews_per_month'})

# Remove exact duplicate rows.
airbnb = airbnb.drop_duplicates()

# Keep listings priced 10-1000 with a minimum stay of 1-60 nights
# (both ranges inclusive); everything outside is treated as an outlier.
airbnb = airbnb[
    airbnb['price'].between(10, 1000)
    & airbnb['minimum_nights'].between(1, 60)
]

# The id and host_id columns are intentionally kept: the neighbourhood bar
# chart counts listings through the id column.

In [6]:
#checking null values
def check_for_nulls(df):
    """Report whether ``df`` contains missing values.

    Prints a one-line status message and returns the number of nulls in each
    column. The counts are returned in both cases, so that when nulls are
    present the caller can see which columns hold them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.Series
        Null count per column, indexed by column name.
    """
    null_counts = df.isnull().sum()
    if null_counts.any():
        print("There are null values in the DataFrame.")
    else:
        print("All clean ⭐!")
    return null_counts

In [7]:
# Verify the preprocessed frame: prints a status line and shows the per-column null counts.
check_for_nulls(airbnb)

All clean ⭐!


id                                0
name                              0
host_id                           0
host_name                         0
neighbourhood_group               0
neighbourhood                     0
latitude                          0
longitude                         0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
last_review                       0
avg_reviews_per_month             0
calculated_host_listings_count    0
availability_365                  0
dtype: int64

In [8]:
# Filter widgets shared by every chart.
# NOTE(review): the same three names are assigned again in the cells that
# follow (the slider there uses end=800), so those later objects are the ones
# the charts end up bound to.
min_price_slider = pn.widgets.FloatSlider(
    name="Minimum price (USD)",
    start=0,
    end=1000,
    step=10,
    value=0,
)
city_dropdown = pn.widgets.Select(
    name='City',
    options=['All', *airbnb['neighbourhood_group'].unique()],
)
room_type_dropdown = pn.widgets.Select(
    name="Room Type",
    options=['All', *airbnb['room_type'].unique()],
)

### Exploratory data analysis

#### Defining the widgets

In [9]:
# Slider supplying the lower price bound applied by each chart's filter.
min_price_slider = pn.widgets.FloatSlider(
    name="Minimum price (USD)",
    start=0,
    end=800,
    step=10,
    value=0,
)
min_price_slider

In [10]:
# Dropdown of neighbourhood groups; 'All' disables the filter.
city_dropdown = pn.widgets.Select(
    name='City',
    options=['All', *airbnb['neighbourhood_group'].unique()],
)
city_dropdown

In [11]:
# Dropdown of room types; 'All' disables the filter.
room_type_dropdown = pn.widgets.Select(
    name="Room Type",
    options=['All', *airbnb['room_type'].unique()],
)
room_type_dropdown

### 1. Top 10 neighborhoods 💹

In [12]:
def create_neighborhood_bar(min_price, city, room_type):
    """Build a bar chart of the ten neighbourhoods with the most listings.

    Parameters
    ----------
    min_price
        Inclusive lower bound on ``price``.
    city
        A ``neighbourhood_group`` value, or "All" to skip this filter.
    room_type
        A ``room_type`` value, or "All" to skip this filter.

    Returns
    -------
    The hvplot bar chart of listing counts per neighbourhood.
    """
    # Start from the price floor and narrow the mask only for active filters.
    keep = airbnb["price"] >= min_price
    if city != "All":
        keep = keep & (airbnb["neighbourhood_group"] == city)
    if room_type != "All":
        keep = keep & (airbnb["room_type"] == room_type)
    listings = airbnb[keep]

    # Listings per neighbourhood, limited to the ten largest counts.
    top_ten = listings.groupby("neighbourhood")["id"].count().nlargest(10)

    return top_ten.hvplot.bar(
        rot=90,
        xlabel='Neighbourhood',
        ylabel='Count',
        width=600,
        height=400,
        title='Top 10 Neighbourhoods',
    )
# Bind the chart function to the filter widgets so Panel supplies their values.
neighborhood_bar = pn.bind(
    create_neighborhood_bar,
    min_price=min_price_slider,
    city=city_dropdown,
    room_type=room_type_dropdown,
)

# For a static chart instead, call create_neighborhood_bar directly with each
# widget's .value.

pn.Row(neighborhood_bar)

### 2. Geographic Distribution

In [13]:
def create_geo_scatter(min_price, city, room_type):
    """Build a longitude/latitude scatter of listings coloured by price.

    Parameters
    ----------
    min_price
        Inclusive lower bound on ``price``.
    city
        A ``neighbourhood_group`` value, or "All" to skip this filter.
    room_type
        A ``room_type`` value, or "All" to skip this filter.

    Returns
    -------
    The hvplot scatter plot with a price colour bar.
    """
    # Start from the price floor and narrow the mask only for active filters.
    keep = airbnb["price"] >= min_price
    if city != "All":
        keep = keep & (airbnb["neighbourhood_group"] == city)
    if room_type != "All":
        keep = keep & (airbnb["room_type"] == room_type)
    listings = airbnb[keep]

    return listings.hvplot.scatter(
        x='longitude',
        y='latitude',
        c='price',
        colorbar=True,
        title='Geographic Distribution',
    )

# Bind the chart function to the filter widgets so Panel supplies their values.
geo_scatter = pn.bind(
    create_geo_scatter,
    min_price=min_price_slider,
    city=city_dropdown,
    room_type=room_type_dropdown,
)

# For a static chart instead, call create_geo_scatter directly with each
# widget's .value.

pn.Row(geo_scatter)

### 3.  Price vs Minimum Nights 🌃

In [14]:
def create_neighborhood_scatter(min_price, city, room_type):
    """Build a scatter plot of price against minimum nights.

    Points are grouped by ``neighbourhood_group`` and coloured by
    ``room_type`` using the "Set1" colour map.

    Parameters
    ----------
    min_price
        Inclusive lower bound on ``price``.
    city
        A ``neighbourhood_group`` value, or "All" to skip this filter.
    room_type
        A ``room_type`` value, or "All" to skip this filter.

    Returns
    -------
    The hvplot scatter plot.
    """
    # Start from the price floor and narrow the mask only for active filters.
    keep = airbnb["price"] >= min_price
    if city != "All":
        keep = keep & (airbnb["neighbourhood_group"] == city)
    if room_type != "All":
        keep = keep & (airbnb["room_type"] == room_type)
    listings = airbnb[keep]

    return listings.hvplot.scatter(
        x="minimum_nights",
        y="price",
        by="neighbourhood_group",
        c="room_type",
        cmap="Set1",
        title="Price vs Minimum Nights",
    )

# Bind the chart function to the filter widgets so Panel supplies their values.
neighborhood_scatter = pn.bind(
    create_neighborhood_scatter,
    min_price=min_price_slider,
    city=city_dropdown,
    room_type=room_type_dropdown,
)

# For a static chart instead, call create_neighborhood_scatter directly with
# each widget's .value.

# Display the plot
pn.Row(neighborhood_scatter)

### 4.  Neighborhood pricing💵
A box plot showing the distribution of listing prices in each neighbourhood group. Use it to identify price differences between groups.

In [15]:
# Box-plot builder used by the pricing panel.
def create_neighborhood_box(min_price, city, room_type):
    """Build a box plot of price for each neighbourhood group.

    Parameters
    ----------
    min_price
        Inclusive lower bound on ``price``.
    city
        A ``neighbourhood_group`` value, or "All" to skip this filter.
    room_type
        A ``room_type`` value, or "All" to skip this filter.

    Returns
    -------
    The hvplot box plot.
    """
    # Start from the price floor and narrow the mask only for active filters.
    keep = airbnb["price"] >= min_price
    if city != "All":
        keep = keep & (airbnb["neighbourhood_group"] == city)
    if room_type != "All":
        keep = keep & (airbnb["room_type"] == room_type)
    listings = airbnb[keep]

    return listings.hvplot.box(
        y='price',
        by='neighbourhood_group',
        rot=90,
        height=500,
        title='Neighborhood Pricing',
    )

# Bind the chart function to the filter widgets so Panel supplies their values.
neighborhood_box = pn.bind(
    create_neighborhood_box,
    min_price=min_price_slider,
    city=city_dropdown,
    room_type=room_type_dropdown,
)

# For a static chart instead, call create_neighborhood_box directly with each
# widget's .value.

# Show the plot
pn.Row(neighborhood_box)

### Panel Dashboard | Airbnb

In [None]:
# Assemble the dashboard with Panel's FastListTemplate: text, image and filter
# widgets in the sidebar, the four bound charts in two rows in the main area.

# Panel logo markup; defined here but not referenced by the template below.
pn_logo     = '<a href="https://panel.pyviz.org"><img src="http://panel.pyviz.org/_static/logo_stacked.png" width=140></a>'

template = pn.template.FastListTemplate(
    title='Airbnb in New York City',
    accent_base_color="#88d8b0",
    header_background="#0474fb",
    sidebar=[
        # Description and image.
        pn.pane.Markdown("## About this Dashboard"),
        pn.pane.Markdown("#### The data includes information on Airbnb listings in New York City, such as the price, location, and type of room. Use the dropdown menus and slider to filter the data and visualize the top 10 neighbourhoods in different parts of the city, the relationship between price and minimum nights required for different neighborhoods, Neighbourhood pricing and much more. Have fun!"),
        pn.pane.JPG('nyc.jpg', sizing_mode='scale_both'),
        # Filter controls shared by all four charts.
        pn.pane.Markdown("## Filters"),
        min_price_slider,
        city_dropdown,
        room_type_dropdown,
    ],
    main=[
        pn.Row(neighborhood_bar, geo_scatter, margin=(0, 20)),
        pn.Row(neighborhood_scatter, neighborhood_box, margin=(0, 20)),
    ],
)

# Mark the template as servable (template.show() is the alternative left
# disabled in this cell).
template.servable()