In [1]:
import os
import datetime as dt

import pandas as pd
import numpy as np

import re

import random

import warnings
warnings.filterwarnings('ignore')


In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
from jupyter_dash import JupyterDash
from dash import html, dcc
from dash.dependencies import Output, Input
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate

import plotly.express as px
# import plotly.graph_objs as go
import plotly.graph_objects as go
from plotly.graph_objs.scatter import Line
from plotly.subplots import make_subplots
sns.set()

In [3]:
# Autosave the notebook every 60 seconds.
%autosave 60
# Execute Amenities.ipynb inside this kernel so the names it defines become available here.
# NOTE(review): presumably this is where `amenities_counter` (used by the amenities table
# cell) is defined — confirm against that notebook.
%run Amenities.ipynb

Autosaving every 60 seconds
Median:  150.0   Mean:  197.32


In [4]:
# New York listings: one row per listing. Load the CSV and preview the first rows.
ny_listings = pd.read_csv(
    "../Data_Files/NewYork_Listings.csv",
    low_memory=False,
)
ny_listings.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
0,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75356,-73.98559,Entire home/apt,225,30,48,2019-11-04,0.31,3,308,0,
1,5121,BlissArtsSpace!,7356,Garon,Brooklyn,Bedford-Stuyvesant,40.68535,-73.95512,Private room,60,30,50,2019-12-02,0.32,2,365,0,
2,5136,"Spacious Brooklyn Duplex, Patio + Garden",7378,Rebecca,Brooklyn,Sunset Park,40.66265,-73.99454,Entire home/apt,275,21,2,2021-08-08,0.02,1,250,1,
3,5178,Large Furnished Room Near B'way,8967,Shunichi,Manhattan,Midtown,40.76457,-73.98317,Private room,68,2,536,2022-05-09,3.37,1,172,62,
4,5203,Cozy Clean Guest Room - Family Apt,7490,MaryEllen,Manhattan,Upper West Side,40.8038,-73.96751,Private room,75,2,118,2017-07-21,0.76,1,0,0,


In [5]:
# Per-listing lookup table used to enrich the calendar data: keep only the
# location, availability and price columns, order by listing id, and rename
# `id` to `listing_id` so it matches the join key used in the calendar merge.
#
# The original cell called `sort_values` without keeping its result (so the
# sort never took effect) and renamed in place on a column slice (which
# triggers pandas' SettingWithCopy warning). Building the frame as one chain
# applies the sort and yields an independent frame.
ny_neigh_group = (
    ny_listings[['id', 'neighbourhood_group', 'neighbourhood', 'availability_365', 'price']]
    .sort_values(by='id')
    .rename(columns={'id': 'listing_id'})
)

In [6]:
# Neighbourhoods of NY. There are 5 NY neighbourhood groupings and several
# neighbourhoods per grouping.
ny_nbh = pd.read_csv(
    "../Data_Files/neighbourhoods.csv",
    low_memory=False,
)

# Show the distinct neighbourhood groupings.
ny_nbh['neighbourhood_group'].unique()

array(['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island'],
      dtype=object)

In [7]:
## NY calendar dataframe -- lists availability on a given date.
## Read directly from the gzip-compressed CSV; the first line is the header.
ny_cal = pd.read_csv(
    "../Data_Files/calendar.csv.gz",
    sep=",",
    header=0,
    compression="gzip",
)

In [8]:
# Distinct listing ids that appear in the calendar.
ny_cal_uniqueID = ny_cal['listing_id'].unique()

# Attach neighbourhood, availability and listing price to every calendar row.
# The outer join keeps unmatched rows from both sides, and `dropna` then removes
# every row that has a missing value in any column.
# Later cells read `price_y`, the suffixed name pandas gives the right-hand
# (`ny_neigh_group`) `price` column when both inputs carry a `price` column.
merged = ny_cal.merge(ny_neigh_group, how='outer', on='listing_id').dropna()

In [9]:
## This cell takes about 1 min 30 sec to run at the 500k sample size.
# Candidate sample sizes; pass a different one to `.sample` below to trade
# speed for coverage.
sample10000 = 10000
sample50000 = 50000
sample100k = 100000
sample500k = 500000
# Please feel free to use the full merged dataframe if your machine can handle it.

# Parse the dates so the frame can be ordered chronologically.
merged['date'] = pd.to_datetime(merged['date'])
merged = merged.sort_values('date', ascending=True)

# Draw the sample while `date` is still a datetime, then put the sampled rows
# back into chronological order. `.sample` returns rows in shuffled order, and
# Plotly Express builds animation frames in order of first appearance, so an
# unsorted sample makes the date slider jump around in time. Sorting the
# formatted 'MM-DD-YYYY' strings afterwards would not help either, because that
# format does not sort chronologically across years.
# `min(...)` keeps the cell working when fewer rows than the requested sample
# size survive the merge (`.sample` raises if n exceeds the row count).
merged100k = (
    merged.sample(n=min(sample100k, len(merged)), random_state=1)
    .sort_values('date', kind='stable')
)

# Format the dates as 'MM-DD-YYYY' strings for display.
merged100k['date'] = merged100k['date'].dt.strftime('%m-%d-%Y')
merged['date'] = merged['date'].dt.strftime('%m-%d-%Y')
NY_merged = merged

In [10]:
# Create the Dash application with the Bootstrap theme from
# dash-bootstrap-components as its only external stylesheet.
external_stylesheets = [dbc.themes.BOOTSTRAP]
app = JupyterDash(
    __name__,
    external_stylesheets=external_stylesheets,
)

In [11]:
# JupyterDash.infer_jupyter_proxy_config()

In [12]:
# Animated bubble chart of the sampled rows: availability (x) against minimum
# nights (y), bubble size from the listing price (`price_y`), one facet row and
# one colour per neighbourhood group, and one animation frame per `date` value.
fig_scatter = px.scatter(
    data_frame=merged100k,
    x='availability_365',
    y='minimum_nights',
    size='price_y',
    size_max=90,
    color='neighbourhood_group',
    facet_row='neighbourhood_group',
    animation_frame='date',
    animation_group='neighbourhood_group',
    log_x=False,
    height=900,
    width=900,
)

# Strip the "neighbourhood_group=" prefix from the facet labels so they show
# only the group name. The original cell made this exact call twice; the second
# call had nothing left to replace, so a single call is enough.
fig_scatter.for_each_annotation(lambda a: a.update(text=a.text.replace("neighbourhood_group=", "")))

# Apply the same clean-up to trace (legend) names.
fig_scatter = fig_scatter.for_each_trace(lambda t: t.update(name=t.name.replace("neighbourhood_group=", "")))

In [15]:
# Page layout, top to bottom: title and disclaimers, two Markdown introduction
# blocks, the neighbourhood-group dropdown, the `report` container (filled in by
# the dropdown callback), and the animated scatter figure.
#
# Fix: the `html.H3` element directly above the dropdown was missing its
# trailing comma, which made the whole cell a SyntaxError.
# NOTE(review): the CNN link in the first Markdown block ends in "index.f",
# which looks truncated — confirm the intended URL.
app.layout = html.Div([
    html.H1('Welcome To Airbnb Explorers Dashboard'),
    html.H6('**This dashboard is for academic purposes only.  Actual prices may vary'),
    html.H6('**Please check Airbnb for actual prices.'),
    html.Br(),
    dcc.Markdown('''
    #### Intro
                                                                  
    Despite concerns of an ongoing global pandemic, increase in international turmoil, and the uncertainty of the\n
    US economy, leisure traveling is expected to reach new highs [(Mckinsey)](https://www.mckinsey.com/industries/travel-logistics-and-infrastructure/our-insights/us-summer-travel-2022) As the interest in traveling increases, individuals.\n 
    are faced with the question of booking a hotel or renting an Airbnb. Despite where you may stand on this debate,\n
    it is hard to ignore the success Airbnb has had since the demise of traveling at the start of the Covid-19 pandemic.\n
    In fact, Airbnb posted revenue of $1.5 billion in Q1 of 2022, and average daily rates continue to rise [(CNN)](https://www.cnn.com/2022/05/03/tech/airbnb-first-quarter-earnings/index.f).\n
    '''),
    html.Br(),
    dcc.Markdown('''
    To match this demand in traveling, more & more hosts are buying out property for the sole purpose of short term\n
    rentals. New York City alone saw the number of available short term rentals increase to 13,000 [(Bloomberg)](https://www.bloomberg.com/news/articles/2022-05-18/airbnbs-outnumber-new-york-city-apartments-in-hot-housing-market). Using\n
    data available from [Inside Airbnb](http://insideairbnb.com/get-the-data/) we will analyze New York City’s Airbnb listings data to build a predictive model\n
    on future rental prices. The different regression models that will be used include linear, lasso, ridge, and random\n
    forest. The analysis and model will be incorporated into a final dashboard that a host can use to learn about their\n
    desired market.
    '''),
    html.Br(),
    html.H3('Average cost of an Airbnb rental in 2022'),

    # One dropdown option per neighbourhood group found in the merged data.
    dcc.Dropdown(id='neighbourhood_group',style={"width": "75%"},
                options=[{'label': neighbourhood_group, 'value': neighbourhood_group}
                        for neighbourhood_group in merged['neighbourhood_group'].unique()]),

    html.Br(),
    # Filled in by the dropdown callback with the average-price summary.
    html.Div(id='report'),
    html.Br(),
    dcc.Graph(id='animated_scatter',
             figure=fig_scatter),
    html.Br(),
    html.Br(),

])

@app.callback(Output('report', 'children'),
             Input('neighbourhood_group','value'))
def display_neighbourhood_group(neighbourhood_group):
    """Build the contents of the `report` container for the selected group.

    Parameters
    ----------
    neighbourhood_group : str or None
        Value of the `neighbourhood_group` dropdown; None when nothing is
        selected.

    Returns
    -------
    str or list
        An empty string when nothing is selected. Otherwise a two-item list:
        an H3 heading with the group name, followed by a sentence giving the
        mean `price_y` of the sampled rows in that group, or a notice that the
        sample contains no rows for it.
    """
    if neighbourhood_group is None:
        return ''

    # The dropdown options come from the full `merged` frame, while the average
    # is computed on the `merged100k` sample, so a selectable group can be
    # missing from the sample. The original indexed `.values[0]` on the empty
    # result and raised IndexError; report the gap instead.
    in_group = merged100k['neighbourhood_group'] == neighbourhood_group
    prices = merged100k.loc[in_group, 'price_y']
    if prices.empty:
        return [html.H3(neighbourhood_group),
                f'No sampled listings are available for {neighbourhood_group}.']

    price = prices.mean()
    return [html.H3(neighbourhood_group),
            f'The average sampled price for one night stay in {neighbourhood_group} is ${price:,.0f}.']


In [14]:
# Please make sure all Dash ports are shut down before trying to run.
# Start the app on local port 8992 in 'external' mode.
app.run_server(port=8992, mode='external')

Dash app running on http://127.0.0.1:8992/


In [7]:
# Tabulate the 50 most common amenities and their counts.
# NOTE(review): `amenities_counter` is not defined in this notebook's visible
# cells; presumably it comes from the `%run Amenities.ipynb` cell — confirm.
amenities_df = pd.DataFrame(
    data=amenities_counter.most_common(50),
    columns=['Amenities', 'Count'],
)
amenities_df

Unnamed: 0,Amenities,Count
0,Wifi,3364
1,Smoke alarm,3337
2,Long term stays allowed,3276
3,Carbon monoxide alarm,3126
4,Kitchen,3104
5,Essentials,3099
6,Hangers,3031
7,Hair dryer,2913
8,TV,2898
9,Iron,2880
