In [1]:
import os
import datetime as dt

import pandas as pd
import numpy as np

import re

import random

import warnings
warnings.filterwarnings('ignore')

from urllib.request import urlopen
import json

# Neighbourhood boundary GeoJSON used by the choropleth heat map.
geojson_url = 'http://data.insideairbnb.com/united-states/ny/new-york-city/2022-06-03/visualisations/neighbourhoods.geojson'
with urlopen(geojson_url) as response:
    # Read the whole payload, then decode it into a Python dict.
    ny_data = json.loads(response.read())

In [3]:
import matplotlib.pyplot as plt
from jupyter_dash import JupyterDash
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate

import plotly.graph_objects as go
from plotly.graph_objs.scatter import Line
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

In [4]:
# Quarterly listing snapshots, keyed by the label written to each frame's
# `Quarter` column.
quarterly_csvs = {
    1: '../Data_Files/NYC_Listings_202203.csv',
    2: '../Data_Files/NYC_Listings_202206.csv',
    3: '../Data_Files/NYC_Listings_202109.csv',
    4: '../Data_Files/NYC_Listings_202112.csv',
}

quarterly_frames = []
for quarter, csv_path in quarterly_csvs.items():
    frame = pd.read_csv(csv_path)
    frame['Quarter'] = quarter
    quarterly_frames.append(frame)

# Keep the individual frames reachable under their original names.
df1, df2, df3, df4 = quarterly_frames

# Stack the four snapshots; each frame's own row index is carried over as-is.
nyc_data = pd.concat(quarterly_frames)

In [5]:
# New York listings table. low_memory=False has pandas infer each column's
# dtype from the whole file instead of chunk by chunk.
listings_csv = "../Data_Files/NewYork_Listings.csv"
ny_listings = pd.read_csv(listings_csv, low_memory=False)

In [6]:
# Listing columns that get joined onto the calendar data on `listing_id`.
# sort_values() and rename() return new frames, so the result is bound
# explicitly: the sort by id actually takes effect, and nothing is edited in
# place on a column selection of `ny_listings`.
ny_neigh_group = (
    ny_listings[['id', 'neighbourhood_group', 'neighbourhood', 'availability_365', 'price']]
    .sort_values(by='id')
    .rename(columns={'id': 'listing_id'})
)

In [7]:
# NY calendar dataframe -- lists availability on a given date.
# The file is gzip-compressed, comma-separated, with the header on the first row.
calendar_csv = "../Data_Files/calendar.csv.gz"
ny_cal = pd.read_csv(calendar_csv, sep=",", header=0, compression="gzip")

In [8]:
# Distinct listing ids that appear in the calendar data.
ny_cal_uniqueID = ny_cal['listing_id'].unique()

# Full outer join of the calendar rows with the listing attributes, followed
# by removal of every row that has a missing value in any column.
merged = ny_cal.merge(ny_neigh_group, how='outer', on='listing_id').dropna()

In [9]:
# This cell takes about 1 min 30 s to run at the 500k sample size.
# Candidate sample sizes; feel free to use the full merged dataframe instead
# if your machine can handle it.
sample10000 = 10000
sample50000 = 50000
sample100k = 100000
sample500k = 500000

# Display format of the `date` column once the chronological sorting is done.
date_format = '%m-%d-%Y'

# Parse to real datetimes so that sorting is chronological, not lexical.
merged['date'] = pd.to_datetime(merged['date'])
merged = merged.sort_values('date', ascending=True)

# DataFrame.sample returns its rows in random order, which would scramble the
# animation frames built from `date` (Plotly Express orders frames by first
# appearance in the data). Draw the sample from the sorted frame, then put the
# sampled rows back into chronological order while `date` is still a datetime.
# The sample size is capped at the number of available rows so a small
# `merged` frame does not raise.
merged100k = (
    merged.sample(n=min(sample100k, len(merged)), random_state=1)
    .sort_values('date', kind='stable')
    .assign(date=lambda frame: frame['date'].dt.strftime(date_format))
)

# Only now turn the dates into display strings on the full frame.
merged['date'] = merged['date'].dt.strftime(date_format)
NY_merged = merged

In [10]:
# Dash application for the notebook, styled with the Bootstrap theme shipped
# by dash-bootstrap-components. Layout and callbacks are attached to `app`
# in later cells.
external_stylesheets = [dbc.themes.BOOTSTRAP]
app = JupyterDash(__name__,external_stylesheets=external_stylesheets)

In [11]:
'''
This cell is for instructor use only.  If you are unable to find a port:
Please stop all open notebooks.
Restart this notebook and run it.  Ports around 8992 were a safe bet but might not work for others.
If that fails, uncomment the line in this cell.  It should in theory "infer" a port.  
'''
# JupyterDash.infer_jupyter_proxy_config()


'\nThis cell is for instructor use only.  If you are unable to find a port:\nPlease stop all open notebooks.\nRestart this notebook and run it.  Ports around 8992 were a safe bet but might not work for others.\nIf that fails, uncomment the line in this cell.  It should in theory "infer" a port.  \n'

In [12]:
# Animated scatter plot, built outside the dashboard layout cell.
def _strip_facet_prefix(label):
    """Remove the 'neighbourhood_group=' prefix from a label, if present."""
    return label.replace("neighbourhood_group=", "")


scatter_options = dict(
    x='availability_365',
    y='minimum_nights',
    size='price_y',
    size_max=90,
    color='neighbourhood_group',
    facet_row='neighbourhood_group',
    animation_frame='date',
    animation_group='neighbourhood_group',
    log_x=False,
    height=900,
    width=900,
)
fig_scatter = px.scatter(data_frame=merged100k, **scatter_options)

# Shorten facet annotations and trace names to the bare group name.
fig_scatter.for_each_annotation(
    lambda note: note.update(text=_strip_facet_prefix(note.text))
)
fig_scatter = fig_scatter.for_each_trace(
    lambda trace: trace.update(name=_strip_facet_prefix(trace.name))
)

In [13]:
# The next two cells are for dashboard graphs.
# Raw price distribution. Missing prices are dropped first so they cannot
# leak into the kernel-density curve that create_distplot draws.
hist_data = [nyc_data['price'].dropna().to_numpy()]
price_hist_fig = ff.create_distplot(hist_data, ['distplot'])

# Log transformation: log1p(x) is log(1 + x), so a price of 0 maps to 0.
nyc_data['price_log'] = np.log1p(nyc_data['price'])
log_data = [nyc_data['price_log'].dropna().to_numpy()]
price_log = ff.create_distplot(log_data, ['Log'])

In [14]:

# Cast the listings `price` column to float in place on `ny_listings`; the
# slider bounds and the heat-map callback read this column.
ny_listings["price"] = ny_listings["price"].astype(float)

In [16]:
# One slider mark per distinct, non-missing listing price. Building the marks
# from the unique values avoids walking every listing row and keeps a 'nan'
# mark off the slider. With step=None the slider can only rest on these marks.
heat_slider_prices = sorted(ny_listings['price'].dropna().unique().tolist())
heat_slider_marks = {str(price): str(price) for price in heat_slider_prices}

# Page layout, top to bottom: title and disclaimers, intro text, price heat
# map with its price slider, neighbourhood-group dropdown with its text
# report, price-distribution tabs, and the animated scatter plot.
app.layout = html.Div([
    html.H1('Welcome To Airbnb Explorers Dashboard'),
    html.H6('**This dashboard is for academic purposes only.  Actual prices may vary'),
    html.H6('**Please check Airbnb for actual prices.'),
    html.Br(),
    dcc.Markdown('''
    #### Intro

    Despite concerns of an ongoing global pandemic, increase in international turmoil, and the uncertainty of the\n
    US economy, leisure traveling is expected to reach new highs [(Mckinsey)](https://www.mckinsey.com/industries/travel-logistics-and-infrastructure/our-insights/us-summer-travel-2022). As the interest in traveling increases, individuals\n
    are faced with the question of booking a hotel or renting an Airbnb. Despite where you may stand on this debate,\n
    it is hard to ignore the success Airbnb has had since the demise of traveling at the start of the Covid-19 pandemic.\n
    In fact, Airbnb posted revenue of $1.5 billion in Q1 of 2022, and average daily rates continue to rise [(CNN)](https://www.cnn.com/2022/05/03/tech/airbnb-first-quarter-earnings/index.html).\n
    '''),
    html.Br(),
    dcc.Markdown('''
    To match this demand in traveling, more & more hosts are buying out property for the sole purpose of short term\n
    rentals. New York City alone saw the number of available short term rentals increase to 13,000 [(Bloomberg)](https://www.bloomberg.com/news/articles/2022-05-18/airbnbs-outnumber-new-york-city-apartments-in-hot-housing-market). Using\n
    data available from [Inside Airbnb](http://insideairbnb.com/get-the-data/) we will analyze New York City’s Airbnb listings data to build a predictive model\n
    on future rental prices. The different regression models that will be used include linear, lasso, ridge, and random\n
    forest. The analysis and model will be incorporated into a final dashboard that a host can use to learn about their\n
    desired market.
    '''),
    html.Br(),
    html.H3('Average cost of an Airbnb rental in 2022'),
    html.Br(),

    # Heat map; redrawn by the `heat_slider` callback.
    dcc.Graph(id='ny_heat_map'),
    dcc.Slider(
        ny_listings['price'].min(),
        ny_listings['price'].max(),
        step=None,
        value=ny_listings['price'].min(),
        marks=heat_slider_marks,
        id='heat_slider',
    ),

    # Neighbourhood-group picker; its callback fills the `report` div.
    dcc.Dropdown(
        id='neighbourhood_group',
        style={"width": "75%"},
        options=[{'label': neighbourhood_group, 'value': neighbourhood_group}
                 for neighbourhood_group in merged['neighbourhood_group'].unique()],
    ),
    html.Br(),
    html.Div(id='report'),
    html.Br(),

    dcc.Tabs([
        dcc.Tab(label='NY Price Distribution', children=[
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id='price_distro_hist', figure=price_hist_fig)
                ]),
                dbc.Col([
                    dcc.Graph(id='price_log_hist', figure=price_log)
                ]),
            ]),
        ]),
        # Placeholder tab: no content yet.
        dcc.Tab(label='EDA', children=[

        ]),
    ]),

    dcc.Graph(id='animated_scatter',
              figure=fig_scatter),
    html.Br(),
    html.Br(),

])

@app.callback(Output('report', 'children'),
              Input('neighbourhood_group', 'value'))
def display_neighbourhood_group(neighbourhood_group):
    """Build the text report for the neighbourhood group picked in the dropdown.

    Args:
        neighbourhood_group: Current dropdown value; ``None`` when nothing is
            selected.

    Returns:
        An empty string when nothing is selected. Otherwise a list with an
        ``html.H3`` heading followed by a sentence: the mean ``price_y`` of
        the rows in ``merged100k`` for that group, or a notice that the
        sample holds no rows for it.
    """
    if neighbourhood_group is None:
        return ''

    # Filter to the selected group directly instead of aggregating every
    # group on each callback; the mean for the group is the same.
    group_prices = merged100k.loc[
        merged100k['neighbourhood_group'] == neighbourhood_group, 'price_y'
    ]

    # The dropdown options are built from the full `merged` frame, so a group
    # may have no rows in the sample. Report that instead of raising.
    if group_prices.empty:
        return [html.H3(neighbourhood_group),
                f'No sampled listings are available for {neighbourhood_group}.']

    price = group_prices.mean()
    return [html.H3(neighbourhood_group),
            f'The average sampled price for one night stay in {neighbourhood_group} is ${price:,.0f}.']

@app.callback(
    Output('ny_heat_map', 'figure'),
    Input('heat_slider', 'value'),
)
def update_figure(selected_price):
    """Redraw the neighbourhood choropleth for the price chosen on the slider.

    Args:
        selected_price: Current value of the ``heat_slider`` component.

    Returns:
        A mapbox choropleth figure built from the rows of ``ny_listings``
        whose ``price`` equals ``selected_price``, matched to the GeoJSON
        features by neighbourhood name and coloured by price.
    """
    price_matches = ny_listings['price'] == selected_price
    listings_at_price = ny_listings[price_matches]

    choropleth = px.choropleth_mapbox(
        listings_at_price,
        geojson=ny_data,
        locations='neighbourhood',
        featureidkey="properties.neighbourhood",
        color='price',
        mapbox_style="carto-positron",
        center=dict(lat=40.75356, lon=-73.98559),
        zoom=9.2,
    )
    # Remove the outer margins so the map fills the graph area.
    choropleth.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    return choropleth

# TODO: put the link to the Google doc down here.
# TODO: maybe link to the data through the API and/or offer a dataset download.

In [17]:
'''
Please make sure all Dash ports are shut down before trying to run.
'''
# Start the dashboard server on port 8993. With mode='external' the app is
# served at a local URL (printed in the cell output) rather than rendered
# inside the notebook.
app.run_server(mode='external',port=8993)

Dash app running on http://127.0.0.1:8993/


In [None]:
# Tabulate the 50 most frequent amenities together with their counts.
# NOTE(review): `amenities_counter` is not defined in the cells shown here;
# presumably a collections.Counter built in another cell -- confirm it exists
# before this cell runs on a fresh kernel.
most_common_amenities = amenities_counter.most_common(50)
amenities_df = pd.DataFrame(data=most_common_amenities, columns=['Amenities', 'Count'])
amenities_df