<b>Notebook for exploring tourism intensity and other spatial attributes based on Inside Airbnb listing data</b>

In [None]:
import datetime
import glob as glob
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
import seaborn as sns
from pylab import savefig

from scripts.data_wrangling import listing2gdf, census2gdf,CalculateTouristIntensity,aggregate

# `display` and `HTML` are imported from the public IPython.display module;
# the IPython.core.display path for `display` has been deprecated since
# IPython 7.14 and is not available in newer releases.
from IPython.display import display, HTML

# Inject a CSS rule so the notebook's `.container` element uses the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
#listing data urls
# All snapshots share one URL layout; only the scrape-date path segment differs.
_LISTINGS_URL = "http://data.insideairbnb.com/the-netherlands/north-holland/amsterdam/{}/data/listings.csv.gz"

apr2015 = _LISTINGS_URL.format("2015-04-05")
apr2016 = _LISTINGS_URL.format("2016-04-04")
apr2017 = _LISTINGS_URL.format("2017-04-02")
apr2018 = _LISTINGS_URL.format("2018-04-07")
apr2019 = _LISTINGS_URL.format("2019-04-08")

# The list order is kept exactly as it was originally written.
listings_2015_2019 = [
    apr2015,
    apr2019,
    apr2018,
    apr2017,
    apr2016,
]

In [None]:
#retrieve and process that listing data
OUTPUT_DIR = "./output"
CENSUS_PATH = "./data/amsterdam_neighbourhoods.geojson"

# Create the export folder up front so the to_file calls below do not fail
# on a fresh checkout where it does not exist yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for url in listings_2015_2019:

    # The scrape date is the third-from-last path segment of the URL.
    date = url.split('/')[-3]
    # Fail early (ValueError) if the URL layout changes and this segment is
    # no longer a YYYY-MM-DD date, instead of exporting mislabelled files.
    datetime.datetime.strptime(date, "%Y-%m-%d")
    year = date[0:4]
    stamp = date.replace('-','')  # e.g. 2019-04-08 -> 20190408, used in file names

    print("Processing: \n" + url)

    #load airbnb data and separate features
    airbnb_gdf = listing2gdf(url)
    room = airbnb_gdf.loc[airbnb_gdf['room_type'].isin(['Private room','Shared room'])]
    entire_home = airbnb_gdf.loc[airbnb_gdf['room_type']=='Entire home/apt']
    superhost = airbnb_gdf.loc[airbnb_gdf['host_is_superhost']=='t']

    #loading Amsterdam census data
    # Reloaded on every iteration so each snapshot starts from a fresh frame.
    # NOTE(review): could be hoisted if aggregate() never mutates its input - confirm.
    nbh_gdf = census2gdf(CENSUS_PATH)

    #aggregate airbnb features and census data
    nbh_gdf = aggregate(airbnb_gdf,nbh_gdf,room,entire_home,superhost)

    #calculate tourist intensity
    nbh_gdf = CalculateTouristIntensity(nbh_gdf,year)
    # Tag every row with its snapshot date.
    nbh_gdf['date'] = date

    #export listing and airbnb data as geojson files
    nbh_gdf.to_file("{}/AirbnbPoly_{}.geojson".format(OUTPUT_DIR, stamp), driver="GeoJSON",encoding='utf-8')
    airbnb_gdf.to_file("{}/AirbnbPoints_{}.geojson".format(OUTPUT_DIR, stamp),driver="GeoJSON",encoding='utf-8')

    print("Done!")

In [None]:
#concatenate geojson from different years
def combine_snapshots(pattern, out_path):
    """Merge every GeoJSON file matching ``pattern`` into one file at ``out_path``.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the per-date snapshot files to merge.
    out_path : str
        Destination of the combined GeoJSON file.

    Returns
    -------
    geopandas.GeoDataFrame
        The concatenated rows of all matched files, in sorted file-name order.

    Raises
    ------
    FileNotFoundError
        If no file matches ``pattern``; nothing is written in that case.
    """
    # Sort because glob returns matches in arbitrary order.
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError("No snapshot files match {}".format(pattern))

    frames = [gpd.read_file(path, driver='GeoJSON') for path in paths]
    # Concatenate once after all files are read, rather than on every iteration.
    combined = gpd.GeoDataFrame(
        pd.concat(frames, axis=0, ignore_index=True),
        crs=frames[0].crs,
    )
    combined.to_file(out_path, driver="GeoJSON",encoding='utf-8')
    return combined


# Match only the dated snapshots (<prefix>_YYYYMMDD.geojson). A bare
# "<prefix>_*" also matches the combined *_2015to2019 file written below,
# which would duplicate rows every time this cell is re-run.
DATE_GLOB = "[0-9]" * 8

combined_poly_gdf = combine_snapshots(
    "./output/AirbnbPoly_{}.geojson".format(DATE_GLOB),
    "./output/AirbnbPoly_2015to2019.geojson",
)

# `combined_gdf` ends up holding the combined points frame, as it did before.
combined_gdf = combine_snapshots(
    "./output/AirbnbPoints_{}.geojson".format(DATE_GLOB),
    "./output/AirbnbPoints_2015to2019.geojson",
)

<b>Plots</b>

In [None]:
#Top 10 Airbnb Neighbourhoods in Amsterdam April 2015-2019
airbnb = gpd.read_file("./output/AirbnbPoly_2015to2019.geojson",driver='GeoJSON')

# The seaborn theme does not change between figures, so set it once.
sns.set(style="whitegrid")

# Sort the dates so the figures come out in chronological order rather than
# in the order the rows happen to appear in the combined file.
for date in sorted(airbnb['date'].unique()):
    # Exact match on the snapshot date; keep the 10 rows with the most listings.
    listing = airbnb[airbnb['date'] == date]
    listing = listing.sort_values(by='Airbnb_ListingCount',ascending=False)[:10]

    fig, ax = plt.subplots()
    ax.set_title("Top 10 Airbnb Neighbourhoods in \nAmsterdam {}".format(date),fontsize=13,fontweight="bold")
    # The total-count bars are drawn first; the room-rental bars are drawn on
    # top of them on the same axes.
    sns.barplot(x='Airbnb_ListingCount', y='Buurt', data=listing,label="Entire Lodge", color="#f49191", ax=ax)
    sns.barplot(x='Airbnb_RoomRentalCount', y='Buurt',data= listing,label="Room", color="#F7F48B", ax=ax)
    ax.set_xlabel('Airbnb Listing')
    ax.set_ylabel('Neighbourhood')

    ax.legend(ncol=1, loc="lower right", frameon=True)
    # Fixed x-range shared by every figure; bars longer than 400 are cut off.
    ax.set(xlim=(0, 400))
    sns.despine(left=True, bottom=False)
    #plt.savefig("./images/top10airbnb_april{}.png".format(date[0:4]),bbox_inches = "tight",dpi=300) 

In [None]:
#Top 10 most expensive Airbnb Neighbourhoods in Amsterdam, one figure per snapshot date
# The seaborn theme does not change between figures, so set it once.
sns.set(style="whitegrid")

# Render the x tick labels as whole amounts with a dollar sign and thousands separator.
price_tick = mtick.StrMethodFormatter('${x:,.0f}')

# Sort the dates so the figures come out in chronological order rather than
# in the order the rows happen to appear in the combined file.
for date in sorted(airbnb['date'].unique()):
    # Exact match on the snapshot date; keep the 10 rows with the highest average price.
    listing = airbnb[airbnb['date'] == date]
    listing = listing.sort_values(by='Airbnb_AvgPrice',ascending=False)[:10]

    fig, ax = plt.subplots()
    ax.set_title("Top 10 Expensive Airbnb Neighbourhoods in \nAmsterdam {}".format(date),fontsize=13,fontweight="bold")
    # NOTE(review): the "Entire Lodge" legend label looks copied from the
    # listing-count plot - confirm which listings Airbnb_AvgPrice covers.
    sns.barplot(x='Airbnb_AvgPrice', y='Buurt', data=listing,label="Entire Lodge", color="#f49191", ax=ax)
    # NOTE(review): label completed to match the '$' tick format - confirm the
    # currency of the source prices.
    ax.set_xlabel('Airbnb Average Price in U.S. Dollars')
    ax.set_ylabel('Neighbourhood')

    ax.xaxis.set_major_formatter(price_tick)

    ax.legend(ncol=1, loc="lower right", frameon=True)
    # Fixed x-range shared by every figure; bars longer than 400 are cut off.
    ax.set(xlim=(0, 400))
    sns.despine(left=True, bottom=False)
    # Distinct file name so enabling this does not overwrite the listing-count figures.
    #plt.savefig("./images/top10expensive_airbnb_april{}.png".format(date[0:4]),bbox_inches = "tight",dpi=300) 

In [None]:
# Preview the first rows of `listing`. The name is not defined in this cell:
# it holds whatever the last iteration of the most recently run plotting loop
# assigned to it, so this cell only works after one of those cells has run.
listing.head()