In [1]:
import geopandas as gpd

In [2]:
# Read the neighbourhood polygons from GeoJSON into a GeoDataFrame.
neighbourhoods = gpd.read_file(filename='../data/neighbourhoods.geojson')

In [3]:
import lux
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

import json
import re
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

### Configuring Matplotlib styles

In [4]:
# The bundled 'seaborn' style sheet was renamed to 'seaborn-v0_8' in
# Matplotlib 3.6 and the old name was later removed. Prefer the new name when
# this Matplotlib provides it; fall back to the old one on older releases.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Notebook-wide figure defaults: high-resolution, large figures with
# readable titles, labels, ticks, lines and markers.
mpl.rcParams.update({
    'figure.dpi': 200,
    'figure.figsize': [15, 10],
    'axes.titlesize': 24,
    'axes.labelsize': 20,
    'lines.linewidth': 3,
    'lines.markersize': 10,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
})

## Load listings data

In [5]:
# Load the detailed listings export into a DataFrame.
listings = pd.read_csv(filepath_or_buffer='../data/listings-detailed.csv')

In [6]:
# Show every column label of the listings table.
listings.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'description',
       'neighborhood_overview', 'picture_url', 'host_id', 'host_url',
       'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
       'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'calendar_upd

In [7]:
# Convert dates and datetimes to pandas datetimes.
# Each column is parsed with its own explicit format string.
date_formats = {
    'scrape_id': '%Y%m%d%H%M%S',
    'last_scraped': '%Y-%m-%d',
    'host_since': '%Y-%m-%d',
    'calendar_last_scraped': '%Y-%m-%d',
    'first_review': '%Y-%m-%d',
    'last_review': '%Y-%m-%d',
}
for column, fmt in date_formats.items():
    listings[column] = pd.to_datetime(listings[column], format=fmt)

In [8]:
# Format price correctly: turn the price strings (presumably like "$1,234.00")
# into floats by dropping every character that is not part of a number.
#
# The vectorised string cleanup leaves missing prices as NaN instead of
# raising on them, and the dtype guard makes this cell safe to re-run once the
# column has already been converted.
if not pd.api.types.is_numeric_dtype(listings['price']):
    listings['price'] = (
        listings['price']
        .str.replace(r'[^0-9.\-]', '', regex=True)
        .astype(float)
    )

In [18]:
# Preview the first rows of the neighbourhoods table.
neighbourhoods.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
0,Treycovagnes,Jura-Nord vaudois,"MULTIPOLYGON (((6.60774 46.76022, 6.60252 46.7..."
1,Lausanne,Lausanne,"MULTIPOLYGON (((6.64199 46.50532, 6.64187 46.5..."
2,Villars-Epeney,Jura-Nord vaudois,"MULTIPOLYGON (((6.70499 46.78969, 6.70488 46.7..."
3,Vinzel,Nyon,"MULTIPOLYGON (((6.28615 46.44040, 6.28305 46.4..."
4,Denens,Morges,"MULTIPOLYGON (((6.45446 46.50829, 6.45367 46.5..."


In [None]:
# profile = ProfileReport(listings, title="Listings", explorative=True)

In [None]:
# profile

In [None]:
# profile.to_file("listings-profile.html")

In [None]:
fig, ax = plt.subplots()

# Plot listings with at most one review and listings with more than one review
# as two separate series. Each series is labelled so the legend explains the
# two colours.
sns.scatterplot(
    data=listings[listings['number_of_reviews'] <= 1],
    x="beds", y="price", ax=ax, label="At most 1 review",
)
sns.scatterplot(
    data=listings[listings['number_of_reviews'] > 1],
    x="beds", y="price", ax=ax, label="More than 1 review",
)

# The axis limits crop the view to 0-20 beds and prices of 0-5000.
ax.set(
    xlim=(0, 20),
    ylim=(0, 5000),
    xlabel="Beds",
    ylabel="Price",
    title="Listing price by number of beds",
)
ax.legend(title="Number of reviews")

plt.show()

In [None]:
@interact(text=listings.description)
def wordcloudgenerator(text):
    """Render a word cloud for the listing description chosen in the widget.

    Parameters
    ----------
    text : str
        The description selected through the ``interact`` control. Values
        that are not strings (e.g. NaN for a missing description) and
        descriptions with no text left after removing markup are reported
        and skipped instead of raising inside ``WordCloud.generate``.
    """
    if not isinstance(text, str):
        print("No description available for this listing.")
        return

    # Remove HTML-like tags (the same pattern the price-band word clouds
    # strip) so markup does not end up as words. Replacing with a space keeps
    # the words on either side of a tag from being glued together.
    plain_text = re.sub('<.*?>', ' ', text).strip()
    if not plain_text:
        print("The description contains no text to plot.")
        return

    wc = WordCloud(background_color="white").generate(plain_text)

    plt.imshow(wc, interpolation="bilinear")
    plt.axis('off')
    plt.show()

In [None]:
# One word cloud of listing descriptions per price band, on a 2x3 grid.
f, axes = plt.subplots(2, 3)

# Matches HTML-like tags so they can be removed from the descriptions.
cleanr = re.compile('<.*?>')

# (title, lower bound, upper bound) for each panel. Bands are half-open,
# lower <= price < upper, so a listing priced exactly on a boundary
# (50, 100, 200, 300, 500) lands in exactly one band instead of none.
price_bands = [
    ("price < 50", float('-inf'), 50),
    ("50 <= price < 100", 50, 100),
    ("100 <= price < 200", 100, 200),
    ("200 <= price < 300", 200, 300),
    ("300 <= price < 500", 300, 500),
    ("price >= 500", 500, float('inf')),
]

for ax, (band_title, low, high) in zip(axes.flat, price_bands):
    in_band = (listings.price >= low) & (listings.price < high)

    # Drop missing descriptions: str(NaN) would add the word "nan" to the cloud.
    descriptions = listings.loc[in_band, 'description'].dropna().astype(str)

    # Replace tags with a space so words on either side are not merged.
    band_text = re.sub(cleanr, ' ', " ".join(descriptions))

    ax.axis('off')
    ax.set_title(band_title)

    # WordCloud.generate raises when there is nothing to plot, so leave the
    # panel empty for a band without any description text.
    if band_text.strip():
        band_cloud = WordCloud(background_color="white").generate(band_text)
        ax.imshow(band_cloud, interpolation="bilinear")

plt.show()

## Load reviews data

In [None]:
# Load the detailed reviews export into a DataFrame.
reviews = pd.read_csv(filepath_or_buffer='../data/reviews-detailed.csv')

In [None]:
# Parse the review dates (YYYY-MM-DD, per the format string) into datetimes.
reviews['date'] = pd.to_datetime(
    reviews.date,
    format='%Y-%m-%d',
)

In [None]:
# Display the reviews table to inspect it after the date conversion.
reviews

## Load calendar data


In [None]:
# Load the calendar export into a DataFrame.
# NOTE(review): the name `calendar` would shadow the standard-library module
# of the same name if that module were ever imported in this notebook.
calendar = pd.read_csv(filepath_or_buffer='../data/calendar.csv')

In [None]:
# Parse the calendar dates (YYYY-MM-DD, per the format string) into datetimes.
calendar['date'] = pd.to_datetime(
    calendar.date,
    format='%Y-%m-%d',
)

In [None]:
# Display the calendar table to inspect it after the date conversion.
calendar

## Load neighbourhood data

In [None]:
# Load the neighbourhood lookup table from CSV.
# NOTE(review): this rebinds `neighbourhoods`, which the first cell of the
# notebook assigned from the GeoJSON file; from here on the name refers to
# this CSV table instead. Consider a distinct name if both are needed.
neighbourhoods = pd.read_csv(filepath_or_buffer='../data/neighbourhoods.csv')

In [None]:
# Count how many rows belong to each neighbourhood group.
neighbourhoods['neighbourhood_group'].value_counts()