In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

# Load the calendar export and normalise its columns in a single chain, so
# re-running the cell always starts again from the file on disk.
df = (
    pd.read_csv('data/calendar.csv')
    .drop(columns=['adjusted_price'])
    .assign(
        # 't' -> 1, anything else (including missing values) -> 0
        available=lambda d: d['available'].eq('t').astype('int64'),
        date=lambda d: pd.to_datetime(d['date'], format='%Y-%m-%d'),
        # Strip the $-sign and the commas as literal characters. Removing '$'
        # explicitly (instead of slicing off the first character) cannot eat
        # a digit when a value happens to have no currency prefix.
        price=lambda d: (
            d['price']
            .str.replace('$', '', regex=False)
            .str.replace(',', '', regex=False)
            .astype(float)
        ),
    )
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3210272 entries, 0 to 3210271
Data columns (total 6 columns):
 #   Column          Dtype         
---  ------          -----         
 0   listing_id      int64         
 1   date            datetime64[ns]
 2   available       int64         
 3   price           float64       
 4   minimum_nights  float64       
 5   maximum_nights  float64       
dtypes: datetime64[ns](1), float64(3), int64(2)
memory usage: 147.0 MB


In [2]:
# Remove outliers: keep only rows whose price is strictly between 500 and 5000.
# Rows with a missing price fail both comparisons and are dropped as well.
price_in_range = (df['price'] > 500) & (df['price'] < 5000)
df = df.loc[price_in_range]
df.describe()

Unnamed: 0,listing_id,available,price,minimum_nights,maximum_nights
count,2501513.0,2501513.0,2501513.0,2501466.0,2501466.0
mean,22975750.0,0.1678352,1027.432,4.362093,701.8385
std,12275950.0,0.37372,542.2242,15.53693,533.3273
min,42932.0,0.0,501.0,1.0,1.0
25%,13329600.0,0.0,700.0,2.0,30.0
50%,21588290.0,0.0,860.0,2.0,1125.0
75%,34243110.0,0.0,1170.0,4.0,1125.0
max,44415920.0,1.0,4900.0,900.0,5000.0


In [29]:
# Mean price over all remaining rows, one value per calendar date.
df_price = df.groupby('date', as_index=False)['price'].mean()

fig = px.line(
    df_price,
    x='date',
    y="price",
    color_discrete_sequence=['#f9585d'],
    labels={"price": "Price (NOK per night)", "date": "Month"},
)
fig.update_layout(
    height=400,
    width=900,
    # Centre the title horizontally above the plot area.
    title=dict(text="Price by date 2020-2021", x=0.5, xanchor='center', yanchor='top'),
)
# Fixed y-range, identical to the one used for the 2019-2020 price figure.
fig.update_yaxes(range=[950, 1060])
fig.show()

In [20]:
# Share of listings marked available on each date: the per-date count of
# available rows divided by the number of distinct listings in `df`.
# The listing count does not depend on the date, so it is computed once here
# instead of once per row inside a lambda.
n_listings = df['listing_id'].nunique()
df_available = df.groupby('date')['available'].sum().reset_index()
df_available['available'] = df_available['available'] / n_listings

fig = px.line(df_available, x='date', y="available", color_discrete_sequence=['#f9585d'], labels={"available": "Available listings (%)", "date": "Month"})
fig.update_layout(height=400, width=900, title={
        'text': "Availability by date 2020-2021",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
# The column holds fractions in [0, 1]; render the ticks as percentages so the
# axis matches its "(%)" label without changing the stored values.
fig.update_yaxes(tickformat='.1%')
fig.show()

In [8]:
# Load the 2019 calendar export, normalise its columns and apply the price
# filter in a single chain, so re-running the cell always starts again from
# the file on disk.
df_last_year = (
    pd.read_csv('data/calendar_2019.csv')
    .drop(columns=['adjusted_price'])
    .assign(
        # 't' -> 1, anything else (including missing values) -> 0
        available=lambda d: d['available'].eq('t').astype('int64'),
        date=lambda d: pd.to_datetime(d['date'], format='%Y-%m-%d'),
        # Strip the $-sign and the commas as literal characters. Removing '$'
        # explicitly (instead of slicing off the first character) cannot eat
        # a digit when a value happens to have no currency prefix.
        price=lambda d: (
            d['price']
            .str.replace('$', '', regex=False)
            .str.replace(',', '', regex=False)
            .astype(float)
        ),
    )
    # Keep only rows whose price is strictly between 500 and 5000.
    .loc[lambda d: (d['price'] > 500) & (d['price'] < 5000)]
)
df_last_year.head()

Unnamed: 0,listing_id,date,available,price,minimum_nights,maximum_nights
0,970425,2019-07-26,0,1212.0,3,14
1,970425,2019-07-27,0,1212.0,3,14
2,970425,2019-07-28,0,1212.0,3,14
3,970425,2019-07-29,0,1212.0,3,14
4,970425,2019-07-30,0,1212.0,3,14


In [26]:
# Mean price over all remaining 2019 rows, one value per calendar date.
df_last_year_price = df_last_year.groupby('date', as_index=False)['price'].mean()

fig = px.line(
    df_last_year_price,
    x='date',
    y="price",
    color_discrete_sequence=['#f9585d'],
    labels={"price": "Price (NOK per night)", "date": "Month"},
)
fig.update_layout(
    height=400,
    width=900,
    # Centre the title horizontally above the plot area.
    title=dict(text="Price by date 2019-2020", x=0.5, xanchor='center', yanchor='top'),
)
# Fixed y-range, identical to the one used for the 2020-2021 price figure.
fig.update_yaxes(range=[950, 1060])
fig.show()

In [24]:
# Share of listings marked available on each date of the 2019 calendar.
# The denominator must be the number of distinct listings in the 2019 frame
# itself; dividing by the listing count of `df` (the other year's data) would
# scale the curve by an unrelated population size.
n_listings_last_year = df_last_year['listing_id'].nunique()
df_last_year_available = df_last_year.groupby('date')['available'].sum().reset_index()
df_last_year_available['available'] = df_last_year_available['available'] / n_listings_last_year

fig = px.line(df_last_year_available, x='date', y="available", color_discrete_sequence=['#f9585d'], labels={"available": "Available listings (%)", "date": "Month"})
# Same 400x900 canvas as the other figures so the two years compare directly.
fig.update_layout(height=400, width=900, title={
        'text': "Availability by date 2019-2020",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
# The column holds fractions in [0, 1]; render the ticks as percentages so the
# axis matches its "(%)" label without changing the stored values.
fig.update_yaxes(tickformat='.1%')
fig.show()