## What locations appear to be most effected by event attendance?

In [1]:
from datetime import timedelta, datetime
from math import sqrt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scripts.utility import *
from scripts.dates import (
    bumbershoot_dates,
    siff_dates,
    hempfest_dates,
    pride_dates,
    summer_dates,
    all_events,
)

# set data paths
preproc_path = "data/intermediate/preproc/"
nhood_path = "data/intermediate/nhood/"

%load_ext autoreload
%autoreload 2
%load_ext nb_black

<IPython.core.display.Javascript object>

## Load data and stuff

In [2]:
# load data
calendar_df = pd.read_csv(preproc_path + "calendar.csv")
reviews_df = pd.read_csv(preproc_path + "reviews.csv")
location_df = pd.read_csv(preproc_path + "location.csv")

  interactivity=interactivity, compiler=compiler, result=result)


<IPython.core.display.Javascript object>

In [3]:
# print(location_df.shape)
# print(location_df.head())
# location_df.groupby("neighborhood").count().sort_values("id", ascending=False)["id"]

<IPython.core.display.Javascript object>

## Find stats on top neighborhoods

In [4]:
# remove vague "Other" option and choose only the top neighborhoods
all_neighborhoods = location_df.groupby("neighborhood").count().index.to_list()
all_neighborhoods.remove("Other neighborhoods")
top_neighborhoods = all_neighborhoods[0:10]
print(top_neighborhoods)

top_neighborhoods_df = location_df[location_df["neighborhood"].isin(top_neighborhoods)]

['Ballard', 'Beacon Hill', 'Capitol Hill', 'Cascade', 'Central Area', 'Delridge', 'Downtown', 'Interbay', 'Lake City', 'Magnolia']


<IPython.core.display.Javascript object>

In [5]:
top_neighborhoods_df = location_df[location_df["neighborhood"] != "Other neighborhoods"]

<IPython.core.display.Javascript object>

In [6]:
# get some basic stats about the neighborhoods
nhood_stats_df = (
    top_neighborhoods_df[["neighborhood", "price", "number_of_reviews"]]
    .groupby("neighborhood")
    .agg(
        {
            "neighborhood": ["count"],
            "price": ["mean", "std"],
            "number_of_reviews": ["mean"],
        }
    )
    .sort_values(("neighborhood", "count"), ascending=False)
)
nhood_stats_df

Unnamed: 0_level_0,neighborhood,price,price,number_of_reviews
Unnamed: 0_level_1,count,mean,std,mean
neighborhood,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Downtown,1252,161.127796,85.587449,37.518371
Capitol Hill,733,167.248295,87.180351,58.904502
Central Area,609,164.35468,87.941437,72.45977
Queen Anne,500,162.16,82.504523,57.496
West Seattle,415,169.298795,87.743931,64.496386
Rainier Valley,386,175.663212,88.738902,61.611399
Ballard,381,160.047244,77.159458,75.84252
Cascade,337,153.780415,83.792256,26.804154
Beacon Hill,270,161.374074,74.580293,70.92963
University District,243,176.320988,90.189904,25.617284


<IPython.core.display.Javascript object>

## Calculate change in prices due to various events (by neighborhood)

In [7]:
calc_pct_change = (
    lambda x: 0 if x.iloc[0] == x.iloc[-1] else (x.iloc[-1] - x.iloc[0]) / x.iloc[0]
)
for event_name, event_dates in all_events.items():

    week_start = event_dates["week_start"] - 2
    week_end = event_dates["week_end"]
    event_slice_df = time_slice(
        top_neighborhoods_df.set_index("week"), week_start, week_end
    )

    price_byneigh_df = (
        event_slice_df[
            event_slice_df["price"].notnull() & event_slice_df["price"] > 0.0
        ]
        .groupby(["week", "neighborhood"])
        .agg(event_mean_price=("price", "mean"), count=("listing_id", "count"))
        .reset_index()
    )

    price_byneigh_final_df = (
        price_byneigh_df[
            price_byneigh_df["event_mean_price"].notnull()
            & price_byneigh_df["event_mean_price"]
            > 0.0
        ]
        .groupby(["neighborhood"])
        .agg(
            pct_change_price=("event_mean_price", calc_pct_change),
            event_mean_price=("event_mean_price", "mean"),
        )
        .reset_index()
    )

    print(event_name)
    #     print(price_byneigh_final_df)

    price_byneigh_final_df.to_csv(nhood_path + event_name + "_pricechange.csv")

bumbershoot
siff
hempfest
pride


<IPython.core.display.Javascript object>

## Compare event occupancy to Summer average (by neighborhood)

In [10]:
# get limited dataframe
occp_perc_df = top_neighborhoods_df[["available", "week", "neighborhood", "listing_id"]]

# Get count of available statuses per nhood per week
occp_perc_byneigh_df = (
    occp_perc_df.groupby(["week", "available", "neighborhood"])
    .agg(count=("listing_id", "count"))
    .reset_index()
)

# get sum of available statuses per nhood per week
occp_perc_byneigh_final_df = (
    occp_perc_byneigh_df.groupby(["neighborhood", "week"])
    .agg({"count": "sum"})
    .reset_index()
)

# calculate occupancy rate per nhood per week
calc_occp = lambda x: x[x["available"] == "f"]["count"].sum() / x["count"].sum()
occp_perc_byneigh_final_df["occp_perc"] = (
    occp_perc_byneigh_df.groupby(["neighborhood", "week"])
    .apply(calc_occp)
    .reset_index()
)[0]

occp_perc_byneigh_final_df

Unnamed: 0,neighborhood,week,count,occp_perc
0,Ballard,1,5,0.600000
1,Ballard,2,7,0.857143
2,Ballard,3,15,0.533333
3,Ballard,4,6,0.500000
4,Ballard,5,5,0.400000
...,...,...,...,...
779,West Seattle,49,6,0.833333
780,West Seattle,50,3,0.666667
781,West Seattle,51,8,0.375000
782,West Seattle,52,5,0.400000


<IPython.core.display.Javascript object>

In [12]:
# calculate difference b/w event and summer availability stats
calc_pct_change = (
    lambda x: 0 if x.iloc[0] == 0 else (x.iloc[-1] - x.iloc[0]) / x.iloc[0]
)
for event_name, event_dates in all_events.items():
    print(event_name)
    event_slice_df = time_slice(
        occp_perc_byneigh_final_df.set_index("week"),
        event_dates["week_start"] - 2,
        event_dates["week_end"],
    )
    event_occp_df = (
        event_slice_df.groupby(["neighborhood"])
        .agg(
            count=("count", "mean"),
            occp_perc=("occp_perc", "mean"),
            pct_change_occp=("occp_perc", calc_pct_change),
        )
        .reset_index()
    )

    event_occp_df.to_csv(nhood_path + event_name + "_occpchange.csv")

bumbershoot
siff
hempfest
pride


<IPython.core.display.Javascript object>