# Airbnb Seattle: Monthly Neighbourhood Prices Exploration

Explore how the average Airbnb listing price in each Seattle neighbourhood group changes from month to month over the year.

In [3]:
import pandas as pd
import plotly.express as px
import datetime
import plotly.graph_objects as go

### Load Data

In [5]:
# calendar.csv: one row per listing per date (availability and price).
# listings.csv: one row per listing with its descriptive attributes.
calendar_df, listing_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/calendar.csv", "../data/listings.csv")
)

In [7]:
# Peek at the first two listings to see which columns are available.
listing_df.head(n=2)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,...,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,241032,https://www.airbnb.com/rooms/241032,20160104002432,2016-01-04,Stylish Queen Anne Apartment,,Make your self at home in this charming one-be...,Make your self at home in this charming one-be...,none,,...,10.0,f,,WASHINGTON,f,moderate,f,f,2,4.07
1,953595,https://www.airbnb.com/rooms/953595,20160104002432,2016-01-04,Bright & Airy Queen Anne Apartment,Chemically sensitive? We've removed the irrita...,"Beautiful, hypoallergenic apartment in an extr...",Chemically sensitive? We've removed the irrita...,none,"Queen Anne is a wonderful, truly functional vi...",...,10.0,f,,WASHINGTON,f,strict,t,t,6,1.48


In [9]:
# Peek at the first two calendar rows; note that price is stored as text ("$85.00").
calendar_df.head(n=2)

Unnamed: 0,listing_id,date,available,price
0,241032,2016-01-04,t,$85.00
1,241032,2016-01-05,t,$85.00


### Data Wrangling: Join Calendar and Neighbourhood dfs on listing id

In [11]:
# Keep only the listing key and its neighbourhood group, renaming the key so it
# matches the calendar's `listing_id` column.
listing_neighbourhood_df = (
    listing_df[['id', 'neighbourhood_group_cleansed']]
    .rename(columns={"id": "listing_id"})
)

# Outer join: rows from both frames are kept even when a listing_id has no match.
merged_df = calendar_df.merge(listing_neighbourhood_df, on='listing_id', how='outer')

# Parse the ISO date strings so the .dt accessor can be used later.
merged_df['date'] = pd.to_datetime(merged_df['date'], format="%Y-%m-%d")
merged_df.head(2)

Unnamed: 0,listing_id,date,available,price,neighbourhood_group_cleansed
0,241032,2016-01-04,t,$85.00,Queen Anne
1,241032,2016-01-05,t,$85.00,Queen Anne


### Data Wrangling: Clean Price

In [13]:
def clean_string(df, col_name, character_list):
    """Strip every occurrence of the given substrings from a column.

    The column is converted to strings, then each entry of
    ``character_list`` is removed as a literal substring (not a regex, so
    characters such as ``$`` need no escaping). Missing values stay missing
    rather than being turned into the text ``'nan'``.

    Args:
        df: DataFrame holding the column. It is modified in place.
        col_name: Name of the column to clean.
        character_list: Iterable of substrings to remove.

    Returns: the same df object, with the column cleaned.
    """
    column = df[col_name]
    # Remember which rows were missing before stringifying; depending on the
    # pandas version astype(str) either keeps them missing or yields 'nan'.
    missing = column.isna()
    cleaned = column.astype(str)

    for char in character_list:
        # Vectorised literal replacement; tolerates missing entries.
        cleaned = cleaned.str.replace(char, '', regex=False)

    # Restore missing entries so later numeric conversion sees real NaN.
    df[col_name] = cleaned.mask(missing)

    return df

def col_to_dtype(df, col_name, dtype):
    """Cast one column of df to the requested dtype.

    The column is replaced on the passed-in DataFrame itself, so the caller's
    object is modified.

    Returns: the same df object, with the column cast to ``dtype``.
    """
    converted = df[col_name].astype(dtype)
    df[col_name] = converted
    return df

# Strip the currency symbol and thousands separator, then make price numeric.
merged_df = (
    merged_df
    .pipe(clean_string, 'price', ['$', ','])
    .pipe(col_to_dtype, 'price', 'float')
)

merged_df.head()

Unnamed: 0,listing_id,date,available,price,neighbourhood_group_cleansed
0,241032,2016-01-04,t,85.0,Queen Anne
1,241032,2016-01-05,t,85.0,Queen Anne
2,241032,2016-01-06,f,,Queen Anne
3,241032,2016-01-07,f,,Queen Anne
4,241032,2016-01-08,f,,Queen Anne


### Data Analysis: Group By Neighbourhood

In [None]:

# Average the numeric columns for every (month name, neighbourhood group) pair.
# numeric_only=True is needed because merged_df still carries the text column
# `available`; recent pandas raises a TypeError on such columns instead of
# silently dropping them.
calendar_month_avg = (
    merged_df
    .groupby([
        merged_df['date'].dt.strftime('%B'),
        merged_df['neighbourhood_group_cleansed'],
    ])
    .mean(numeric_only=True)
)
calendar_month_avg.head()

In [16]:
# Pivot the (month, neighbourhood) index so that each neighbourhood group
# becomes its own column and each row is a month - the shape the plot needs.
grouped_price_df = calendar_month_avg.loc[:, 'price']
unstacked_df = grouped_price_df.unstack(level='neighbourhood_group_cleansed')
unstacked_df.head()

neighbourhood_group_cleansed,Ballard,Beacon Hill,Capitol Hill,Cascade,Central Area,Delridge,Downtown,Interbay,Lake City,Magnolia,Northgate,Other neighborhoods,Queen Anne,Rainier Valley,Seward Park,University District,West Seattle
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
April,120.99935,99.030576,134.181818,157.135593,128.884648,89.799889,182.2288,116.575646,92.973451,204.425329,91.769274,121.214977,170.429821,100.901802,105.656733,106.58308,145.114359
August,139.124224,103.394035,142.804368,167.7523,143.500132,96.316934,225.402173,125.2,101.069565,201.063165,100.167991,135.258759,194.843181,107.465484,121.463768,111.939155,159.073464
December,129.631683,101.333211,133.784395,142.488285,138.567857,92.66002,176.166682,114.5,90.80176,202.312173,89.121781,129.43885,172.255945,98.605489,119.830688,104.452389,147.817291
February,110.033227,96.401518,125.38133,144.141864,122.093812,81.613109,156.857117,112.874126,92.184322,185.428373,88.00578,113.688757,153.775233,89.511774,97.792434,96.716808,136.937485
January,108.986406,97.777725,123.823421,139.289689,120.513238,78.06,153.544756,118.723636,88.437179,187.909692,87.301223,110.440733,156.448199,90.09151,100.407538,94.948899,140.519956


In [18]:
# The index holds month names; copy them into a regular column.
month_names = unstacked_df.index.values
unstacked_df['month'] = month_names
# Month names sort alphabetically, so derive the calendar month number (1-12)
# to order the rows chronologically.
unstacked_df['month_num'] = [
    datetime.datetime.strptime(month_name, "%B").month
    for month_name in month_names
]

In [20]:
monthly_neigh_prices = (
    unstacked_df
    .sort_values('month_num')              # chronological order for the x axis
    .drop(columns=['month_num', 'month'])  # helper columns are not plotted
    .round(2)                              # two decimals for display
)
monthly_neigh_prices.head()

neighbourhood_group_cleansed,Ballard,Beacon Hill,Capitol Hill,Cascade,Central Area,Delridge,Downtown,Interbay,Lake City,Magnolia,Northgate,Other neighborhoods,Queen Anne,Rainier Valley,Seward Park,University District,West Seattle
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
January,108.99,97.78,123.82,139.29,120.51,78.06,153.54,118.72,88.44,187.91,87.3,110.44,156.45,90.09,100.41,94.95,140.52
February,110.03,96.4,125.38,144.14,122.09,81.61,156.86,112.87,92.18,185.43,88.01,113.69,153.78,89.51,97.79,96.72,136.94
March,113.24,94.02,129.38,146.52,123.15,87.96,173.55,119.14,91.23,190.36,89.28,114.39,163.66,93.05,97.59,98.78,137.89
April,121.0,99.03,134.18,157.14,128.88,89.8,182.23,116.58,92.97,204.43,91.77,121.21,170.43,100.9,105.66,106.58,145.11
May,122.75,102.74,135.51,168.93,134.31,91.48,196.8,116.68,96.04,200.87,96.05,124.45,177.21,103.64,106.02,108.03,146.61


### Data Visualisation

In [22]:
# Line chart of average monthly price: one trace per neighbourhood group,
# with the months (the frame's index) along the x axis.
fig = go.Figure()

for neigh in monthly_neigh_prices.columns:
    fig.add_trace(
        go.Scatter(
            x=monthly_neigh_prices.index,
            y=monthly_neigh_prices[neigh],
            mode='lines+markers',
            name=neigh,
        )
    )

fig.update_layout(
    template='plotly_dark',
    autosize=False,
    width=600,
    height=800,
    # The traces are the neighbourhoods, so that label belongs on the legend.
    legend_title_text="Airbnb Seattle Neighbourhood",
    # Axis title font is set through title.font; the older `titlefont`
    # shorthand is deprecated.
    yaxis=dict(
        title=dict(text="Average Listing Price ($)", font=dict(size=14)),
    ),
    xaxis=dict(
        title=dict(text="Month", font=dict(size=14)),
    ),
)

fig.show()