# Airbnb Seattle: Looking at Listing Owner Patterns

Take a look at who owns the properties in Seattle and how they are rented out.

In [1]:
# imports
import pandas as pd
import seaborn as sns
import numpy as np
import plotly.express as px

### Load Data

In [2]:
# Read every column as text so nothing is type-inferred at load time; the
# numeric columns needed for the analysis are cast explicitly further down.
# `dtype=str` is used rather than the 'unicode' alias, which is a legacy
# NumPy type name that is not accepted by every NumPy version.
listings_df = pd.read_csv("../data/listings.csv", dtype=str)

# Number of distinct host_id values, followed by the available columns.
print(listings_df['host_id'].unique().shape[0])
print(listings_df.columns)

2751
Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'summary',
       'space', 'description', 'experiences_offered', 'neighborhood_overview',
       'notes', 'transit', 'thumbnail_url', 'medium_url', 'picture_url',
       'xl_picture_url', 'host_id', 'host_url', 'host_name', 'host_since',
       'host_location', 'host_about', 'host_response_time',
       'host_response_rate', 'host_acceptance_rate', 'host_is_superhost',
       'host_thumbnail_url', 'host_picture_url', 'host_neighbourhood',
       'host_listings_count', 'host_total_listings_count',
       'host_verifications', 'host_has_profile_pic', 'host_identity_verified',
       'street', 'neighbourhood', 'neighbourhood_cleansed',
       'neighbourhood_group_cleansed', 'city', 'state', 'zipcode', 'market',
       'smart_location', 'country_code', 'country', 'latitude', 'longitude',
       'is_location_exact', 'property_type', 'room_type', 'accommodates',
       'bathrooms', 'bedrooms', 'beds', 'bed_type', 'amenitie

In [3]:
# Show the first two rows as the cell output.
listings_df.head(n=2)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,...,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,241032,https://www.airbnb.com/rooms/241032,20160104002432,2016-01-04,Stylish Queen Anne Apartment,,Make your self at home in this charming one-be...,Make your self at home in this charming one-be...,none,,...,10,f,,WASHINGTON,f,moderate,f,f,2,4.07
1,953595,https://www.airbnb.com/rooms/953595,20160104002432,2016-01-04,Bright & Airy Queen Anne Apartment,Chemically sensitive? We've removed the irrita...,"Beautiful, hypoallergenic apartment in an extr...",Chemically sensitive? We've removed the irrita...,none,"Queen Anne is a wonderful, truly functional vi...",...,10,f,,WASHINGTON,f,strict,t,t,6,1.48


### Data Wrangling

In [4]:
def check_uniqueness(df, column_name):
    """Print the row count of df next to the distinct-value count of one column.

    Equal numbers mean the column holds no duplicate values (e.g. no listing
    appears twice). Nothing is returned; the two counts are only printed.
    """
    total_rows = df.shape[0]
    distinct_values = len(df[column_name].unique())

    print("Number of rows: " + str(total_rows))
    print("Number of distinct rows: " + str(distinct_values))

def clean_string(df, col_name, character_list):
    """Remove every occurrence of the given substrings from one column.

    The column is cast to ``str`` first, then each entry of
    ``character_list`` is stripped out as a literal (non-regex) match.
    The column is overwritten on the frame that was passed in.

    Returns: the same df, with the column cleaned
    """
    cleaned = df[col_name].astype(str)

    for unwanted in character_list:
        # regex=False: characters such as '$' must be matched literally.
        cleaned = cleaned.str.replace(unwanted, '', regex=False)

    df[col_name] = cleaned
    return df

def col_to_dtype(df, col_name, dtype):
    """Cast a single column of df to the requested dtype.

    The converted column replaces the original one on the frame that was
    passed in; all other columns are left alone.

    Returns: the same df, with the column converted
    """
    converted = df[col_name].astype(dtype)
    df[col_name] = converted
    return df

# Strip the currency symbol and thousands separator from 'price', then
# convert the cleaned text to float.
listings_df = (
    listings_df
    .pipe(clean_string, 'price', ['$', ','])
    .pipe(col_to_dtype, 'price', 'float')
)

In [5]:
## Check Unique Listings
# Compare the row count with the number of distinct listing ids.
check_uniqueness(df=listings_df, column_name='id')

Number of rows: 3818
Number of distinct rows: 3818


In [12]:


# Cast the columns used in the analysis below to integers, one at a time.
for int_col in ('calculated_host_listings_count', 'price', 'availability_365'):
    listings_df = col_to_dtype(listings_df, int_col, 'int')

### Data Analysis

In [13]:
# Per-neighbourhood-group averages.
# The listings were loaded with every column as text and only a few columns
# were cast to numbers afterwards, so the mean is restricted to numeric
# columns: recent pandas versions raise a TypeError when asked to average
# string columns instead of silently dropping them.
neighbourhood_avgs = (
    listings_df
    .groupby('neighbourhood_group_cleansed')
    .mean(numeric_only=True)
    # rename columns for vis
    .rename(columns={
        'price': 'Avg. Listing Price (£)',
        'calculated_host_listings_count': 'Avg. No of Host Owned Listings',
        'availability_365': 'Yearly Availability (out 365 days)',
    })
    # round for vis
    .round(2)
)

# Expose the group name as a regular column too, so it can be passed to the
# plotting helpers as a colour/text column.
neighbourhood_avgs['neighbourhood_group_cleansed'] = neighbourhood_avgs.index

neighbourhood_avgs.head()

Unnamed: 0_level_0,Avg. Listing Price (£),Yearly Availability (out 365 days),Avg. No of Host Owned Listings,neighbourhood_group_cleansed
neighbourhood_group_cleansed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ballard,124.68,252.67,1.66,Ballard
Beacon Hill,94.67,256.53,2.72,Beacon Hill
Capitol Hill,130.59,233.98,3.65,Capitol Hill
Cascade,140.1,210.3,1.4,Cascade
Central Area,128.34,245.41,1.82,Central Area


### Data Visualisation

In [19]:
def custom_bar(df_series, x_label, y_label):
    """Show a dark-themed bar chart of a series, largest value first.

    Args:
        df_series: pandas Series to plot; it is sorted in descending order
            before plotting.
        x_label: title for the x axis.
        y_label: title for the y axis.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    fig = px.bar(df_series.sort_values(ascending=False), template='plotly_dark')

    # Axis titles use the `title=dict(text=..., font=...)` form; the older
    # `titlefont` shortcut is deprecated in plotly.
    fig.update_layout(
        showlegend=False,
        autosize=False,
        width=1200,
        height=500,
        xaxis=dict(
            title=dict(text=x_label, font=dict(size=14)),
            tickmode="array"),
        yaxis=dict(
            title=dict(text=y_label, font=dict(size=14)),
            tickmode="array"))

    fig.show()


def custom_scatter(df, x_col, y_col, color_col, text_col, size_col):
    """Show and return a dark-themed, labelled bubble scatter plot of df.

    Args:
        df: DataFrame containing the columns named by the other arguments.
        x_col: column plotted on the x axis; also used as the x-axis title.
        y_col: column plotted on the y axis; also used as the y-axis title.
        color_col: column that determines marker colour.
        text_col: column whose values are drawn as a label above each marker.
        size_col: column that scales marker size (largest marker is 30).

    Returns: the plotly figure, after it has been displayed with fig.show()
    """
    # Plot the frame that was passed in, not a notebook-level global.
    fig = px.scatter(df,
                     x=x_col,
                     y=y_col,
                     color=color_col,
                     text=text_col,
                     size=size_col,
                     size_max=30,
                     template='plotly_dark')

    fig.update_traces(textposition='top center')

    # Axis titles use the `title=dict(text=..., font=...)` form; the older
    # `titlefont` shortcut is deprecated in plotly.
    fig.update_layout(
        showlegend=False,
        autosize=False,
        width=1200,
        height=800,
        xaxis=dict(
            title=dict(text=x_col, font=dict(size=14)),
            tickmode="array"),
        yaxis=dict(
            title=dict(text=y_col, font=dict(size=14)),
            tickmode="array"))

    fig.show()

    return fig

In [20]:
# Host-owned listings vs. yearly availability per neighbourhood group,
# with marker size driven by the average listing price.
fig = custom_scatter(
    df=neighbourhood_avgs,
    x_col="Avg. No of Host Owned Listings",
    y_col="Yearly Availability (out 365 days)",
    color_col="neighbourhood_group_cleansed",
    text_col="neighbourhood_group_cleansed",
    size_col='Avg. Listing Price (£)',
)

In [16]:
import os

import chart_studio

# Credentials are read from the environment so that no secret is saved in
# the notebook. Set CHART_STUDIO_USERNAME and CHART_STUDIO_API_KEY before
# starting the kernel.
username = os.environ.get('CHART_STUDIO_USERNAME', '')  # your username
api_key = os.environ.get('CHART_STUDIO_API_KEY', '')  # your api key - go to profile > settings > regenerate key

# Only write the credentials file when both values are present, so that
# blank placeholders are never passed to set_credentials_file.
if username and api_key:
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
else:
    print("Chart Studio credentials not set; skipping set_credentials_file.")

In [21]:
import chart_studio.plotly as py

# Send the scatter figure to Chart Studio under the name 'host_patterns';
# the value returned by py.plot is shown as the cell output.
py.plot(
    fig,
    filename='host_patterns',
    auto_open=True,
)

'https://plotly.com/~jhmarlow/14/'