# Listing Type Exploration

Data exploration of Airbnb listings for Seattle

In [1]:
# imports
import pandas as pd
import seaborn as sns
import numpy as np
import plotly.express as px

In [2]:
# Load every column as text; 'price' is converted to float further down.
listings_csv = "../data/listings.csv"
listings_df = pd.read_csv(listings_csv, dtype='unicode')
listings_df.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'summary',
       'space', 'description', 'experiences_offered', 'neighborhood_overview',
       'notes', 'transit', 'thumbnail_url', 'medium_url', 'picture_url',
       'xl_picture_url', 'host_id', 'host_url', 'host_name', 'host_since',
       'host_location', 'host_about', 'host_response_time',
       'host_response_rate', 'host_acceptance_rate', 'host_is_superhost',
       'host_thumbnail_url', 'host_picture_url', 'host_neighbourhood',
       'host_listings_count', 'host_total_listings_count',
       'host_verifications', 'host_has_profile_pic', 'host_identity_verified',
       'street', 'neighbourhood', 'neighbourhood_cleansed',
       'neighbourhood_group_cleansed', 'city', 'state', 'zipcode', 'market',
       'smart_location', 'country_code', 'country', 'latitude', 'longitude',
       'is_location_exact', 'property_type', 'room_type', 'accommodates',
       'bathrooms', 'bedrooms', 'beds', 'bed_type', 'amenities', '

In [3]:
# Compare how many distinct values each neighbourhood column holds.
for neighbourhood_col in (
    'neighbourhood',
    'neighbourhood_cleansed',
    'neighbourhood_group_cleansed',
):
    print(listings_df[neighbourhood_col].unique().shape)


(82,)
(87,)
(17,)


In [4]:
# Distinct neighbourhood groups, in order of first appearance.
neighbourhood_groups = listings_df['neighbourhood_group_cleansed']
neighbourhood_groups.unique()

array(['Queen Anne', 'Ballard', 'Other neighborhoods', 'Cascade',
       'Central Area', 'University District', 'Downtown', 'Magnolia',
       'West Seattle', 'Interbay', 'Beacon Hill', 'Rainier Valley',
       'Delridge', 'Seward Park', 'Northgate', 'Capitol Hill',
       'Lake City'], dtype=object)

### Clean Price

In [5]:
def clean_string(df, col_name, character_list):
    """Remove every substring in ``character_list`` from one column of ``df``.

    The column is first cast to ``str`` and then each entry of
    ``character_list`` is removed as a literal substring (never as a regular
    expression, so characters such as ``$`` or ``.`` are safe to pass).

    Args:
        df: DataFrame holding the column; it is modified in place.
        col_name: Name of the column to clean.
        character_list: Iterable of substrings to strip out.

    Returns: the same df object, with the column cleaned
    """
    cleaned = df[col_name].astype(str)

    for unwanted in character_list:
        # Vectorised literal replacement instead of a per-row Python loop.
        cleaned = cleaned.str.replace(unwanted, '', regex=False)

    df[col_name] = cleaned

    return df

def col_to_dtype(df, col_name, dtype):
    """Cast a single column of ``df`` to ``dtype``.

    The conversion is written back onto ``df`` itself.

    Returns: the same df object, with the column converted
    """
    converted = df[col_name].astype(dtype)
    df[col_name] = converted
    return df

# Strip the currency symbol and thousands separator from the price text,
# then cast the cleaned column to float.
listings_df = col_to_dtype(
    clean_string(listings_df, 'price', ['$', ',']),
    'price',
    'float',
)

In [22]:
def barplot_template(data, x_label, y_label):
    """Build, display and return a dark-themed Plotly bar chart of ``data``.

    Args:
        data: Passed unchanged to ``px.bar``.
        x_label: Title text placed on the **y** axis (see note).
        y_label: Title text placed on the **x** axis (see note).

    Returns:
        The Plotly figure, after ``fig.show()`` has been called on it.

    Note:
        The label arguments are crossed relative to their names: ``x_label``
        titles the y axis and ``y_label`` titles the x axis. The mapping is
        kept as-is because the existing calls in this notebook pass their
        labels in the order that makes the rendered chart read correctly.
    """
    def _axis_style(title):
        # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`
        # attribute, which newer Plotly releases no longer accept.
        return dict(
            title=dict(text=title, font=dict(size=14)),
            tickmode="array",
        )

    fig = px.bar(data, template='plotly_dark')

    fig.update_layout(
        autosize=False,
        width=1400,
        height=500,
        yaxis=_axis_style(x_label),
        xaxis=_axis_style(y_label),
    )

    fig.show()

    return fig


## Count Listing Types

In [6]:
# Keep the neighbourhood group next to one indicator column per room type.
room_types_df = listings_df.loc[:, ['neighbourhood_group_cleansed', 'room_type']]
onehot_encoding = pd.get_dummies(room_types_df.loc[:, 'room_type'])
room_types_encoded = room_types_df.join(onehot_encoding)
room_types_encoded.head()

Unnamed: 0,neighbourhood_group_cleansed,room_type,Entire home/apt,Private room,Shared room
0,Queen Anne,Entire home/apt,1,0,0
1,Queen Anne,Entire home/apt,1,0,0
2,Queen Anne,Entire home/apt,1,0,0
3,Queen Anne,Entire home/apt,1,0,0
4,Queen Anne,Entire home/apt,1,0,0


In [7]:
# Count listings of each room type per neighbourhood group, ordered from the
# group with the most listings to the one with the fewest.
# The text column 'room_type' is dropped before aggregating so that only the
# one-hot indicator columns are summed; summing the raw strings would
# otherwise break the row-wise total below.
room_type_bars_df = (
    room_types_encoded
    .drop(columns='room_type')
    .groupby('neighbourhood_group_cleansed')
    .sum()
    # Temporary helper column: total listings per neighbourhood group.
    .assign(total_listings=lambda counts: counts.sum(axis=1))
    .sort_values('total_listings', ascending=False)
    .drop(columns='total_listings')
)
room_type_bars_df.head()

Unnamed: 0_level_0,Entire home/apt,Private room,Shared room
neighbourhood_group_cleansed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Other neighborhoods,492.0,274.0,28.0
Capitol Hill,384.0,143.0,40.0
Downtown,471.0,42.0,17.0
Central Area,225.0,140.0,4.0
Queen Anne,231.0,62.0,2.0


In [10]:
# Stacked listing counts per neighbourhood group, busiest group first.
barplot_template(
    room_type_bars_df,
    x_label='Types of Listings',
    y_label=' Airbnb Seattle Neighbourhoods',
)

## Order by Avg Price

In [17]:
# Average only the numeric 'price' column: every other column of listings_df
# was loaded as text, and taking the mean of text columns fails on current
# pandas versions.
avg_neighbourhood_prices = (
    listings_df
    .groupby('neighbourhood_group_cleansed')[['price']]
    .mean()
)

# Attach the average price to the room-type counts and order the
# neighbourhood groups from most to least expensive.
prices_room_df = (
    room_type_bars_df
    .join(avg_neighbourhood_prices)
    .sort_values("price", ascending=False)
    .rename_axis('Neighbourhood')
)
prices_room_df.head()

Unnamed: 0_level_0,Entire home/apt,Private room,Shared room,price
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Magnolia,41.0,19.0,1.0,177.672131
Queen Anne,231.0,62.0,2.0,157.223729
Downtown,471.0,42.0,17.0,154.40566
West Seattle,135.0,67.0,1.0,141.359606
Cascade,62.0,24.0,3.0,140.101124


In [23]:
# Same stacked counts as before, but with neighbourhood groups ordered by
# average price; the price column itself is left out of the bars.
room_type_columns = ['Entire home/apt', 'Private room', 'Shared room']
fig = barplot_template(
    prices_room_df[room_type_columns],
    x_label='Types of Listings',
    y_label=' Airbnb Seattle Neighbourhoods',
)

In [24]:
import os

import chart_studio
import chart_studio.plotly as py

# Credentials are read from the environment so that no secret is stored in the
# notebook. Set CHART_STUDIO_API_KEY (and optionally CHART_STUDIO_USERNAME)
# before running this cell.
# NOTE(review): an API key was previously committed in this cell; regenerate
# it under profile > settings > regenerate key.
username = os.environ.get('CHART_STUDIO_USERNAME', 'jhmarlow')
api_key = os.environ['CHART_STUDIO_API_KEY']  # raises KeyError when unset
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

# Upload the most recently built figure and open it in the browser.
py.plot(fig, filename = 'listing_type_airbnb_seattle', auto_open=True)

'https://plotly.com/~jhmarlow/8/'