# Neighbourhood Prices

In [5]:
import pandas as pd
import math


In [2]:
raw_df = pd.read_csv("../data/listings.csv")

raw_df.shape
raw_df.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'summary',
       'space', 'description', 'experiences_offered', 'neighborhood_overview',
       'notes', 'transit', 'thumbnail_url', 'medium_url', 'picture_url',
       'xl_picture_url', 'host_id', 'host_url', 'host_name', 'host_since',
       'host_location', 'host_about', 'host_response_time',
       'host_response_rate', 'host_acceptance_rate', 'host_is_superhost',
       'host_thumbnail_url', 'host_picture_url', 'host_neighbourhood',
       'host_listings_count', 'host_total_listings_count',
       'host_verifications', 'host_has_profile_pic', 'host_identity_verified',
       'street', 'neighbourhood', 'neighbourhood_cleansed',
       'neighbourhood_group_cleansed', 'city', 'state', 'zipcode', 'market',
       'smart_location', 'country_code', 'country', 'latitude', 'longitude',
       'is_location_exact', 'property_type', 'room_type', 'accommodates',
       'bathrooms', 'bedrooms', 'beds', 'bed_type', 'amenities', '

In [3]:
def clean_string(df, col_name, character_list):
    """ Clean string of defined characters.
    
    Returns: df with column cleaned """
    df[col_name] = df[col_name].astype(str)

    for char in character_list:
        df[col_name] = [x.replace(char,'') for x in df[col_name]]
    
    return df

def col_to_dtype(df, col_name, dtype):
    """Change df col to particular dtype.
    
    Returns: df with column changed"""
    df[col_name] = df[col_name].astype(dtype)

    return df

raw_df = clean_string(raw_df, 'price', ['$',','])
raw_df = col_to_dtype(raw_df, 'price', 'float')

In [11]:
def haversine_dist(n_lat, n_long):
    """Calculate distance to Seattle centre.

    https://www.kite.com/python/answers/how-to-find-the-distance-between-two-lat-long-coordinates-in-python
    
    Returns (float):  distance between coords and center
    """

    s_lat = 47.608013
    s_lon = -122.335167
    R = 6373.0  #radius of the Earth

    # coordinates
    lat1 = math.radians(s_lat)
    lon1 = math.radians(s_lon)
    lat2 = math.radians(n_lat)
    lon2 = math.radians(n_long)

    # change in coordinates
    dlon = lon2 - lon1 
    dlat = lat2 - lat1

    # Haversine formula
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = R * c

    return distance

In [7]:
hist_data = []
group_labels = []

for name in raw_df['neighbourhood_group_cleansed'].unique():
    hist_data.append(raw_df[raw_df['neighbourhood_group_cleansed']==name]['price'])
    group_labels.append(name)

In [8]:
import plotly.figure_factory as ff
import numpy as np

# Create distplot with custom bin_size
fig = ff.create_distplot(hist_data, group_labels, bin_size=20, show_hist=False)
fig.update_layout(
    template='plotly_dark',
    autosize=False,
    width=800,
    height=800,
    xaxis=dict(
    title_text="Listing Prices ($)",
    tickmode="array",
    titlefont=dict(size=14),
    ))
fig.show()

In [41]:
df = raw_df[['neighbourhood_group_cleansed', 'latitude', 'longitude', 'price']]
bubble_df = df.groupby(df['neighbourhood_group_cleansed']).mean()
bubble_df['No. of Listings'] = df.groupby(df['neighbourhood_group_cleansed']).count()['price']
bubble_df['Neighbourhood'] = bubble_df.index
bubble_df = bubble_df.rename(columns={'price': "Avg. Price ($)"})


In [45]:
dists = []
for i in bubble_df.iterrows():
    dists.append(haversine_dist(i[1].latitude, i[1].longitude))

bubble_df['Avg. Distance to Seattle Centre'] = dists
bubble_df = bubble_df.round(2)
bubble_df.head()

Unnamed: 0_level_0,latitude,longitude,Avg. Price ($),No. of Listings,Neighbourhood,Distance to Seattle Centre,Avg. Distance to Seattle Centre
neighbourhood_group_cleansed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ballard,47.67,-122.38,124.68,230,Ballard,8.13,7.67
Beacon Hill,47.57,-122.31,94.67,118,Beacon Hill,4.77,4.63
Capitol Hill,47.62,-122.32,130.59,567,Capitol Hill,2.23,1.75
Cascade,47.63,-122.33,140.1,89,Cascade,2.88,2.48
Central Area,47.61,-122.3,128.34,369,Central Area,2.51,2.65


In [46]:
import plotly.express as px

fig = px.scatter(bubble_df, x="Avg. Distance to Seattle Centre", y="Avg. Price ($)",
	         size="No. of Listings", color="Neighbourhood", text="Neighbourhood",
                 hover_name="Neighbourhood", size_max=40)

fig.update_layout(
    template='plotly_dark',
    showlegend=False,
    autosize=False,
    width=1000,
    height=500,
    yaxis=dict(
        title_text="Price ($)",
        tickmode="array",
        titlefont=dict(size=14),
    ),
    xaxis=dict(
    title_text="Distance to City Centre (km)",
    tickmode="array",
    titlefont=dict(size=14),
    ))

fig.update_traces(textposition='top center')
fig.show()

In [47]:
import chart_studio
import chart_studio.plotly as py
username = 'jhmarlow' # your username
api_key = 'sW1GO8ncX3JlvRlUGc0M' # your api key - go to profile > settings > regenerate key
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

py.plot(fig, filename = 'neighbourhood_dist_price_seattle_aribnb', auto_open=True)

'https://plotly.com/~jhmarlow/10/'