In [1]:
import pandas as pd
import altair as alt
import requests
from random import sample

In [2]:
import gzip

In [9]:
for i in ['125','225','324','424'][:-1]:
    df = pd.read_csv(f'London_Airbnb_Q{i}.csv.gz', compression='gzip')
    df.to_csv(f'London_Airbnb_Q{i}.csv.bz', compression='bz2')

In [46]:
# Import pound currency formatting
gb_format = 'https://raw.githubusercontent.com/d3/d3-format/refs/heads/main/locale/en-GB.json'

response = requests.get(gb_format)
gb_format = response.json()

alt.renderers.set_embed_options(formatLocale=gb_format)

RendererRegistry.enable('default')

In [None]:
from ecostyles import EcoStyles
# Create styles instance
styles = EcoStyles()
# Register and enable a theme
styles.register_and_enable_theme(theme_name="article")  # or "article"

In [3]:
# Import data set
Q324 = pd.read_csv('/Users/sambickel-barlow/Desktop/Github/RADataHub/ChartOfTheDay/airbnb/London_Airbnb_Q324.csv')
Q424 = pd.read_csv('/Users/sambickel-barlow/Desktop/Github/RADataHub/ChartOfTheDay/airbnb/London_Airbnb_Q324.csv')
Q125 = pd.read_csv('/Users/sambickel-barlow/Desktop/Github/RADataHub/ChartOfTheDay/airbnb/London_Airbnb_Q125.csv')
Q225 = pd.read_csv('/Users/sambickel-barlow/Desktop/Github/RADataHub/ChartOfTheDay/airbnb/London_Airbnb_Q225.csv')

In [None]:
# Add quarter column
Q324['quarter'] = 'Q324'
Q424['quarter'] = 'Q424'
Q125['quarter'] = 'Q125'
Q225['quarter'] = 'Q225'

In [None]:
# Merge all quarterly data
allQs = pd.concat([Q324, Q424, Q125, Q225], axis=0)

In [None]:
# Subset data to only one bedroom full home/apartment listings
allQs_eh = allQs[allQs['room_type'] == 'Entire home/apt']
allQs_1b = allQs_eh[allQs_eh['bedrooms'] == 1]

In [None]:
# Clean price column and filter out missing prices
allQs_1b['price'] = allQs_1b['price'].str.replace('$','').str.replace(',','').astype('float')
allQs_1b = allQs_1b[~allQs_1b['price'].isna()]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allQs_1b['price'] = allQs_1b['price'].str.replace('$','').str.replace(',','').astype('float')


In [None]:
# Calculate median price by quarter and by neighbourhood
median_1b_xnq = pd.DataFrame(allQs_1b.groupby(['neighbourhood_cleansed', 'quarter'])['price'].median()).reset_index().sort_values(by='neighbourhood_cleansed')

In [None]:
# Reformat from long to wide
median_1b_price = median_1b_xnq.pivot(columns='quarter',index='neighbourhood_cleansed', values='price').reset_index()

In [None]:
# Calculate % change in price between Q3 2024 and Q2 2025
median_1b_price['delta'] = (median_1b_price['Q225'] - median_1b_price['Q324']) / median_1b_price['Q324']

In [None]:
# Add column for red/green colour assignment based on positive or negative price change
median_1b_price['posneg'] = median_1b_price['delta'].apply(lambda x: 'green' if x > 0 else 'red')

In [None]:
# Create chart
chart = alt.Chart(median_1b_price).encode(
    x=alt.X('delta', scale=alt.Scale(domain=[-.15, .3]), axis=alt.Axis(format='.0%', title='Source: Inside Airbnb', titleX=50)),
    y=alt.Y('neighbourhood_cleansed', sort='-x'),
    color=alt.Color('posneg', legend=None),
    tooltip=['neighbourhood_cleansed',alt.Tooltip('Q324', title='Q3 2024 Airbnbs', format='$,.2f'), alt.Tooltip('Q225', title='Q2 2025 Airbnbs', format='$,.2f'), alt.Tooltip('delta', title='Percent change', format='.1%')]



).properties(
    title={
      "text": ["One-Bedroom price on Airbnb by London neighbourhood"], 
      "subtitle": ["Percent change between Q3 2024 and Q2 2025"]
    }, height=400, width=350).mark_bar()

chart

In [50]:
# Save to png
chart.save('Airbnb: 1 bed price change by neighbourhood.png', scale_factor=2)
# Save to json
chart.save('Airbnb: 1 bed price change by neighbourhood.json', scale_factor=2)

In [59]:
Q225_1b = allQs_1b[allQs_1b['quarter'] == 'Q225']

In [125]:
import numpy as np

In [198]:
host_listing_price_n = Q225_1b.groupby('neighbourhood_cleansed').agg(
    Total_Listings=('id', 'count'),
    Distinct_Hosts=('host_id', 'nunique'),
    Avg_Price=('price', 'mean')
).reset_index()


In [200]:
host_listing_price_n['Listings_per_host'] = host_listing_price_n['Total_Listings'] / host_listing_price_n['Distinct_Hosts']

In [227]:
hostn_gb = Q225_1b.groupby(['neighbourhood_cleansed','host_id'])['id'].count().reset_index()
host_gb = Q225_1b.groupby(['neighbourhood_cleansed'])['id'].count().reset_index()
hostn_gb_merged = hostn_gb.merge(host_gb, on='neighbourhood_cleansed')
hostn_gb_merged['hhi'] = (hostn_gb_merged['id_x'] / hostn_gb_merged['id_y'])**2
hhi_calc = hostn_gb_merged.groupby('neighbourhood_cleansed')['hhi'].sum().reset_index()

In [229]:
host_listing_price_n = host_listing_price_n.merge(hhi_calc, on='neighbourhood_cleansed')

In [230]:
host_listing_price_n.sort_values(by='hhi')

Unnamed: 0,neighbourhood_cleansed,Total_Listings,Distinct_Hosts,Avg_Price,Listings_per_host,hhi
11,Hackney,1053,845,144.8585,1.246154,0.001949
31,Wandsworth,713,586,172.13324,1.216724,0.002591
29,Tower Hamlets,1294,868,163.035549,1.490783,0.002721
18,Islington,1056,710,172.827652,1.487324,0.002983
21,Lambeth,742,586,138.270889,1.266212,0.003113
27,Southwark,833,658,156.528211,1.265957,0.003152
12,Hammersmith and Fulham,890,588,150.611236,1.513605,0.00375
5,Camden,1503,829,209.622089,1.813028,0.004046
19,Kensington and Chelsea,1760,910,216.303977,1.934066,0.004215
22,Lewisham,412,338,108.23301,1.218935,0.004277


In [255]:
# Assume host_listing_price_n is your DataFrame
# Example: label these specific neighborhoods

neighborhoods_to_label = ['City of London', 'Brent', 'Westminster', 'Camden', 'Bexley', 'Sutton', 'Redbridge']
labels_df = host_listing_price_n[host_listing_price_n['neighbourhood_cleansed'].isin(neighborhoods_to_label)]


# Base scatter plot
base = alt.Chart(host_listing_price_n).encode(
    x=alt.X('hhi', axis=alt.Axis(format=',.3f', titleX=200), title='Herfindahl-Hirschman Index (HHI)'),
    y=alt.Y('Avg_Price', axis=alt.Axis(format='$,.0f'))
).properties(
    title={
        "text": ["Airbnb Market Power - London Neighbourhoods"], 
        "subtitle": ["HHI is not positively correlated with prices"]
    },
    height=350,
    width=400
).mark_point(opacity=0.75)

labels = alt.Chart(labels_df).mark_text(
    align='left',
    dx=8,
    dy=2,
    fontSize=10
).encode(
    x='hhi',
    y='Avg_Price',
    text='neighbourhood_cleansed'
)

# Regression line
regression = alt.Chart(host_listing_price_n).transform_regression(
    'hhi', 'Avg_Price'
).mark_line(color='#36b7b4', strokeDash=[5,5]).encode(
    x='hhi',
    y='Avg_Price'
)

# Combine all layers
chart = labels + regression + base
chart

In [256]:
# Assume host_listing_price_n is your DataFrame
# Example: label these specific neighborhoods

neighborhoods_to_label = ['City of London', 'Brent', 'Westminster', 'Camden', 'Bexley', 'Sutton', 'Redbridge']
labels_df = host_listing_price_n[host_listing_price_n['neighbourhood_cleansed'].isin(neighborhoods_to_label)]


# Base scatter plot
base = alt.Chart(host_listing_price_n).encode(
    x=alt.X('Listings_per_host', scale=alt.Scale(domain=[1,2.5], zero=False), axis=alt.Axis(format=',.3f', titleX=200), title='Average individual host listings'),
    y=alt.Y('Avg_Price', axis=alt.Axis(format='$,.0f'))
).properties(
    title={
        "text": ["Airbnb Market Power - London Neighbourhoods"], 
        "subtitle": ["Listings per host correlated with prices"]
    },
    height=350,
    width=400
).mark_point(opacity=0.75)

labels = alt.Chart(labels_df).mark_text(
    align='left',
    dx=8,
    dy=2,
    fontSize=10
).encode(
    x='Listings_per_host',
    y='Avg_Price',
    text='neighbourhood_cleansed'
)

# Regression line
regression = alt.Chart(host_listing_price_n).transform_regression(
    'Listings_per_host', 'Avg_Price'
).mark_line(color='#36b7b4', strokeDash=[5,5]).encode(
    x='Listings_per_host',
    y='Avg_Price'
)

# Combine all layers
chart = labels + regression + base
chart

# COULD BE DENSITY LOOK AT HHI

In [158]:
styles.eco_colours

{'red': '#e6224b',
 'blue-light': '#179fdb',
 'blue-dark': '#122b39',
 'yellow': '#f4c245',
 'orange': '#eb5c2e',
 'turquoise': '#36b7b4'}

In [146]:
host_listing_price_n.sort_values(by='Price', ascending=False)

Unnamed: 0,neighbourhood_cleansed,Host_Listings,Price
6,City of London,92.370213,323.561702
32,Westminster,52.855953,260.597779
19,Kensington and Chelsea,44.080114,216.303977
5,Camden,42.203593,209.622089
25,Redbridge,7.630872,181.583893
18,Islington,27.140152,172.827652
31,Wandsworth,14.164095,172.13324
24,Newham,28.53616,171.119701
3,Brent,22.787037,165.437037
29,Tower Hamlets,31.504637,163.035549


In [80]:
# Add column for red/green colour assignment based on positive or negative price change
Q225_1b['host_listing_capped'] = Q225_1b['calculated_host_listings_count'].apply(
    lambda x: x if x < 10 else '10+'
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Q225_1b['host_listing_capped'] = Q225_1b['calculated_host_listings_count'].apply(
