In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
##--Airbnb Data--##

# Read the raw listings table (gzip-compressed CSV).
airbnb = pd.read_csv("data/20240614-London-listings.csv.gz")

# Turn each listing into a point built from its longitude/latitude columns.
# The points are declared as EPSG:4326 and then reprojected to EPSG:27700,
# the same CRS the ward boundaries are converted to.
listing_points = gpd.points_from_xy(airbnb.longitude, airbnb.latitude)
airbnb = gpd.GeoDataFrame(airbnb, geometry=listing_points, crs="EPSG:4326")
airbnb = airbnb.to_crs(epsg=27700)


In [3]:
## Filter data

from datetime import datetime, timedelta

# Filter min. nights (keep listings bookable for 30 nights or fewer).
# An explicit copy is taken so the column assignment below writes to an
# independent frame instead of a slice of `airbnb` (which triggers pandas'
# SettingWithCopyWarning).
filtered_airbnb = airbnb[airbnb.minimum_nights <= 30].copy()

## To determine recently active sites

# Convert 'last_review' to datetime format; unparseable values become NaT.
# Plain column assignment replaces the column, so it takes the datetime64
# dtype. Assigning through `.loc[:, col]` sets values into the existing
# column and can leave it as object dtype.
filtered_airbnb['last_review'] = pd.to_datetime(filtered_airbnb['last_review'], errors='coerce')

# Set the reference date (for reproducibility)
# NOTE(review): the listings file name suggests the data is from 2024-06-14.
# With this reference date the threshold below is 2024-06-01, so only
# listings reviewed in roughly the last two weeks before that date pass.
# Confirm this is intended rather than using the scrape date as reference.
reference_date = datetime(2024, 11, 28)

# Calculate the threshold date (6 months before the reference date,
# approximated as 6 * 30 days)
six_months_ago = reference_date - timedelta(days=6 * 30)

# Filter the data: all three conditions must hold.
#   - reviewed on/after the threshold (rows with NaT compare False and drop out)
#   - available at least 90 days of the year
#   - whole-property listings only
is_recently_reviewed = filtered_airbnb['last_review'] >= six_months_ago
is_available = filtered_airbnb['availability_365'] >= 90
is_entire_home = filtered_airbnb['room_type'] == "Entire home/apt"

filtered_airbnb = filtered_airbnb[is_recently_reviewed & is_available & is_entire_home]

In [4]:
# Ward-level profile table. The file is decoded as Latin-1 instead of
# pandas' default UTF-8.
ward_profiles = pd.read_csv(
    "data/ward-profiles.csv",
    encoding="latin1",
)

# Preview the first five rows.
ward_profiles.head()

Unnamed: 0,Ward name,Old code,New code,Population - 2015,Children aged 0-15 - 2015,Working-age (16-64) - 2015,Older people aged 65+ - 2015,% All Children aged 0-15 - 2015,% All Working-age (16-64) - 2015,% All Older people aged 65+ - 2015,...,A-Level Average Point Score Per Student - 2013/14,A-Level Average Point Score Per Entry; 2013/14,Crime rate - 2014/15,Violence against the person rate - 2014/15,"Deliberate Fires per 1,000 population - 2014",% area that is open space - 2014,Cars per household - 2011,Average Public Transport Accessibility score - 2014,% travel by bicycle to work - 2011,Turnout at Mayoral election - 2012
0,City of London,00AA,E09000001,8100,650,6250,1250,8.0,76.9,15.2,...,662.9,210.5,656.4,85.3,0.4,18.6,0.4,7.9,5.3,48.5
1,Barking and Dagenham - Abbey,00ABFX,E05000026,14750,3850,10150,750,26.0,69.0,5.0,...,682.6,208.9,138.1,42.2,1.4,21.9,0.5,6.0,0.8,25.7
2,Barking and Dagenham - Alibon,00ABFY,E05000027,10600,2700,6800,1050,25.7,64.3,10.0,...,627.9,201.6,73.6,27.3,0.7,20.6,0.8,3.1,1.0,20.3
3,Barking and Dagenham - Becontree,00ABFZ,E05000028,12700,3200,8350,1100,25.4,65.9,8.7,...,632.0,207.9,79.9,27.6,1.2,1.9,0.9,2.9,1.6,22.5
4,Barking and Dagenham - Chadwell Heath,00ABGA,E05000029,10400,2550,6400,1450,24.3,61.5,14.2,...,613.9,210.5,76.1,24.6,1.3,56.0,0.9,2.3,1.2,25.3


In [5]:
##--Wards Data--##

# Columns kept from the shapefile: the ward code used as the join key,
# the ward area in hectares, and the boundary geometry.
ward_columns = ["GSS_CODE", "HECTARES", "geometry"]

# Load the ward boundaries, reproject them to EPSG:27700, then trim columns.
wards = gpd.read_file("data/London-wards-2011/London_Ward_CityMerged.shp")
wards = wards.to_crs(epsg=27700)
wards = wards[ward_columns]

# Preview the first five wards.
wards.head()

Unnamed: 0,GSS_CODE,HECTARES,geometry
0,E05000405,755.173,"POLYGON ((516401.6 160201.8, 516407.3 160210.5..."
1,E05000414,259.464,"POLYGON ((517829.6 165447.1, 517837.1 165469.4..."
2,E05000401,145.39,"POLYGON ((518107.5 167303.4, 518114.3 167307.5..."
3,E05000400,268.506,"POLYGON ((520480 166909.8, 520490.7 166901.9, ..."
4,E05000402,187.821,"POLYGON ((522071 168144.9, 522063.9 168144, 52..."


In [None]:
##--POIs Data--##
##Selecting pubs, restaurants, cafes

gpkg_path = "data/London_Points_Of_Interest.gpkg"

# Read the GeoPackage into a GeoDataFrame
gdf = gpd.read_file(gpkg_path)

# Define the list of desired classnames
desired_classes = ["Pubs, Bars and Inns", "Cafes, Snack Bars and Tea Rooms", "Restaurants"]

# Filter the GeoDataFrame down to the selected classes
poi_data = gdf[gdf['classname'].isin(desired_classes)]

# Put the POIs in the same CRS (EPSG:27700) as the Airbnb points and the ward
# polygons. The later point-in-ward counts compare raw coordinates against
# bare ward geometries, so a CRS mismatch would silently produce wrong counts.
# If the file declares no CRS it cannot be reprojected and is left unchanged.
if poi_data.crs is not None:
    poi_data = poi_data.to_crs(epsg=27700)

# Display a preview of the result rather than printing the whole frame
poi_data.head(5)

In [None]:
##--Pubs Data--##
## NOTE: No longer used in the analysis (POI counts are used instead); kept commented out for reference only.

#pubs = gpd.read_file("data/pubs.geojson").to_crs(epsg=27700)
#pubs["geometry"] = pubs.geometry.centroid

#pubs = pubs[["name","geometry"]]

#pubs.head(5)

## Merging ward boundaries with ward profiles

In [None]:
# Attach the ward profile columns to the ward boundaries. The join is a left
# join on the ward code ("GSS_CODE" in the boundaries, "New code" in the
# profiles), so every ward boundary is kept. The result is wrapped as a
# GeoDataFrame in EPSG:27700.
merged = gpd.GeoDataFrame(
    wards.merge(
        ward_profiles,
        how="left",
        left_on="GSS_CODE",
        right_on="New code",
    ),
    crs="EPSG:27700",
)

# Preview the first five rows.
merged.head()

In [None]:
#merged["n_pubs"] = merged.geometry.apply(lambda geom: sum(pubs.geometry.intersects(geom)))
#merged["pub_density"] = merged["n_pubs"] / merged["HECTARES"]


def count_points_in_wards(points, ward_geometries):
    """Count, for each ward geometry, how many of `points` intersect it.

    Parameters
    ----------
    points : geopandas.GeoSeries
        Geometries to count (here: POI or Airbnb listing points).
    ward_geometries : geopandas.GeoSeries
        Geometries to count within (here: ward polygons).

    Returns
    -------
    pandas.Series
        One integer count per entry of `ward_geometries`, on the same index.

    Notes
    -----
    The points' spatial index is queried once per ward instead of testing
    every point against every ward. The predicate is still "intersects", so a
    point on a shared boundary is counted for each ward it touches.
    Both inputs must be in the same CRS; this is not checked here.
    """
    point_index = points.sindex
    return ward_geometries.apply(
        lambda geom: len(point_index.query(geom, predicate="intersects"))
    )


# Points of interest per ward, and per hectare of ward area.
merged["n_poi"] = count_points_in_wards(poi_data.geometry, merged.geometry)
merged["poi_density"] = merged["n_poi"] / merged["HECTARES"]

# Filtered Airbnb listings per ward, normalised by area, by population and
# by household spaces (the latter two expressed per 1,000).
merged["n_airbnb"] = count_points_in_wards(filtered_airbnb.geometry, merged.geometry)
merged["airbnb_density"] = merged["n_airbnb"] / merged["HECTARES"]
merged["airbnb_per_1000"] = (merged["n_airbnb"] / merged["Population - 2015"])*1000
merged["airbnb_per_1000_household"] = (merged["n_airbnb"] / merged["Number of Household spaces - 2011"])*1000




In [None]:
# Choropleth of wards coloured by Airbnb listings per 1,000 household spaces.
choropleth_options = {
    "column": "airbnb_per_1000_household",
    "cmap": "viridis",
    "legend": True,
}
merged.plot(**choropleth_options)

In [None]:
# The five wards with the highest POI density, densest first.
merged.sort_values("poi_density", ascending=False).iloc[:5]

In [None]:
# Save the merged ward table (profiles, counts and densities) as a GeoPackage.
output_path = "data/ward_airbnb.gpkg"
merged.to_file(output_path, driver="GPKG")


### Some plots!

In [None]:
import matplotlib.pyplot as plt

# Scatter of ward-level POI density (x) against Airbnb density (y),
# drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    merged['poi_density'],
    merged['airbnb_density'],
    alpha=0.7,
    edgecolor='k',
)
ax.set_title("Point of Interest Density vs. Airbnb Density", fontsize=14)
ax.set_xlabel("Point of Interest Density", fontsize=12)
ax.set_ylabel("Airbnb Density", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()

In [None]:
# Scatter of each ward's average public transport accessibility score (x)
# against Airbnb density (y), drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    merged['Average Public Transport Accessibility score - 2014'],
    merged['airbnb_density'],
    alpha=0.7,
    edgecolor='k',
)
ax.set_title("Public Transport Accessibility vs. Airbnb Density", fontsize=14)
ax.set_xlabel("Public Transport Accessibility", fontsize=12)
ax.set_ylabel("Airbnb Density", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()

In [None]:
# Scatter of each ward's deprivation rank within London (x) against Airbnb
# density (y), drawn through the explicit Figure/Axes interface.
# NOTE: LOWER RANK = MORE DEPRIVATION!
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    merged['(ID2010) - Rank of average score (within London) - 2010'],
    merged['airbnb_density'],
    alpha=0.7,
    edgecolor='k',
)
ax.set_title("Deprivation vs. Airbnb Density", fontsize=14)
ax.set_xlabel("Deprivation Rank", fontsize=12)
ax.set_ylabel("Airbnb Density", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()