### Calculate the density of tourist attractions in each LSOA in London 

In [26]:
!pip install --upgrade osmnx



In [27]:
import osmnx as ox
print(ox.__version__)  # Should be 1.2.2 or newer

2.0.0


In [28]:
import geopandas as gpd
import osmnx as ox
import pandas as pd

# Step 1: Load London LSOA boundary data
lsoa_boundaries = gpd.read_file("data/London/LSOA_2011_London_gen_MHW.shp")

# Step 2: Retrieve tourist attraction data for London
# Use OSMnx to extract nodes tagged as `tourism=attraction`
tourist_attractions = ox.features_from_place(
    "London, England", tags={"tourism": "attraction"}
)

# Ensure the coordinate reference system (CRS) is consistent
tourist_attractions = tourist_attractions.to_crs(lsoa_boundaries.crs)

# Step 3: Count the number of tourist attractions within each LSOA
lsoa_boundaries["tourist_attraction_count"] = lsoa_boundaries.apply(
    lambda row: tourist_attractions.within(row.geometry).sum(), axis=1
)

# Step 4: Calculate the density of tourist attractions (per square kilometer)
# Transform to an equal-area projection to compute accurate area
lsoa_boundaries = lsoa_boundaries.to_crs({"proj": "cea"})
lsoa_boundaries["area_km2"] = lsoa_boundaries.geometry.area / 1e6  # Convert to square kilometers

# Tourist attraction density = count / area
lsoa_boundaries["tourist_attraction_density"] = (
    lsoa_boundaries["tourist_attraction_count"] / lsoa_boundaries["area_km2"]
)

# Step 5: Export results to a CSV file
# Use 'LSOA11CD' as the LSOA code column
output_path = "data/london_lsoa_tourist_density.csv"
output_csv = lsoa_boundaries[["LSOA11CD", "tourist_attraction_count", "tourist_attraction_density"]]
output_csv.to_csv(output_path, index=False)

print(f"Tourist attraction density has been saved to {output_path}")

Tourist attraction density has been saved to data/london_lsoa_tourist_density.csv


### Transfer rental price data

In [30]:
import pandas as pd

# 转换为UTF-8编码并保存
with open("data/rental_price_postcode.csv", "r", encoding="ISO-8859-1") as f:
    content = f.read()

with open("data/rental_price_postcode.csv", "w", encoding="utf-8") as f:
    f.write(content)

# 加载房租数据
rental_data = pd.read_csv("data/rental_price_postcode.csv")

# 清洗数据：确保Postcode存在并无空值
rental_data = rental_data.dropna(subset=['Postcode'])
rental_data['Postcode'] = rental_data['Postcode'].astype(str).str.strip()

# 定义伦敦的Postcode前缀
london_postcodes = ['E', 'W', 'N', 'NW', 'SE', 'SW', 'EC', 'WC', 'BR', 'CR', 'DA', 'EN', 'HA', 'IG', 'KT', 'RM', 'SM', 'TW', 'UB', 'WD']

# 筛选伦敦的Postcode
rental_data['London'] = rental_data['Postcode'].str.startswith(tuple(london_postcodes))

# 提取伦敦数据
london_rentals = rental_data[rental_data['London']]

# 筛选所需的列
columns_to_keep = ['Postcode', 'Avg asking rent (pm)', 'Avg. household income']
london_rentals_filtered = london_rentals[columns_to_keep]

# 输出结果
print(london_rentals_filtered)

# 保存结果到新的CSV文件
london_rentals_filtered.to_csv("data/london_rental_filtered.csv", index=False, encoding="utf-8")
print("筛选后的伦敦租金数据已保存！")

     Postcode Avg asking rent (pm) Avg. household income
666     BR1 1                1,501                67,100
667     BR1 2                2,000                68,200
668     BR1 3                1,650                62,800
669     BR1 4                1,813                51,600
670     BR1 5                  NaN                46,900
...       ...                  ...                   ...
7345    WV6 9                  NaN                45,600
7346    WV7 3                  NaN                48,900
7347    WV8 1                  NaN                40,200
7348    WV8 2                  NaN                51,300
7349    WV9 5                  NaN                39,300

[2025 rows x 3 columns]
筛选后的伦敦租金数据已保存！
