In [1]:
from pathlib import Path
import pandas as pd
import requests
import os

In [2]:
home_dir = Path.home()
inside_airbnb_data_dir = home_dir / 'Programming/data/inside-airbnb/london'
population_dataset_england_wales_data_dir = home_dir / 'Programming/data/office-national-statistics/population-dataset-england-wales/'
crime_rate_dir = home_dir / 'Programming/data/crime-rate/'

In [3]:
FOURSQUARE_API_KEY = os.environ['FOURSQUARE_API_KEY']
FOURSQUARE_URL = "https://api.foursquare.com/v3/places/search"

In [4]:
inside_airbnb_data_file = inside_airbnb_data_dir / 'listings.csv'
population_dataset_england_wales_data_file = population_dataset_england_wales_data_dir / 'mye23tablesew.xlsx'
crime_rate_data_file = crime_rate_dir / 'crimerate-pro-data-table-rmp-region-towns-cities.csv'

In [5]:
crime_rate_df = pd.read_csv(crime_rate_data_file, usecols=['Borough', 'Crime Rate'])
crime_rate_df.rename(columns={'Borough': 'borough', 'Crime Rate': 'crime_rate'}, inplace=True)
crime_rate_df = crime_rate_df[crime_rate_df.borough != 'DownloadCSVExcelTSV']

In [6]:
columns_list = ['neighbourhood_cleansed', 'bathrooms', 'bedrooms', 'latitude', 'longitude',
                'room_type', 'latitude', 'longitude', 'property_type', 'price']
inside_airbnb_df = pd.read_csv(inside_airbnb_data_file, usecols=columns_list)
inside_airbnb_df.rename(columns={'neighbourhood_cleansed': 'borough'}, inplace=True)
inside_airbnb_df.price = inside_airbnb_df.price.str.replace('$', '')

In [7]:
inside_airbnb_df = inside_airbnb_df.merge(crime_rate_df, on='borough', how='left')
inside_airbnb_df.head()

Unnamed: 0,borough,latitude,longitude,property_type,room_type,bathrooms,bedrooms,price,crime_rate
0,Hammersmith and Fulham,51.49392,-0.22754,Entire rental unit,Entire home/apt,1.5,1.0,89.0,103.14
1,Tower Hamlets,51.52435,-0.06938,Entire condo,Entire home/apt,3.0,3.0,220.0,98.6
2,Hammersmith and Fulham,51.49547,-0.22864,Entire rental unit,Entire home/apt,1.0,1.0,88.0,103.14
3,Hammersmith and Fulham,51.49368,-0.22774,Room in aparthotel,Private room,1.0,1.0,75.0,103.14
4,Haringey,51.59031,-0.09408,Private room in rental unit,Private room,1.0,1.0,53.0,133.11


In [8]:
inside_airbnb_df.shape

(95144, 9)

In [9]:
BROAD_CATEGORIES = [
    ("Grocery Store", ["supermarket", "grocery", "convenience store", "gourmet", "butcher"]),
    ("Restaurant", ["restaurant", "bbq", "steakhouse", "diner", "sushi", "cuisine", "brasserie", "joint",
                    "pizzeria", "parlor", "fish", "chips", "bistro", "dining"]),
    ("Cafe", ["coffee", "cafe", "tea", "bakery", "dessert", "café", "drinking", "breakfast"]),
    ("Nightlife", ["bar", "pub", "club", "lounge", "casino"]),
    ("Retail", ["shopping", "store", "mall", "market", "food", "beverage", "boutique", "office", "plaza"]),
    ("Fitness", ["gym", "fitness", "yoga", "crossfit", "martial arts", "tennis", "sports", "football", "cricket"]),
    ("Wellness", ["spa", "massage", "therapy", "sauna"]),
    ("Entertainment", ["theater", "cinema", "concert", "comedy", "recreation", "bingo", "music", "auditorium", "jazz", "blues"]),
    ("Cultural", ["museum", "art", "gallery", "library", "historic", "landmarks", "monument"]),
    ("Outdoor", ["park", "trail", "beach", "zoo", "hiking", "playground", "outdoors"]),
    ("Transport", ["train", "bus", "subway", "parking", "taxi", "tube", "dealership", "automotive", "car rental", "shipping", "motorcycle"]),
    ("Healthcare", ["hospital", "clinic", "pharmacy", "dentist", "veterinary", "medicine", "doctor", "surgeon", "surgery", "healthcare",
                    "physiotherapist", "physician", "psycho", "assisted living", "medical", "nutritionist", "ambulance"]),
    ("Services", ["bank", "atm", "post", "salon", "barber", "laundry", "child care", "agency", "photographer", "chimney",
                  "pet", "wedding", "architecture", "upholstery", "cleaning", "computer", "photography", "audiovisual"]),
    ("Organization", ["community", "government", "assistance", "legal", "environmental", "non-profit", "charity", "youth",
                      "disabled", "military", "embassy", "consulate", "agriculture", "forestry"]),
    ("Education", ["school", "learning", "tutoring", "preschool", "kindergarten", "university", "college"]),
    ("Religion", ["church", "cathedral", "seminary", "mosque", "temple", "synagogue", "faith", "monastery", "cemetery", "spiritual"]),
    ("Home Improvement", ["hvac", "home", "heating ventilating air conditioning", "landscape", "garden", "smith", "contractor",
                          "construction", "carpenter", "builder", "plumber", "housing", "electrician", "locksmith"]),
]

In [10]:
def classify_category(category_name):
    category_name_lower = category_name.lower()
    for broad_category, keywords in BROAD_CATEGORIES:
        if any(keyword in category_name_lower for keyword in keywords):
            return broad_category
    return category_name

In [11]:
def get_nearby_categories(lat, lon, radius=100, limit=3):
    if not FOURSQUARE_API_KEY:
        return "API key missing"

    headers = {
        "Authorization": FOURSQUARE_API_KEY,
        "Accept": "application/json"
    }
    params = {
        "ll": f"{lat},{lon}",
        "radius": radius,
        "limit": limit
    }
    
    response = requests.get(FOURSQUARE_URL, headers=headers, params=params)
    
    if response.status_code == 200:
        data = response.json()
        categories = set()
        for place in data.get('results', []):
            category_list = place.get('categories', [])
            if category_list:
                category_name = category_list[0]['name']
                broad_category = classify_category(category_name)
                categories.add(broad_category)
        
        return ', '.join(categories) if categories else "No amenities found"
    else:
        return f"API error: {response.status_code}"

In [12]:
inside_airbnb_df.iloc[700:730].apply(lambda row: get_nearby_categories(row['latitude'], row['longitude']), axis=1)

700                              Nightlife, Cafe
701                             Retail, Services
702                           No amenities found
703                        Restaurant, Nightlife
704                                    Nightlife
705                           No amenities found
706                              Nightlife, Cafe
707                           No amenities found
708                     Healthcare, Organization
709                  Restaurant, Outdoor, Retail
710                                       Retail
711                     Plaza, Retail, Nightlife
712                           No amenities found
713                          Religion, Education
714    Retail, Nightlife, Motorcycle Repair Shop
715                             Restaurant, Cafe
716                        Healthcare, Nightlife
717                                       Retail
718                                    Nightlife
719                Healthcare, Retail, Nightlife
720                 

In [13]:
inside_airbnb_df.head()

Unnamed: 0,borough,latitude,longitude,property_type,room_type,bathrooms,bedrooms,price,crime_rate
0,Hammersmith and Fulham,51.49392,-0.22754,Entire rental unit,Entire home/apt,1.5,1.0,89.0,103.14
1,Tower Hamlets,51.52435,-0.06938,Entire condo,Entire home/apt,3.0,3.0,220.0,98.6
2,Hammersmith and Fulham,51.49547,-0.22864,Entire rental unit,Entire home/apt,1.0,1.0,88.0,103.14
3,Hammersmith and Fulham,51.49368,-0.22774,Room in aparthotel,Private room,1.0,1.0,75.0,103.14
4,Haringey,51.59031,-0.09408,Private room in rental unit,Private room,1.0,1.0,53.0,133.11
