In [1]:
from pathlib import Path
import pandas as pd
import requests
import os

In [2]:
# Remove pandas' cap on displayed rows so frames are rendered without truncation.
pd.set_option('display.max_rows', None)

In [3]:
# All input data is located relative to the current user's home directory.
home_dir = Path.home()
# Directory expected to contain the Inside Airbnb London listings export.
inside_airbnb_data_dir = home_dir.joinpath('Programming/data/inside-airbnb/london')
# Directory expected to contain the borough crime-rate table.
crime_rate_dir = home_dir.joinpath('Programming/data/crime-rate/')

In [4]:
# Foursquare credential, read from the environment so it is never stored in the notebook.
# Indexing os.environ raises KeyError right here if the variable is not set.
FOURSQUARE_API_KEY = os.environ['FOURSQUARE_API_KEY']
# Endpoint that get_nearby_categories sends its place-search requests to.
FOURSQUARE_URL = "https://api.foursquare.com/v3/places/search"

In [5]:
# Concrete CSV files read by the loading cells below.
inside_airbnb_data_file = inside_airbnb_data_dir.joinpath('listings.csv')
crime_rate_data_file = crime_rate_dir.joinpath('crimerate-pro-data-table-rmp-region-towns-cities.csv')

In [6]:
# Load the borough crime rates, switch to snake_case column names, and drop any row
# whose borough value is the literal 'DownloadCSVExcelTSV' (not a real borough).
crime_rate_df = (
    pd.read_csv(crime_rate_data_file, usecols=['Borough', 'Crime Rate'])
    .rename(columns={'Borough': 'borough', 'Crime Rate': 'crime_rate'})
    .loc[lambda frame: frame['borough'] != 'DownloadCSVExcelTSV']
)

In [7]:
# Columns kept from the listings export. Each name appears once: the original list
# repeated 'latitude' and 'longitude', which added nothing to the selection.
columns_list = ['neighbourhood_cleansed', 'bathrooms', 'bedrooms', 'latitude', 'longitude',
                'room_type', 'property_type', 'price', 'minimum_nights']

# Load the listings, rename the neighbourhood column to 'borough' so it can be joined
# with the crime-rate table, and strip the currency symbol from the price strings.
# regex=False makes '$' a literal character; as a regular expression it is the
# end-of-string anchor, and the default for `regex` differs between pandas versions.
# NOTE(review): price is left as text after this step (as in the original); convert
# it explicitly before doing arithmetic on it.
inside_airbnb_df = (
    pd.read_csv(inside_airbnb_data_file, usecols=columns_list)
    .rename(columns={'neighbourhood_cleansed': 'borough'})
    .assign(price=lambda frame: frame['price'].str.replace('$', '', regex=False))
)

In [8]:
# Keep only listings that rent out the entire home or apartment.
inside_airbnb_df = inside_airbnb_df[inside_airbnb_df['room_type'].eq('Entire home/apt')]

In [9]:
# Keep only listings whose minimum stay is at least 30 nights.
inside_airbnb_df = inside_airbnb_df.loc[lambda frame: frame['minimum_nights'] >= 30]

In [10]:
# Attach each listing's borough crime rate; a left join keeps listings whose borough
# has no entry in the crime-rate table (their crime_rate becomes NaN).
inside_airbnb_df = pd.merge(inside_airbnb_df, crime_rate_df, how='left', on='borough')

In [11]:
# Preview the first five filtered, merged listings.
inside_airbnb_df.head(n=5)

Unnamed: 0,borough,latitude,longitude,property_type,room_type,bathrooms,bedrooms,price,minimum_nights,crime_rate
0,Croydon,51.37487,-0.11565,Entire condo,Entire home/apt,,2.0,,180,111.91
1,Ealing,51.52479,-0.36186,Entire townhouse,Entire home/apt,,2.0,,90,103.37
2,Lambeth,51.491476,-0.111514,Entire rental unit,Entire home/apt,1.0,1.0,150.0,30,137.98
3,Kensington and Chelsea,51.48566,-0.18415,Entire rental unit,Entire home/apt,2.0,2.0,195.0,91,118.02
4,Merton,51.40804,-0.22837,Entire rental unit,Entire home/apt,,1.0,,30,82.49


In [12]:
# (rows, columns) of the listings frame after filtering and the crime-rate merge.
inside_airbnb_df.shape

(2059, 10)

In [None]:
# Ordered (broad category, keywords) pairs consumed by classify_category.
# Each keyword is tested as a substring of the lower-cased category name, and the
# first pair with any matching keyword wins, so the order of this list matters
# (e.g. "grocery" is tried before the more generic "store" under "Retail").
# NOTE(review): because matching is by substring, short keywords such as "bar",
# "art" or "pet" also match inside longer words (e.g. "barber").
BROAD_CATEGORIES = [
    ("Grocery Store", ["supermarket", "grocery", "convenience store", "gourmet", "butcher"]),
    ("Restaurant", ["restaurant", "bbq", "steakhouse", "diner", "sushi", "cuisine", "brasserie", "joint",
                    "pizzeria", "parlor", "fish", "chips", "bistro", "dining"]),
    ("Cafe", ["coffee", "cafe", "tea", "bakery", "dessert", "café", "drinking", "breakfast"]),
    ("Nightlife", ["bar", "pub", "club", "lounge", "casino"]),
    ("Retail", ["shopping", "store", "mall", "market", "food", "beverage", "boutique", "office", "plaza"]),
    ("Fitness", ["gym", "fitness", "yoga", "crossfit", "martial arts", "tennis", "sports", "football", "cricket"]),
    ("Wellness", ["spa", "massage", "therapy", "sauna", "escape room"]),
    ("Entertainment", ["theater", "cinema", "concert", "comedy", "recreation", "bingo", "music", "auditorium", "jazz", "blues", "stadium"]),
    ("Cultural", ["museum", "art", "gallery", "library", "historic", "landmarks", "monument"]),
    ("Outdoor", ["park", "trail", "beach", "zoo", "hiking", "playground", "outdoors"]),
    ("Transport", ["train", "bus", "subway", "parking", "taxi", "tube", "dealership", "automotive", "car rental", "shipping", "motorcycle", "fuel station"]),
    ("Healthcare", ["hospital", "clinic", "pharmacy", "dentist", "veterinary", "medicine", "doctor", "surgeon", "surgery", "healthcare",
                    "physiotherapist", "physician", "psycho", "assisted living", "medical", "nutritionist", "ambulance"]),
    ("Services", ["bank", "atm", "post", "salon", "barber", "laundry", "child care", "agency", "photographer", "chimney",
                  "pet", "wedding", "architecture", "upholstery", "cleaning", "computer", "photography", "audiovisual"]),
    ("Organization", ["community", "government", "assistance", "legal", "environmental", "non-profit", "charity", "youth",
                      "disabled", "military", "embassy", "consulate", "agriculture", "forestry"]),
    ("Education", ["school", "learning", "tutoring", "preschool", "kindergarten", "university", "college"]),
    ("Religion", ["church", "cathedral", "seminary", "mosque", "temple", "synagogue", "faith", "monastery", "cemetery", "spiritual"]),
    ("Home Improvement", ["hvac", "home", "heating ventilating air conditioning", "landscape", "garden", "smith", "contractor",
                          "construction", "carpenter", "builder", "plumber", "housing", "electrician", "locksmith"]),
]

In [None]:
def classify_category(category_name, categories=None):
    """Map a fine-grained place category name onto a broad category.

    Matching is case-insensitive and by substring: the first
    ``(broad_category, keywords)`` pair for which any keyword occurs inside
    ``category_name`` wins, so the order of ``categories`` is significant.

    Args:
        category_name: Category label to classify (a string).
        categories: Optional ordered iterable of ``(broad_category, keywords)``
            pairs. Defaults to the module-level ``BROAD_CATEGORIES``.

    Returns:
        The matching broad category, or ``category_name`` unchanged when no
        keyword matches.
    """
    if categories is None:
        # Resolved at call time so the default always reflects the current table.
        categories = BROAD_CATEGORIES

    name_lower = category_name.lower()
    for broad_category, keywords in categories:
        # Lower-casing the keyword keeps caller-supplied tables case-insensitive too.
        if any(keyword.lower() in name_lower for keyword in keywords):
            return broad_category
    return category_name

In [None]:
def get_nearby_categories(lat, lon, radius=100, limit=3, timeout=10):
    """Summarise the broad categories of places Foursquare returns near a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius: Value sent as the ``radius`` query parameter
            (presumably metres — confirm against the Foursquare API docs).
        limit: Value sent as the ``limit`` query parameter (max places returned).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        Always a string:
        - the broad categories of the returned places, de-duplicated, sorted
          alphabetically and joined with ``", "``;
        - ``"None"`` when no returned place carries a usable category;
        - ``"API key missing"`` when ``FOURSQUARE_API_KEY`` is empty;
        - ``"API error: <detail>"`` when the request fails, where detail is the
          HTTP status code or the name of the transport exception.
    """
    if not FOURSQUARE_API_KEY:
        return "API key missing"

    headers = {
        "Authorization": FOURSQUARE_API_KEY,
        "Accept": "application/json"
    }
    params = {
        "ll": f"{lat},{lon}",
        "radius": radius,
        "limit": limit
    }

    try:
        response = requests.get(FOURSQUARE_URL, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures in-band, like HTTP errors below, so one bad
        # request does not abort a long per-listing run.
        return f"API error: {type(exc).__name__}"

    if response.status_code != 200:
        return f"API error: {response.status_code}"

    categories = set()
    for place in response.json().get('results', []):
        category_list = place.get('categories') or []
        if not category_list:
            continue
        # Only the first listed category of each place is used.
        category_name = category_list[0].get('name')
        if category_name:
            categories.add(classify_category(category_name))

    # Sorting makes the output reproducible; set iteration order is arbitrary.
    return ', '.join(sorted(categories)) if categories else "None"

In [None]:
# Store the nearby-place summary as a new *column*. The original used
# `.loc['amenities'] = ...`, which addresses a row label rather than a column.
# One Foursquare request is made per listing, so this cell needs network access
# and takes time proportional to the number of rows.
inside_airbnb_df['amenities'] = [
    get_nearby_categories(lat, lon)
    for lat, lon in zip(inside_airbnb_df['latitude'], inside_airbnb_df['longitude'])
]

In [None]:
# Preview the first five listings after the amenities step.
inside_airbnb_df.head(n=5)