### AirBnB Listings

Most recent for 25 European cities, as of April 2024

In [6]:
import pandas as pd 
import os 
import numpy as np 
import importlib
import calendar
import requests
from datetime import datetime
import glob, os

In [8]:
def generate_days(year, month):
    days_in_month = calendar.monthrange(year, month)[1]
    days_list = [f"{year}-{month:02d}-{day:02d}" for day in range(1, days_in_month + 1)]
    return days_list

generate_days(2023, 12)

['2023-12-01',
 '2023-12-02',
 '2023-12-03',
 '2023-12-04',
 '2023-12-05',
 '2023-12-06',
 '2023-12-07',
 '2023-12-08',
 '2023-12-09',
 '2023-12-10',
 '2023-12-11',
 '2023-12-12',
 '2023-12-13',
 '2023-12-14',
 '2023-12-15',
 '2023-12-16',
 '2023-12-17',
 '2023-12-18',
 '2023-12-19',
 '2023-12-20',
 '2023-12-21',
 '2023-12-22',
 '2023-12-23',
 '2023-12-24',
 '2023-12-25',
 '2023-12-26',
 '2023-12-27',
 '2023-12-28',
 '2023-12-29',
 '2023-12-30',
 '2023-12-31']

In [9]:
cities = pd.read_csv("europeancities.csv")
city_country_df = pd.read_csv("green-city-reco-data/airbnb_data/airbnb_city_country_mapping.csv", sep=";")

city_country_df = city_country_df[city_country_df['city'].isin(cities['city'])]
city_country_df

Unnamed: 0,city,country,Neighbourhood
0,Amsterdam,The-Netherlands,north-holland
3,Barcelona,Spain,Catalonia
5,Berlin,Germany,Be
6,Bologna,Italy,Emilia-romagna
7,Bordeaux,France,nouvelle-aquitaine
9,Brussels,Belgium,Bru
10,Copenhagen,Denmark,hovedstaden
15,Geneva,Switzerland,Geneva
19,Istanbul,Turkey,marmara
21,London,United Kingdom,England


In [24]:
def form_url(city: str, country: str, neighbourhood: str, year: int, month: int):
    date_options = generate_days(year, month)
    country = country.lower().replace(" ", "-")

    # TODO: change this for some countries - Spain, and others
    neighbourhood = neighbourhood.lower().replace(" ", "-")

    for date_option in date_options:
        url = f"http://data.insideairbnb.com/{country}/{neighbourhood}/{city.lower()}/{date_option}/data/listings.csv.gz"
        file_name = city.lower().replace(" ", "_") + "-" + date_option + ".csv.gz"
        try:
            response = requests.get(url)
            if response.status_code == 200:
                download_data(response.content, file_name)
                break
        except Exception as e:
            print(f"Error downloading {file_name}")
            print(e)
            continue


def download_data(content, file_name):
    file_path = "european-city-data/data-sources/airbnb/original"
    print(f"Downloading {file_name}")
    with open(file_path+file_name, "wb") as f:
        f.write(content)
        f.close()
    print(f"Downloaded {file_name}")


def gather_all_cities_data(year, month, city_list = None):

    if city_list is None:
        cities = pd.read_csv("europeancities.csv")
    else: 
        cities = city_list
    city_country_df = pd.read_csv("green-city-reco-data/airbnb_data/airbnb_city_country_mapping.csv", sep=";")
    city_country_df = city_country_df[city_country_df['city'].isin(cities['city'])]
    
    for index, row in city_country_df.iterrows():
        city = row["city"]
        country = row["country"]
        neighbourhood = row["Neighbourhood"]
        form_url(city=city, country=country, neighbourhood=neighbourhood, year=year, month=month)

In [13]:
gather_all_cities_data(2024, 4)

Downloading amsterdam-2024-04-12.csv.gz
Downloaded amsterdam-2024-04-12.csv.gz
Downloading barcelona-2024-04-17.csv.gz
Downloaded barcelona-2024-04-17.csv.gz
Downloading bologna-2024-04-25.csv.gz
Downloaded bologna-2024-04-25.csv.gz
Downloading bordeaux-2024-04-22.csv.gz
Downloaded bordeaux-2024-04-22.csv.gz
Downloading brussels-2024-04-25.csv.gz
Downloaded brussels-2024-04-25.csv.gz
Downloading london-2024-04-17.csv.gz
Downloaded london-2024-04-17.csv.gz
Downloading lyon-2024-04-22.csv.gz
Downloaded lyon-2024-04-22.csv.gz
Downloading madrid-2024-04-19.csv.gz
Downloaded madrid-2024-04-19.csv.gz
Downloading munich-2024-04-28.csv.gz
Downloaded munich-2024-04-28.csv.gz
Downloading naples-2024-04-25.csv.gz
Downloaded naples-2024-04-25.csv.gz
Downloading paris-2024-04-17.csv.gz
Downloaded paris-2024-04-17.csv.gz
Downloading porto-2024-04-22.csv.gz
Downloaded porto-2024-04-22.csv.gz
Downloading rome-2024-04-19.csv.gz
Downloaded rome-2024-04-19.csv.gz
Downloading vienna-2024-04-19.csv.gz
Down

In [15]:
airbnb_cities = os.listdir("european-city-data/data-sources/airbnb/")
airbnb_cities = list(map(lambda x: (x.split("-")[0]).capitalize(), airbnb_cities))
airbnb_cities

['Porto',
 'Paris',
 'Vienna',
 'Munich',
 'Naples',
 'Rome',
 'Bologna',
 'Madrid',
 'Bordeaux',
 'London',
 'Brussels',
 'Lyon',
 'Barcelona',
 'Amsterdam']

In [20]:
european_cities = pd.read_csv("europeancities.csv")
city_list = european_cities[~european_cities['city'].isin(airbnb_cities)]
city_list

Unnamed: 0.1,Unnamed: 0,city,country
2,2,Berlin,Germany
6,6,Copenhagen,Denmark
7,7,Geneva,Switzerland
8,8,Istanbul,Turkey
12,12,Milan,Italy
17,17,Prague,Czechia
18,18,Riga,Latvia
20,20,Stockholm,Sweden
21,21,Thessaloniki,Greece
22,22,Valencia,Spain


In [21]:
gather_all_cities_data(2024, 3, city_list)

Downloading berlin-2024-03-24.csv.gz
Downloaded berlin-2024-03-24.csv.gz
Downloading copenhagen-2024-03-30.csv.gz
Downloaded copenhagen-2024-03-30.csv.gz
Downloading istanbul-2024-03-31.csv.gz
Downloaded istanbul-2024-03-31.csv.gz
Downloading milan-2024-03-24.csv.gz
Downloaded milan-2024-03-24.csv.gz
Downloading prague-2024-03-25.csv.gz
Downloaded prague-2024-03-25.csv.gz
Downloading riga-2024-03-31.csv.gz
Downloaded riga-2024-03-31.csv.gz
Downloading stockholm-2024-03-30.csv.gz
Downloaded stockholm-2024-03-30.csv.gz
Downloading valencia-2024-03-24.csv.gz
Downloaded valencia-2024-03-24.csv.gz
Downloading zurich-2024-03-30.csv.gz
Downloaded zurich-2024-03-30.csv.gz


Upon checking the InsideAirBnB website for Geneva and Thessaloniki, the most recent update is from December 2023 

In [22]:
city_list = city_list[city_list['city'].isin(['Thessaloniki', 'Geneva'])]
gather_all_cities_data(2023, 12, city_list)

Downloading geneva-2023-12-27.csv.gz
Downloaded geneva-2023-12-27.csv.gz
Downloading thessaloniki-2023-12-25.csv.gz
Downloaded thessaloniki-2023-12-25.csv.gz
