In [80]:
import pandas as pd
import numpy as np
from pathlib import Path

In [81]:
file_names = [
    "listings_202309.csv",
    "listings_202312.csv",
    "listings_202403.csv",
    "listings_202406.csv",
]

In [82]:
all_listings = pd.concat(
    (
        pd.read_csv(f)
        for f in [Path(f"../local_data/{file_name}") for file_name in file_names]
    ),
    ignore_index=True,
).sort_values("last_scraped")

In [69]:
dates = all_listings["last_scraped"].unique()


for date in dates:
    temp_df = all_listings[all_listings["last_scraped"] == date].dropna(
        axis=1, how="all"
    )
    print(
        f"Date: {date}: cols missing: {[ele for ele in all_listings.columns.tolist() if ele not in temp_df.columns.tolist()]}"
    )

all_listings["last_scraped"] = all_listings["last_scraped"].replace(
    "2023-12-27", "2023-12-28"
)
all_listings = all_listings.dropna(subset=["price"], axis=0, how="all")
all_listings["bathrooms"] = np.where(
    all_listings["bathrooms_text"].isna(),
    all_listings["bathrooms"],
    all_listings["bathrooms_text"],
)

Date: 2023-09-24: cols missing: ['bathrooms', 'calendar_updated', 'license']
Date: 2023-12-27: cols missing: ['description', 'bathrooms', 'bedrooms', 'calendar_updated', 'license']
Date: 2023-12-28: cols missing: ['description', 'bathrooms', 'bedrooms', 'calendar_updated', 'license']
Date: 2024-03-31: cols missing: ['calendar_updated', 'license']
Date: 2024-06-29: cols missing: ['calendar_updated', 'license']


In [70]:
all_listings.groupby("last_scraped").size()

last_scraped
2023-09-24    2534
2023-12-28    2372
2024-03-31    2140
2024-06-29    2323
dtype: int64

In [71]:
pd.set_option("display.max_columns", None)

In [72]:
a = all_listings.groupby("last_scraped").count().sort_index()
a.loc[:, ((a == 0).sum() > 1)]

Unnamed: 0_level_0,calendar_updated,license
last_scraped,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-09-24,0,0
2023-12-28,0,0
2024-03-31,0,0
2024-06-29,0,0


In [73]:
all_listings.filter(["bedrooms", "beds"]).describe()

Unnamed: 0,bedrooms,beds
count,6192.0,9257.0
mean,1.257752,1.553959
std,0.898196,1.151982
min,0.0,0.0
25%,1.0,1.0
50%,1.0,1.0
75%,1.0,2.0
max,32.0,32.0


In [74]:
all_listings["bathrooms_text"]

0               1 bath
1685     1 shared bath
1686            1 bath
1687     1 shared bath
1688            1 bath
             ...      
9176            1 bath
9177            1 bath
9178            1 bath
9179     1 shared bath
11182           1 bath
Name: bathrooms_text, Length: 9369, dtype: object

In [75]:
all_listings["beds"].fillna(np.nan).sub(all_listings["bedrooms"]).value_counts()

 0.0     3866
 1.0     1495
-1.0      342
 2.0      291
 3.0       56
-2.0       50
 4.0       29
 5.0       11
-3.0        8
 6.0        4
-4.0        3
 16.0       1
-19.0       1
-6.0        1
-5.0        1
Name: count, dtype: int64

In [76]:
min_entries = 2

keep_ids = (
    all_listings.value_counts("id").loc[lambda x: x >= min_entries].index.tolist()
)
relevant_listings = (
    all_listings[all_listings["id"].isin(keep_ids)]
    .drop_duplicates("id", keep="last")
    .dropna(how="all", axis=1)
)

In [77]:
relevant_listings.to_csv("../local_data/relevant_listings.csv", index=False)

In [78]:
relevant_listings.sort_values("price", ascending=False)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
6847,668839556377128266,https://www.airbnb.com/rooms/668839556377128266,20240330234249,2024-03-31,city scrape,Luxury Penthouse near the Dolder Hotel,Ever dreamed of living in a luxury penthouse? ...,,https://a0.muscache.com/pictures/miso/Hosting-...,40704806,https://www.airbnb.com/users/show/40704806,Florence,2015-08-06,"Zürich, Switzerland","Hello :) I am Florence, living in Zurich. I tr...",,,,f,https://a0.muscache.com/im/pictures/user/fb493...,https://a0.muscache.com/im/pictures/user/fb493...,,1,1,['phone'],t,f,,Hirslanden,Kreis 7,47.364930,8.569090,Entire condo,Entire home/apt,4,2 baths,2 baths,2.0,2.0,"[""Hair dryer"", ""Free washer \u2013 In building...",$999.00,10,1125,10,10,1125,1125,10.0,1125.0,t,30,60,90,365,2024-03-31,0,0,0,,,,,,,,,,t,1,1,0,0,
7508,955784712671658608,https://www.airbnb.com/rooms/955784712671658608,20240330234249,2024-03-31,city scrape,Zurich City Apartment - Wollishofen,Wellcome in Zurich,,https://a0.muscache.com/pictures/72f799fd-f7d1...,177523078,https://www.airbnb.com/users/show/177523078,Markus,2018-03-09,"Zürich, Switzerland",,within a day,100%,79%,t,https://a0.muscache.com/im/pictures/user/User-...,https://a0.muscache.com/im/pictures/user/User-...,,1,2,"['email', 'phone']",t,t,,Wollishofen,Kreis 2,47.336297,8.531129,Entire rental unit,Entire home/apt,2,1 bath,1 bath,1.0,1.0,"[""Clothing storage: dresser"", ""Free washer \u2...",$99.00,3,15,1,3,15,15,3.0,15.0,t,20,40,46,46,2024-03-31,17,17,1,2023-09-28,2024-03-25,5.00,5.00,5.00,5.00,5.00,4.94,4.94,f,1,1,0,0,2.74
4796,937575075386386458,https://www.airbnb.com/rooms/937575075386386458,20231227173841,2023-12-28,city scrape,Rental unit in Zürich · ★4.55 · 1 bedroom · 1 ...,,,https://a0.muscache.com/pictures/miso/Hosting-...,83233956,https://www.airbnb.com/users/show/83233956,Ariana,2016-07-12,"Zürich, Switzerland","Wer zusammen lacht, versteht sich. Wir erkenne...",within an hour,100%,97%,f,https://a0.muscache.com/im/pictures/user/User-...,https://a0.muscache.com/im/pictures/user/User-...,,1,1,"['email', 'phone']",t,t,,Unterstrass,Kreis 6,47.400853,8.532059,Entire rental unit,Entire home/apt,2,1 bath,1 bath,,1.0,[],$99.00,2,30,1,2,1125,1125,2.0,1125.0,t,22,28,38,38,2023-12-27,22,22,2,2023-08-04,2023-12-16,4.55,4.73,4.77,4.82,4.73,4.68,4.45,t,1,1,0,0,4.52
8483,15653745,https://www.airbnb.com/rooms/15653745,20240629155539,2024-06-29,city scrape,Nice 2.5 room Raspberry I Apartment,"The ""Raspberry I"" Business Apartment is the pe...",The modern business apartment is located close...,https://a0.muscache.com/pictures/d3861949-8acc...,12886487,https://www.airbnb.com/users/show/12886487,Stephanie,2014-03-07,"Zürich, Switzerland","HITrental, your holiday rentals in Switzerland",within an hour,100%,100%,t,https://a0.muscache.com/im/pictures/user/115df...,https://a0.muscache.com/im/pictures/user/115df...,,268,271,"['email', 'phone', 'work_email']",t,t,"Zürich, Switzerland",Oerlikon,Kreis 11,47.408020,8.547620,Entire rental unit,Entire home/apt,4,1 bath,1 bath,1.0,2.0,"[""Bed linens"", ""Shower gel"", ""Dryer"", ""Pets al...",$99.00,1,1125,1,1,1125,1125,1.0,1125.0,t,0,10,12,12,2024-06-29,13,1,0,2017-02-02,2023-10-31,4.92,4.92,4.92,4.92,5.00,4.85,4.69,t,107,107,0,0,0.14
4207,716380258943464821,https://www.airbnb.com/rooms/716380258943464821,20231227173841,2023-12-28,previous scrape,Rental unit in Zürich · ★4.09 · 1 bedroom · 1 ...,,,https://a0.muscache.com/pictures/miso/Hosting-...,479599044,https://www.airbnb.com/users/show/479599044,Adem,2022-09-15,,,,,99%,f,https://a0.muscache.com/im/pictures/user/4fbbd...,https://a0.muscache.com/im/pictures/user/4fbbd...,,11,12,"['email', 'phone']",t,t,,Hochschulen,Kreis 1,47.376800,8.544850,Entire rental unit,Entire home/apt,2,1 bath,1 bath,,1.0,[],$99.00,1,365,1,1,1125,1125,1.0,1125.0,t,0,0,0,0,2023-12-28,33,16,0,2022-09-20,2023-03-26,4.09,4.42,4.21,4.42,4.67,4.91,4.33,t,10,10,0,0,2.13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9377,52546817,https://www.airbnb.com/rooms/52546817,20240629155539,2024-06-29,city scrape,Penthouse Bellevue,Enjoy a stylish experience at this centrally-l...,,https://a0.muscache.com/pictures/miso/Hosting-...,144959892,https://www.airbnb.com/users/show/144959892,Anna Victoria,2017-08-08,"Zürich, Switzerland",,within a few hours,100%,83%,f,https://a0.muscache.com/im/pictures/user/b200d...,https://a0.muscache.com/im/pictures/user/b200d...,,3,3,"['email', 'phone']",t,t,,Rathaus,Kreis 1,47.368830,8.544710,Entire loft,Entire home/apt,6,2.5 baths,2.5 baths,2.0,3.0,"[""Washer"", ""Wifi"", ""Refrigerator"", ""Air condit...","$1,000.00",3,365,3,3,365,365,3.0,365.0,t,1,1,1,180,2024-06-29,0,0,0,,,,,,,,,,f,3,3,0,0,
10512,1038869421955448066,https://www.airbnb.com/rooms/1038869421955448066,20240629155539,2024-06-29,city scrape,"Kleine, feine Wohnung im besten Quartier der S...",Welcome to my small but nice and very centrall...,"Bars, cafes, restaurants, a park with trees, r...",https://a0.muscache.com/pictures/miso/Hosting-...,28241669,https://www.airbnb.com/users/show/28241669,Roxy,2015-02-23,"Zürich, Switzerland",,,,,f,https://a0.muscache.com/im/pictures/user/bf07d...,https://a0.muscache.com/im/pictures/user/bf07d...,,1,1,"['email', 'phone']",t,t,"Zürich, Switzerland",Wipkingen,Kreis 10,47.392242,8.526805,Entire rental unit,Entire home/apt,3,1 bath,1 bath,1.0,1.0,"[""Clothing storage"", ""Bed linens"", ""Pets allow...","$1,000.00",1,6,1,1,6,6,1.0,6.0,t,5,5,5,5,2024-06-29,0,0,0,,,,,,,,,,f,1,1,0,0,
3945,554819065932635093,https://www.airbnb.com/rooms/554819065932635093,20231227173841,2023-12-28,previous scrape,Rental unit in Zürich · 2 bedrooms · 2 beds · ...,,,https://a0.muscache.com/pictures/miso/Hosting-...,151693991,https://www.airbnb.com/users/show/151693991,Sinha‘S Swiss,2017-09-22,Switzerland,"""Home is where our story begins…""\n\nAnd here ...",within an hour,100%,97%,f,https://a0.muscache.com/im/pictures/user/b5578...,https://a0.muscache.com/im/pictures/user/b5578...,,43,137,"['email', 'phone', 'work_email']",t,t,,Hirzenbach,Kreis 12,47.404950,8.578840,Entire rental unit,Entire home/apt,4,1 bath,1 bath,,2.0,[],"$1,000.00",2,365,1,1,365,365,1.0,365.0,t,0,0,0,0,2023-12-28,0,0,0,,,,,,,,,,f,5,5,0,0,
9083,43703654,https://www.airbnb.com/rooms/43703654,20240629155539,2024-06-29,city scrape,The Crown Penthouse Bahnhofstrasse Zürich,An oasis in Zurich city life.We know what is i...,Coming soon,https://a0.muscache.com/pictures/85b3d24e-d2d8...,144959892,https://www.airbnb.com/users/show/144959892,Anna Victoria,2017-08-08,"Zürich, Switzerland",,within a few hours,100%,83%,f,https://a0.muscache.com/im/pictures/user/b200d...,https://a0.muscache.com/im/pictures/user/b200d...,,3,3,"['email', 'phone']",t,t,"Zürich, Switzerland",Lindenhof,Kreis 1,47.376350,8.540470,Entire rental unit,Entire home/apt,5,3.5 baths,3.5 baths,2.0,4.0,"[""Private entrance"", ""Washer"", ""Wifi"", ""Breakf...","$1,000.00",4,365,4,4,365,365,4.0,365.0,t,0,0,0,179,2024-06-29,2,0,0,2020-07-01,2020-07-04,5.00,5.00,4.50,5.00,5.00,5.00,4.00,f,3,3,0,0,0.04
