## Create a subset of the global-streetscapes dataset

In [41]:
# --------------------------------------
import warnings

# Suppress every warning for the rest of the kernel session so that cell
# output stays compact.
# NOTE(review): this blanket filter also hides deprecation and data-quality
# warnings; consider restricting it to a specific category or module.
warnings.filterwarnings("ignore")

# --------------------------------------
# Project-local helpers: `conf` is used below for its DATA_DIR path, and
# `load_city_subset` loads the per-city subset of the dataset.
from streetscapes import conf
from streetscapes.functions import load_city_subset

### Load dataset

In [42]:
# City whose subset is loaded; the loader's log output reports the file it reads.
city_name = "Amsterdam"
df_city = load_city_subset(city_name)

[35mStreetscapes[0m | [36m2025-02-20@11:58:15[0m | [1mLoading 'Amsterdam.parquet'...[0m


In [43]:
# Preview the first five rows of the loaded subset.
df_city.head(n=5)

Unnamed: 0,uuid,source,orig_id,glare,lighting_condition,pano_status,platform,quality,reflection,view_direction,...,to,ref,tunnel_1,bridge_1,service,access,road_width,area,est_width,reversed
13920,99d8019f-38bc-48ba-b9de-870790cce291,Mapillary,295335152124602,False,day,False,walking surface,good,False,front/back,...,46367470.0,,,,,,,,,True
14030,7aa5b791-7d8a-41b1-9e6e-c5a15313ee67,Mapillary,826691848265754,False,day,True,cycling surface,good,False,,...,1307719000.0,,building_passage,,,no,1.25,,,False
14219,2df73a61-742d-494b-9ea4-ce52e8ad74e0,Mapillary,975351706501580,False,day,False,walking surface,good,False,front/back,...,8036447000.0,,,,,,,,,False
14631,5168b863-e938-4ccf-acee-d52ca075cee4,Mapillary,1170489873417401,False,day,True,cycling surface,good,False,,...,46379160.0,,,,,,,,,True
14791,30fe8123-bd44-4d12-ab6f-f088815ece32,Mapillary,304360741085605,False,day,True,driving surface,good,False,,...,46347930.0,,,,,,,,,False


### Subset dataset

In this case we select images of Amsterdam that were taken during the day and have a side viewing direction.

In [44]:
# List the distinct lighting conditions present, to pick the filter value used below.
df_city.loc[:, "lighting_condition"].unique()

array(['day', 'dusk/dawn', 'night'], dtype=object)

In [45]:
# Keep only the rows whose lighting condition is "day".
df_day = df_city.loc[lambda frame: frame["lighting_condition"] == "day"]
# Show the column index of the filtered frame.
df_day.columns

Index(['uuid', 'source', 'orig_id', 'glare', 'lighting_condition',
       'pano_status', 'platform', 'quality', 'reflection', 'view_direction',
       ...
       'to', 'ref', 'tunnel_1', 'bridge_1', 'service', 'access', 'road_width',
       'area', 'est_width', 'reversed'],
      dtype='object', length=140)

In [46]:
# From the daytime rows, keep only those whose view direction is "side".
df_side = df_day.loc[lambda frame: frame["view_direction"] == "side"]
# Show the column index of the filtered frame.
df_side.columns

Index(['uuid', 'source', 'orig_id', 'glare', 'lighting_condition',
       'pano_status', 'platform', 'quality', 'reflection', 'view_direction',
       ...
       'to', 'ref', 'tunnel_1', 'bridge_1', 'service', 'access', 'road_width',
       'area', 'est_width', 'reversed'],
      dtype='object', length=140)

### Create dataframe to download images

Only keep the information needed to download the images and save it to a Parquet file.

In [47]:
# Columns retained for the download list.
download_columns = ["uuid", "source", "orig_id"]
df_to_download = df_side[download_columns]
# Preview the first rows of the reduced frame.
df_to_download.head()

Unnamed: 0,uuid,source,orig_id
14924,b3cf1852-0b91-49b8-b0c1-d520f729c2f1,Mapillary,469797594077540
15107,75df3b51-d8f7-43f6-bb7d-a6398d1f0a5d,Mapillary,383338319993255
15890,6b7e9737-7394-4f73-b4d1-8dbb4d1b7c12,Mapillary,1096261547548171
16386,92c524e8-bfa3-4a9e-ab01-996bca4f3556,Mapillary,1173473299835694
18404,1569de6b-7b20-4611-a031-1d6cc53ead8a,Mapillary,1033394290580893


In [48]:
# Write the download list to <DATA_DIR>/data/parquet/amsterdam_side.parquet.
parquet_dir = conf.DATA_DIR / "data/parquet"
# Create the output directory if it is missing, so the write does not fail on
# a fresh checkout. Re-running the cell is safe because of exist_ok=True.
# NOTE(review): assumes conf.DATA_DIR is a pathlib.Path — confirm.
parquet_dir.mkdir(parents=True, exist_ok=True)
df_to_download.to_parquet(parquet_dir / "amsterdam_side.parquet")