In [1]:
from IPython.display import display
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from pytorch_lightning import seed_everything
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from srai.embedders import Highway2VecEmbedder, Hex2VecEmbedder, GTFS2VecEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders import OSMNetworkType, OSMWayLoader, OSMOnlineLoader, OSMPbfLoader, GTFSLoader
from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS, HEX2VEC_FILTER
from srai.neighbourhoods import H3Neighbourhood
from srai.plotting import plot_regions, plot_numeric_data
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf

from pathlib import Path
from tqdm import tqdm
import torch
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
import numpy as np
from torch.utils.data import Dataset, DataLoader


This notebook creates region embeddings with four encoders: road network, points of interest (POI), public transport stops (GTFS), and street view images.

Delft demo 2.0 includes all encoders while Delft demo 1.0 only includes road network and POI.
Delft demo 3.0 will include relative embeddings and verification of accessibility.


In [2]:
# Study area configuration -- edit CITY / COUNTRY to analyse another place
CITY, COUNTRY = "Delft", "Netherlands"
area_name = ", ".join((CITY, COUNTRY))

area = None
# Geocode the "<city>, <country>" query string into a region GeoDataFrame
area_gdf = geocode_to_region_gdf(area_name)


In [3]:
# Regionalize: cover the study area with H3 cells.
# H3Regionalizer is already imported in the first cell, so no local import is needed here.
H3_RESOLUTION = 9  # resolution passed to H3Regionalizer; the exported CSV names carry the same "_9" suffix

regionalizer = H3Regionalizer(H3_RESOLUTION)
regions_gdf = regionalizer.transform(area_gdf)
print(f"Number of regions in area: {len(regions_gdf)}.")

Number of regions in area: 327.


-----------------------ROAD NETWORK ENCODER-----------------------

In [4]:
def encode_roadnetwork(area_gdf, regions_gdf, seed=42):
    """Create road-network embeddings for the given regions.

    Loads the drivable OSM network for ``area_gdf``, intersects its edges with
    ``regions_gdf`` and fits a ``Highway2VecEmbedder`` on the result.

    Args:
        area_gdf: GeoDataFrame describing the area to download the network for.
        regions_gdf: GeoDataFrame of the regions to embed.
        seed: Seed handed to ``seed_everything`` before training (default 42,
            the value that was previously hard-coded).

    Returns:
        Whatever ``Highway2VecEmbedder.fit_transform`` returns for the regions
        (displayed later in this notebook as a table indexed by ``region_id``).
    """
    # Seed for reproducibility
    seed_everything(seed)

    # Load OSM data; only the edges are needed, the nodes are discarded
    loader = OSMWayLoader(OSMNetworkType.DRIVE)
    _, edges_gdf = loader.load(area_gdf)

    # Join regions and edges
    joint_gdf = IntersectionJoiner().transform(regions_gdf, edges_gdf)

    # Embed the road network
    return Highway2VecEmbedder().fit_transform(regions_gdf, edges_gdf, joint_gdf)

# Build the road-network embeddings for the area and regions defined in the cells above
embeddings_roadnetwork = encode_roadnetwork(area_gdf, regions_gdf)


Seed set to 42


Downloading graphs:   0%|          | 0/1 [00:00<?, ?it/s]

  G_directed = ox.graph_from_polygon(


  0%|          | 0/12 [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\s2161699\AppData\Local\miniconda3\envs\afstuderen\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generate

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
  embeddings_joint = joint_gdf.join(embeddings_df)


-----------------------POINT OF INTEREST ENCODER-----------------------

In [5]:
# OSM keys to download; every key is requested with the value True
POI_TAG_KEYS = (
    "amenity",
    "shop",
    "tourism",
    "leisure",
    "historic",
    "natural",
    "building",
    "highway",
    "railway",
    "public_transport",
    "landuse",
    "waterway",
    "power",
    "sport",
)
tags = dict.fromkeys(POI_TAG_KEYS, True)

# Download the matching features for the study area
loader = OSMOnlineLoader()
features_gdf = loader.load(area_gdf, tags)

Downloading sport: True           : 100%|██████████| 14/14 [00:08<00:00,  1.73it/s]


In [6]:
seed_everything(42)  # Seed for reproducibility

# Uses `features_gdf` (the POI features) loaded in the previous cell.
joiner = IntersectionJoiner()
neighbourhood = H3Neighbourhood(regions_gdf)
embedder = Hex2VecEmbedder()

joint_gdf = joiner.transform(regions_gdf, features_gdf)

# Train on the GPU when one is available, otherwise fall back to the CPU
# instead of failing on machines without CUDA.
poi_accelerator = "cuda" if torch.cuda.is_available() else "cpu"

embeddings_POI = embedder.fit_transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    trainer_kwargs={"max_epochs": 20, "accelerator": poi_accelerator},
    batch_size=100,
)

Seed set to 42
100%|██████████| 327/327 [00:00<00:00, 38367.90it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\s2161699\AppData\Local\miniconda3\envs\afstuderen\Lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 84.3 K
---------------------------------------
84.3 K    Trainable params
0         Non-trainable params
84.3 K    Total params
0.337     Total estimated model params size (MB)
C:\Users\s2161699\AppData\Local\miniconda3\envs\afstuderen\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value 

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


-----------------------GTFS ENCODER-----------------------

In [7]:
# Location of the GTFS feed archive -- machine specific, change it to your local copy.
GTFS_FEED_PATH = Path("D:\\tu delft\\Afstuderen\\gtfs_nl.zip")

# NOTE: this cell rebinds `features_gdf` and `joint_gdf`, which the POI cells above
# also assign; re-run the POI cells before re-running the POI embedding.
gtfs_loader = GTFSLoader()
features_gdf = gtfs_loader.load(GTFS_FEED_PATH)

# Steps 1 + 2: clean the feature columns in a single pass
for column in features_gdf.columns:
    column_dtype = features_gdf[column].dtype
    if column_dtype == 'object':
        # Object columns are expected to contain sets: replace NaN values with empty sets
        features_gdf[column] = features_gdf[column].apply(lambda x: set() if pd.isna(x) else x)
    elif column_dtype == float:
        # Float columns: fill NaN with 0 as a placeholder, then convert to int
        features_gdf[column] = features_gdf[column].fillna(0).astype(int)

# Step 3: Merging features messed up index names
features_gdf.index.name = "feature_id"

# Step 4: Join data now that it is in correct format
joiner = IntersectionJoiner()
joint_gdf = joiner.transform(regions_gdf, features_gdf)

# Step 5: Embed the features
seed_everything(42)
embedder = GTFS2VecEmbedder()
embeddings_GTFS = embedder.fit_transform(regions_gdf, features_gdf, joint_gdf)


  df = pd.read_csv(p, dtype=cs.DTYPE, encoding="utf-8-sig")
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 5.5 K 
1 | decoder | Sequential | 5.5 K 
---------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
C:\Users\s2161699\AppData\Local\miniconda3\envs\afstuderen\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
C:\Users\s2161699\AppData\Local\miniconda3\envs\afstuderen\Lib\site-packages\pytorch_lightning\loops\fit_loop.py:293: The number of training batches (14) is smaller t

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


-----------------------STREET VIEW IMAGES ENCODER-----------------------
Step 0: Load the images
Step 1: Create a mapping of image names to coordinates
Step 2: Map images to regions
Step 3: Sample images per region
Step 4: Embed the images
Step 5: Average the embeddings per region

In [8]:
# Step 0: Load the images
# Locations of the street view image files and of the panorama index that holds their coordinates
data_path = Path('imagesummary/data/Delft_NL')
image_directory_path = data_path.joinpath('imagedb')
panoids_path = data_path / 'panoids/panoids.geojson'
panoids = gpd.read_file(panoids_path)

In [9]:
# Step 1: Create a mapping of image names to coordinates
# Each row of `panoids` names up to four image files (the columns listed below);
# every non-null file name is keyed to the (lat, lng) of its row.
columns = ['im_side_a', 'im_front', 'im_side_b', 'im_back']
image_name_to_coords = {
    img_name: (lat, lng)
    for lat, lng, *row_image_names in zip(
        panoids['lat'], panoids['lng'], *(panoids[col] for col in columns)
    )
    for img_name in row_image_names
    if pd.notnull(img_name)  # skip entries that are NaN
}

# Assuming `image_path` is already defined as the Path to your image folder
# Assuming `regions_gdf` is already defined as the GeoDataFrame of H3 regions

def map_images_to_regions(image_path, image_name_to_coords, regions_gdf):
    """Assign every image file with known coordinates to the region containing it.

    Args:
        image_path: ``Path`` of the image folder; it is searched recursively.
        image_name_to_coords: dict mapping an image file name to ``(lat, lng)``.
        regions_gdf: GeoDataFrame of regions; its index values identify the regions.

    Returns:
        A frame with the columns ``'image_name'`` and ``'index_right'``, where
        ``'index_right'`` holds the index value of the containing region (missing
        for images that fall in no region). The frame is empty when no file in
        ``image_path`` has known coordinates.
    """
    # Collect the location of every file whose name has known coordinates
    image_locations_data = []
    for img_file in image_path.glob('**/*'):
        if not img_file.is_file():
            continue
        coords = image_name_to_coords.get(img_file.name)
        if coords is None:
            continue
        lat, lng = coords
        image_locations_data.append({'image_name': img_file.name, 'lat': lat, 'lng': lng})

    # Nothing to join: return an empty frame with the expected columns
    if not image_locations_data:
        return pd.DataFrame(columns=['image_name', 'index_right'])

    image_locations = pd.DataFrame(image_locations_data)

    # The coordinates are latitude/longitude, so declare them as WGS84 ...
    image_locations_gdf = gpd.GeoDataFrame(
        image_locations,
        geometry=gpd.points_from_xy(image_locations['lng'], image_locations['lat']),
        crs="EPSG:4326",
    )
    # ... and reproject to the CRS of the regions (a no-op when that is WGS84 too)
    if regions_gdf.crs is not None:
        image_locations_gdf = image_locations_gdf.to_crs(regions_gdf.crs)

    # Join against the region geometries only, with an unnamed index: newer geopandas
    # releases keep a named right index under its own name, so 'index_right' would
    # otherwise be missing from the result.
    regions_for_join = regions_gdf[[regions_gdf.geometry.name]].rename_axis(index=None)
    image_locations_with_regions = gpd.sjoin(
        image_locations_gdf, regions_for_join, how='left', predicate='within'
    )

    return image_locations_with_regions[['image_name', 'index_right']]


# Step 2: Map images to regions
# Adjust `image_name_to_coords` as necessary to match your image naming convention and metadata.
# The join reports the region under 'index_right'; it is renamed to 'region_id' for the
# following steps. The rename is chained (not in place) so the cell can be re-run safely.
image_locations_with_regions = map_images_to_regions(
    image_directory_path, image_name_to_coords, regions_gdf
).rename(columns={'index_right': 'region_id'})

In [10]:
# Step 3: Sample images per region
# n is the number of images to sample per region (do not make too high!!!)
def sample_images_per_region(df, n, random_state=None):
    """Randomly keep at most ``n`` rows per region.

    Args:
        df: DataFrame with a ``'region_id'`` column; rows without a region are dropped.
        n: Maximum number of rows to keep for each region.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make the
            selection reproducible.

    Returns:
        A DataFrame with the columns of ``df``, grouped by ``'region_id'`` in sorted
        order and carrying a fresh 0..k-1 index.
    """
    # Shuffle once, then take the first n rows of every region: a random sample
    # without replacement that never asks for more rows than a region has.
    shuffled = df.dropna(subset=['region_id']).sample(frac=1, random_state=random_state)
    capped = shuffled.groupby('region_id', sort=False).head(n)
    # A stable sort groups the rows by region while keeping the shuffled order inside each region
    return capped.sort_values('region_id', kind='stable').reset_index(drop=True)

# Apply the sampling function: keep at most 100 images per region
images_sampled_df = sample_images_per_region(image_locations_with_regions, 100)

In [11]:
# Step 4: Embed the images
# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained ResNet50 and drop its last child (the top layer), leaving the
# feature map of resnet (dim 2048).
backbone = resnet50(weights=ResNet50_Weights.DEFAULT)
model = torch.nn.Sequential(*list(backbone.children())[:-1]).to(device)
# Switch to evaluation mode on the final module, so the wrapper and all of its
# layers agree on the mode.
model.eval()

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# File names of the sampled images, in the row order of images_sampled_df
image_names = images_sampled_df['image_name'].tolist()

In [12]:
# Step 4: Embed the images
import torchvision.transforms as transforms

class ImageDataset(Dataset):
    """Dataset that loads images by file name from a single directory.

    Args:
        image_names: Sequence of image file names; item ``i`` is ``image_names[i]``.
        image_directory_path: Directory containing the files (``str`` or ``Path``).
        transform: Optional callable applied to the RGB PIL image before it is returned.
    """

    def __init__(self, image_names, image_directory_path, transform=None):
        self.image_names = image_names
        # Accept plain strings as well as Path objects
        self.image_directory_path = Path(image_directory_path)
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return image ``idx`` as RGB, passed through ``transform`` when one is set."""
        image_path = self.image_directory_path / self.image_names[idx]
        # The context manager closes the file handle; convert() returns a new
        # image, so the result stays usable after the file is closed.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

# Instantiate the dataset
image_dataset = ImageDataset(image_names=image_names,
                             image_directory_path=image_directory_path,
                             transform=transform)

# No shuffling: row i of the embedding matrix must belong to image_names[i].
# Each item yielded by the loader is a whole batch of images.
image_loader = DataLoader(image_dataset, batch_size=512, shuffle=False)

# Encode batch by batch and move every result to the CPU right away, so that only
# the current batch occupies GPU memory.
batch_embeddings_cpu = []
with torch.no_grad():
    for images in tqdm(image_loader, desc="Encoding images"):
        batch_embeddings = model(images.to(device))
        # Flatten everything after the batch dimension (the model is expected to
        # return (batch, 2048, 1, 1)); unlike squeeze() this keeps the batch
        # dimension even when there is only one image.
        batch_embeddings_cpu.append(batch_embeddings.flatten(start_dim=1).cpu())

# Concatenate all batches and convert to a NumPy array of shape (n_images, n_features)
embeddings_streetview_np = torch.cat(batch_embeddings_cpu, dim=0).numpy()

# Release the intermediate tensors and the cached GPU memory
del batch_embeddings_cpu
torch.cuda.empty_cache()  # Clear VRAM cache

Encoding images: 100%|██████████| 52/52 [03:22<00:00,  3.89s/it]


In [13]:
embeddings_streetview_np

array([[0.00277626, 0.00847524, 0.01973447, ..., 0.        , 0.        ,
        0.005976  ],
       [0.05017386, 0.        , 0.05529084, ..., 0.        , 0.        ,
        0.01733419],
       [0.        , 0.        , 0.04011143, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.00150248, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.1977937 , ..., 0.        , 0.        ,
        0.        ],
       [0.00549513, 0.        , 0.1017293 , ..., 0.        , 0.09141378,
        0.        ]], dtype=float32)

In [14]:
# Step 5: Average the embeddings per region

# Row i of embeddings_streetview_np is the embedding of image_names[i], and image_names
# was taken from images_sampled_df in row order, so the region of row i is
# images_sampled_df['region_id'][i].
region_ids = images_sampled_df['region_id'].to_numpy()
assert len(region_ids) == len(embeddings_streetview_np), (
    "number of embeddings does not match the number of sampled images"
)

# One numeric column per embedding dimension, indexed by region; averaging is then
# a plain group-by mean.
average_embeddings_df = (
    pd.DataFrame(embeddings_streetview_np, index=pd.Index(region_ids, name='region_id'))
    .groupby(level='region_id')
    .mean()
)

display(average_embeddings_df.head(6))


                     0         1         2         3         4         5     \
region_id                                                                     
89196bb1003ffff  0.027864  0.009309  0.024048  0.019583  0.354013  0.013709   
89196bb1007ffff  0.069186  0.022199  0.027741  0.007953  0.183431  0.008739   
89196bb100bffff  0.026536  0.004244  0.058487  0.014332  0.385848  0.010592   
89196bb100fffff  0.055697  0.012702  0.048567  0.024926  0.377007  0.008750   
89196bb1013ffff  0.024121  0.025266  0.056022  0.014234  0.258237  0.004997   
89196bb1017ffff  0.061744  0.013457  0.067734  0.005353  0.307522  0.012760   

                     6         7         8         9     ...      2038  \
region_id                                                ...             
89196bb1003ffff  0.098158  0.106250  0.131161  0.009357  ...  0.039936   
89196bb1007ffff  0.081217  0.063458  0.075068  0.010589  ...  0.050389   
89196bb100bffff  0.081340  0.109364  0.143072  0.014979  ...  0.059320 

In [15]:
# # Concatenate the embeddings
# embeddings_concatenated = pd.concat([embeddings_POI, embeddings_roadnetwork], axis=1, sort=False)
# embeddings_concatenated = embeddings_concatenated.fillna(0)
# embeddings_concatenated = embeddings_concatenated.drop(columns=["cluster"])
# 
# print(f"Road network embeddings shape: {embeddings_roadnetwork.shape}")
# print(f"POI embeddings shape: {embeddings_POI.shape}")
# print(f"Combined embeddings shape: {embeddings_concatenated.shape}")

In [16]:

def cluster_plot(embeddings, regions=None, n_clusters=5, random_state=42):
    """Cluster the embeddings with K-Means and plot the cluster label of every region.

    Args:
        embeddings: DataFrame of embeddings, one row per region.
        regions: GeoDataFrame of the regions to draw; defaults to the notebook-level
            ``regions_gdf``.
        n_clusters: Number of K-Means clusters (default 5).
        random_state: Seed for K-Means (default 42).

    Returns:
        The result of ``plot_numeric_data`` for the ``"cluster"`` column.
    """
    if regions is None:
        regions = regions_gdf
    # Work on a copy so the caller's frame does not gain a "cluster" column
    embeddings_plot = embeddings.copy()
    clusterizer = KMeans(n_clusters=n_clusters, random_state=random_state)
    clusterizer.fit(embeddings)
    embeddings_plot["cluster"] = clusterizer.labels_
    return plot_numeric_data(regions, "cluster", embeddings_plot)

def pca_plot(embeddings, regions=None):
    """Show the regions on an interactive map, coloured by a 3-component PCA of the embeddings.

    The three principal components are rescaled to 0-255 each and used as the red,
    green and blue channel of the fill colour of a region.

    Args:
        embeddings: DataFrame of embeddings indexed by region id.
        regions: GeoDataFrame of the regions to draw; defaults to the notebook-level
            ``regions_gdf``.

    Returns:
        The map object returned by ``GeoDataFrame.explore``.
    """
    if regions is None:
        regions = regions_gdf

    pca = PCA(n_components=3)
    # make the embeddings into a dataframe
    pca_embeddings = pd.DataFrame(pca.fit_transform(embeddings), index=embeddings.index)

    # convert to RGB; a constant component has range 0, which is replaced by 1 so
    # that the division yields 0 instead of NaN
    value_range = (pca_embeddings.max() - pca_embeddings.min()).replace(0, 1)
    rgb_channels = ((pca_embeddings - pca_embeddings.min()) / value_range * 255).astype(int)

    # make the rgb array into a string
    rgb_strings = rgb_channels.apply(
        lambda row: f"rgb({row[0]}, {row[1]}, {row[2]})", axis=1
    )

    # Key the colours by row position, matching the "index" column created by the
    # double reset_index() below.
    # NOTE(review): regions without a row in `embeddings` get NaN as colour -- confirm
    # that this is the intended rendering.
    color_dict = dict(enumerate(regions.index.map(rgb_strings.to_dict()).to_list()))
    return regions.reset_index().reset_index().explore(
        column="index",
        tooltip="region_id",
        tiles="CartoDB positron",
        legend=False,
        cmap=lambda x: color_dict[x],
        style_kwds=dict(color="#444", opacity=0.0, fillOpacity=0.5),
    )

In [17]:
cluster_plot(average_embeddings_df)


  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [18]:
pca_plot(average_embeddings_df)

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [19]:

# Concatenation
average_embeddings_df.head()


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89196bb1003ffff,0.027864,0.009309,0.024048,0.019583,0.354013,0.013709,0.098158,0.10625,0.131161,0.009357,...,0.039936,0.026116,0.015108,0.008841,0.018559,0.022727,0.020068,0.009846,0.022419,0.022753
89196bb1007ffff,0.069186,0.022199,0.027741,0.007953,0.183431,0.008739,0.081217,0.063458,0.075068,0.010589,...,0.050389,0.019481,0.012341,0.010484,0.017719,0.026458,0.022836,0.009077,0.027217,0.018339
89196bb100bffff,0.026536,0.004244,0.058487,0.014332,0.385848,0.010592,0.08134,0.109364,0.143072,0.014979,...,0.05932,0.022314,0.011224,0.012421,0.007595,0.030351,0.018818,0.014022,0.014368,0.028285
89196bb100fffff,0.055697,0.012702,0.048567,0.024926,0.377007,0.00875,0.086615,0.163813,0.200556,0.011903,...,0.070908,0.021342,0.018459,0.009835,0.014471,0.035503,0.025561,0.033591,0.016121,0.016758
89196bb1013ffff,0.024121,0.025266,0.056022,0.014234,0.258237,0.004997,0.085673,0.074245,0.165682,0.019907,...,0.064427,0.015943,0.026843,0.012785,0.016302,0.024809,0.020698,0.027153,0.020055,0.048154


In [20]:
import pandas as pd
from itertools import combinations
# NOTE(review): this import rebinds `pca_plot` and `cluster_plot`, which are also defined
# in an earlier cell of this notebook; the calls in the cells below pass `regions_gdf`
# as a second argument, which the one-parameter notebook definitions do not accept.
from Plotting import pca_plot, cluster_plot
import warnings
# Silences every warning raised from here on
warnings.filterwarnings('ignore')
# Embedding tables by name; the combination cell below joins these keys with ', '
# to label each concatenation.
dataframes = {
    'embeddings_POI': embeddings_POI,
    'embeddings_GTFS': embeddings_GTFS,
    'embeddings_roadnetwork': embeddings_roadnetwork,
    'embeddings_streetview_df': average_embeddings_df,
}

In [21]:
# Concatenate the embedding tables column-wise for every non-empty combination of them.
# Keys are the table names joined with ', '; join='inner' keeps only the regions that
# are present in all tables of a combination.
concat_results = {
    ', '.join(subset): pd.concat(
        [dataframes[table_name] for table_name in subset], axis=1, join='inner'
    )
    for size in range(1, len(dataframes) + 1)
    for subset in combinations(dataframes, size)
}

# List the combinations that are available for plotting
for name in concat_results:
    print(f"PCA Plot for: {name}")

PCA Plot for: embeddings_POI
PCA Plot for: embeddings_GTFS
PCA Plot for: embeddings_roadnetwork
PCA Plot for: embeddings_streetview_df
PCA Plot for: embeddings_POI, embeddings_GTFS
PCA Plot for: embeddings_POI, embeddings_roadnetwork
PCA Plot for: embeddings_POI, embeddings_streetview_df
PCA Plot for: embeddings_GTFS, embeddings_roadnetwork
PCA Plot for: embeddings_GTFS, embeddings_streetview_df
PCA Plot for: embeddings_roadnetwork, embeddings_streetview_df
PCA Plot for: embeddings_POI, embeddings_GTFS, embeddings_roadnetwork
PCA Plot for: embeddings_POI, embeddings_GTFS, embeddings_streetview_df
PCA Plot for: embeddings_POI, embeddings_roadnetwork, embeddings_streetview_df
PCA Plot for: embeddings_GTFS, embeddings_roadnetwork, embeddings_streetview_df
PCA Plot for: embeddings_POI, embeddings_GTFS, embeddings_roadnetwork, embeddings_streetview_df


In [22]:
pca_plot(concat_results['embeddings_POI, embeddings_GTFS, embeddings_roadnetwork, embeddings_streetview_df'], regions_gdf)  

In [23]:
pca_plot(concat_results['embeddings_POI, embeddings_GTFS, embeddings_roadnetwork'], regions_gdf)  

In [24]:
pca_plot(concat_results['embeddings_POI'], regions_gdf)

In [25]:
pca_plot(concat_results['embeddings_streetview_df'], regions_gdf)

In [26]:
pca_plot(concat_results['embeddings_roadnetwork'], regions_gdf)

In [27]:
# NOTE(review): same call as two cells above (street view embeddings only)
pca_plot(concat_results['embeddings_streetview_df'], regions_gdf)

In [28]:
# Export the street view embeddings.
# NOTE(review): the export cell below writes this same frame again as 'streetview_embeddings_9.csv'.
average_embeddings_df.to_csv('streetview_embeddings.csv')

In [29]:
# Export every embedding table to its own CSV file
embedding_exports = {
    'roadnetwork_embeddings_9.csv': embeddings_roadnetwork,
    'POI_embeddings_9.csv': embeddings_POI,
    'GTFS_embeddings_9.csv': embeddings_GTFS,
    'streetview_embeddings_9.csv': average_embeddings_df,
}
for csv_name, embedding_table in embedding_exports.items():
    embedding_table.to_csv(csv_name)

In [34]:
embeddings_roadnetwork.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89196bb1003ffff,-0.140507,-0.427088,-0.145345,0.157417,0.58804,0.191696,0.248323,-0.658681,0.1366,0.376036,...,0.09404,-0.416582,0.038572,-0.251367,-0.237392,0.460484,-0.06556,-0.268445,-0.181269,0.53637
89196bb1007ffff,0.059147,-0.407063,-0.26195,-0.102696,0.605803,0.039025,0.001682,-0.445681,0.004049,0.795994,...,-0.138007,-0.519464,0.174475,-0.21165,-0.295834,0.550831,-0.257722,-0.146876,-0.361584,0.405336
89196bb100bffff,-0.194602,-0.454861,7.2e-05,0.180888,0.673966,0.269689,0.328895,-0.760565,0.133255,0.290006,...,0.15752,-0.385288,0.088259,-0.155121,-0.166977,0.454285,0.061343,-0.276776,-0.14941,0.583081
89196bb100fffff,-0.035104,-0.319804,-0.254949,0.080055,0.597332,0.041274,0.076917,-0.570119,0.064617,0.49514,...,-0.013314,-0.380447,0.028927,-0.326969,-0.275074,0.448157,-0.210839,-0.075767,-0.103108,0.378293
89196bb1013ffff,-0.295357,-0.313509,-0.033157,0.123026,0.687439,0.18924,0.244834,-0.618279,0.026741,0.27706,...,0.222237,-0.339115,0.119142,-0.330158,-0.317505,0.452707,-0.024857,-0.302235,-0.074299,0.657749


In [31]:
accessibility_embeddings = pd.read_csv('accessibility_embeddings_9.csv')

In [35]:
# Index the table by region. Guarded so that re-running this cell is a no-op
# instead of raising a KeyError once 'region_id' has become the index.
if 'region_id' in accessibility_embeddings.columns:
    accessibility_embeddings = accessibility_embeddings.set_index('region_id')

In [36]:
accessibility_embeddings

Unnamed: 0_level_0,embedding_dim_0,embedding_dim_1,embedding_dim_2,embedding_dim_3,embedding_dim_4,embedding_dim_5,embedding_dim_6,embedding_dim_7,embedding_dim_8,embedding_dim_9,embedding_dim_10,embedding_dim_11,embedding_dim_12,embedding_dim_13,embedding_dim_14,embedding_dim_15
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
89196bb1003ffff,2.292977,3.264391,3.516480,0.0,2.562889,2.703038,0.000000,5.412122,2.474888,2.016440,0.0,2.729931,2.082396,0.0,2.314485,1.184343
89196bb1007ffff,2.262638,2.997612,3.156637,0.0,2.202712,2.195180,0.000000,3.970387,2.099077,1.668237,0.0,2.585835,1.540516,0.0,1.620516,1.803797
89196bb100bffff,2.283813,3.068650,3.827497,0.0,2.858441,2.604209,0.484180,6.179566,2.627845,2.116412,0.0,2.743336,2.234123,0.0,2.917025,0.995235
89196bb100fffff,1.859801,2.210787,3.263609,0.0,2.403231,1.800589,0.878219,4.745056,2.057121,1.654786,0.0,2.221332,1.650027,0.0,2.533669,1.084016
89196bb1013ffff,2.480834,3.565220,3.661557,0.0,2.664018,2.918939,0.000000,5.535921,2.613700,2.137954,0.0,2.926204,2.197933,0.0,2.275773,1.366113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89196bbadcbffff,1.644186,0.504850,2.961791,0.0,2.422054,0.413751,3.973036,0.293641,1.258339,1.400088,0.0,1.681032,0.866203,0.0,3.224205,2.881146
89196bbadcfffff,1.493693,0.000000,3.421777,0.0,2.953990,0.000000,5.724002,0.960146,1.248057,1.423240,0.0,1.408197,0.735352,0.0,4.300928,2.850906
89196bbadd3ffff,1.383297,0.000000,3.691571,0.0,3.156501,0.000000,6.334992,1.892938,1.297373,1.440786,0.0,1.307462,0.772685,0.0,4.801569,2.501045
89196bbadd7ffff,1.363131,0.000000,3.225438,0.0,2.765183,0.000000,5.546830,0.909940,1.100367,1.296360,0.0,1.263262,0.610688,0.0,4.120503,2.678043


In [ ]:
# relative embeddings
