1. Feature Preparation
Extract relevant shared features from your dataset, such as:

Population Density
Building Type Distribution
Historical Event Patterns
Normalize these features (e.g., min-max scaling each feature to the [0, 1] range) so that they are on comparable scales.

2. Define a Similarity Metric
Choose a similarity metric to compute the "closeness" of two neighborhoods based on their features:

Euclidean Distance: For continuous numeric features.
Cosine Similarity: For vectors representing distributions (e.g., building types).
Correlation Coefficient: To capture linear relationships between feature vectors.
3. Construct an Edge Weight Matrix
Compute pairwise similarities or distances for all neighborhoods.
Use the similarity scores to decide which edges exist and what weight each edge carries. The two steps below can be combined (threshold first, then weight the edges that remain):
Thresholding: Create an edge only if the similarity exceeds a chosen value.
Edge Weighting: Use the similarity score as the edge weight.
4. Build the Graph
Use NetworkX to construct the graph:

Nodes represent neighborhoods.
Edges and weights represent feature-based relationships.
5. Generate Node2Vec Embeddings
Run Node2Vec on the constructed graph. The embeddings will now encode relationships based on shared features.

In [None]:
import networkx as nx
import numpy as np
from node2vec import Node2Vec
from sklearn.metrics.pairwise import cosine_similarity

def generate_optimized_node2vec_embeddings_with_features(
    neighbourhood_df, features_df, node2vec_dim=32, threshold=0.5
):
    """
    Generate Node2Vec embeddings for neighborhoods based on shared features.

    Features are min-max scaled per column, pairwise cosine similarity is
    computed between neighborhoods, and an undirected weighted graph is built
    with one edge for every pair whose similarity is strictly greater than
    ``threshold``. Node2Vec is then trained on that graph.

    Row ``i`` of ``features_df`` is treated as the feature vector of
    neighborhood ``i``; only the length of ``neighbourhood_df`` is used.

    Args:
        neighbourhood_df: DataFrame containing neighborhood information.
        features_df: DataFrame containing shared (numeric) features for
            neighborhoods, one row per neighborhood.
        node2vec_dim: Dimension of Node2Vec embeddings.
        threshold: Threshold for similarity to create edges.
    Returns:
        node2vec_emb_layer: Frozen PyTorch Embedding layer with one row per
            neighborhood. Neighborhoods that have no edge above ``threshold``
            (and therefore never appear in a random walk) get an all-zero row.
    Raises:
        ValueError: If ``features_df`` has fewer rows than ``neighbourhood_df``.
    """
    # Imported locally so the function does not rely on torch having been
    # imported elsewhere in the notebook.
    import torch
    from torch import nn

    num_neighborhoods = len(neighbourhood_df)
    features_np = np.asarray(features_df.to_numpy(), dtype=float)
    if features_np.shape[0] < num_neighborhoods:
        raise ValueError(
            f"features_df has {features_np.shape[0]} rows but neighbourhood_df "
            f"has {num_neighborhoods}; every neighborhood needs a feature row."
        )

    # Min-max normalize each feature column. Constant columns have a zero
    # range; dividing by 1 instead maps them to 0 rather than producing NaN.
    col_min = features_np.min(axis=0)
    col_range = features_np.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0
    features_np = (features_np - col_min) / col_range

    # Compute pairwise similarity (cosine similarity between feature rows)
    similarity_matrix = cosine_similarity(features_np)

    # Select every unordered pair (i < j) whose similarity exceeds the
    # threshold; triu_indices yields pairs in the same row-major order as a
    # nested "for i / for j in range(i + 1, n)" loop.
    rows, cols = np.triu_indices(num_neighborhoods, k=1)
    scores = similarity_matrix[rows, cols]
    keep = scores > threshold

    # Build graph: nodes are neighborhood indices, weights are similarities
    G = nx.Graph()
    G.add_weighted_edges_from(
        zip(rows[keep].tolist(), cols[keep].tolist(), scores[keep].tolist())
    )

    # Start from zeros so neighborhoods absent from the graph still get a row.
    node2vec_embeddings_np = np.zeros(
        (num_neighborhoods, node2vec_dim), dtype=np.float32
    )

    # Run Node2Vec only when there is something to walk on
    if G.number_of_edges() > 0:
        node2vec = Node2Vec(
            G, dimensions=node2vec_dim, walk_length=10, num_walks=100, workers=4
        )
        node2vec_model = node2vec.fit()
        # The trained model keys its vectors by the string form of the node id.
        for node in G.nodes:
            node2vec_embeddings_np[node] = node2vec_model.wv[str(node)]

    # Convert Node2Vec vectors to a frozen PyTorch Embedding layer
    node2vec_embeddings = torch.from_numpy(node2vec_embeddings_np)
    node2vec_emb_layer = nn.Embedding.from_pretrained(node2vec_embeddings, freeze=True)

    return node2vec_emb_layer


In [None]:
# Step 1: Load and prepare neighborhood geometry data.
# Parse the WKT strings in the 'Geometry Multipolygon' column into geometries.
# NOTE(review): `gpd` is not imported in this section — presumably
# `import geopandas as gpd` runs in an earlier cell; confirm.
neighbourhood_df['geometry'] = gpd.GeoSeries.from_wkt(neighbourhood_df['Geometry Multipolygon'])
# Overwrite each geometry with its centroid.
# NOTE(review): `.centroid` is read back from the column, so this presumably
# expects neighbourhood_df to be a GeoDataFrame — confirm.
neighbourhood_df['geometry'] = neighbourhood_df['geometry'].centroid  # Use centroids for proximity

# Step 2: Generate Node2Vec embeddings
# NOTE(review): `generate_optimized_node2vec_embeddings` and `num_neighborhoods`
# are not defined in this section; this is a different function from
# `generate_optimized_node2vec_embeddings_with_features` and is presumably
# defined in an earlier cell — confirm before running.
node2vec_dim = 32
node2vec_emb_layer = generate_optimized_node2vec_embeddings(neighbourhood_df, num_neighborhoods, node2vec_dim)

# Step 3: Integrate into the model's embedding module
# `CombinedEmbedding` and every dimension/count variable passed below come from
# outside this section; only `node2vec_emb_layer` is produced in this cell.
embedding_module = CombinedEmbedding(
    node2vec_emb_layer=node2vec_emb_layer,
    time2vec_embed_dim=time2vec_embed_dim,
    time_feature_dim=time_feature_dim,
    num_building_types=num_building_types,
    building_type_embed_dim=building_type_embed_dim,
    population_embed_dim=population_embed_dim,
    num_event_types=num_event_types,
    event_type_embed_dim=event_type_embed_dim,
    num_equipment_types=num_equipment_types,
    equipment_embed_dim=equipment_embed_dim,
    target_embed_dim=target_embed_dim
)


In [None]:
# Build the Node2Vec embedding layer from feature similarity: an edge is
# created between two neighborhoods when their similarity exceeds `threshold`.
# NOTE(review): this rebinds `node2vec_emb_layer`; an `embedding_module` that
# was already constructed with the previous layer is presumably not updated —
# confirm the intended cell execution order.
# NOTE(review): `shared_features_df` is not defined in this section — confirm
# it has one row per row of `neighbourhood_df`, in the same order.
node2vec_emb_layer = generate_optimized_node2vec_embeddings_with_features(
    neighbourhood_df=neighbourhood_df,
    features_df=shared_features_df,  # DataFrame with shared features
    node2vec_dim=32,
    threshold=0.5
)