In [1]:
import os
import glob
import cudf
import dask_cudf
import hydra

In [2]:
# Load hydra configuration
with hydra.initialize(version_base=None, config_path="../../../aiagents4pharma/talk2knowledgegraphs/configs"):
    cfg = hydra.compose(
        config_name="config", overrides=["tools/multimodal_subgraph_extraction=default"]
    )
    cfg = cfg.tools.multimodal_subgraph_extraction
cfg

{'_target_': 'talk2knowledgegraphs.tools.multimodal_subgraph_extraction', 'ollama_embeddings': ['nomic-embed-text'], 'temperature': 0.1, 'streaming': False, 'topk': 5, 'topk_e': 5, 'cost_e': 0.5, 'c_const': 0.01, 'root': -1, 'num_clusters': 1, 'pruning': 'gw', 'verbosity_level': 0, 'node_id_column': 'node_id', 'node_attr_column': 'node_attr', 'edge_src_column': 'edge_src', 'edge_attr_column': 'edge_attr', 'edge_dst_column': 'edge_dst', 'node_colors_dict': {'gene/protein': '#6a79f7', 'molecular_function': '#82cafc', 'cellular_component': '#3f9b0b', 'biological_process': '#c5c9c7', 'drug': '#c4a661', 'disease': '#80013f'}, 'biobridge': {'source': 'aiagents4pharma/talk2knowledgegraphs/tests/files/ibd_biobridge_multimodal/', 'node_type': ['gene/protein', 'molecular_function', 'cellular_component', 'biological_process', 'drug', 'disease']}}

In [3]:
# cfg.biobridge.source = "/mnt/blockstorage/biobridge_multimodal"
cfg.biobridge.source = "../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal"

In [4]:
# Loop over nodes and edges
chunk_size = 1
graph_dict = {}
for element in ["nodes", "edges"]:
    # Make an empty dictionary for each folder
    graph_dict[element] = {}
    for stage in ["enrichment", "embedding"]:
        print(element, stage)
        # Create the file pattern for the current subfolder
        file_list = glob.glob(os.path.join(cfg.biobridge.source, 
                                           element, 
                                           stage, '*.parquet.gzip'))
        print(file_list)
        # Read and concatenate all dataframes in the folder
        # Except the edges embedding, which is too large to read in one go
        # We are using a chunk size to read the edges embedding in smaller parts instead
        if element == "edges" and stage == "embedding":
            # For edges embedding, only read two columns: triplet_index and edge_emb
            # graph_dict[element][stage] = cudf.concat([cudf.read_parquet(f, columns=["triplet_index", "edge_emb"]) for f in file_list[:2]], ignore_index=True)
            # Loop by chunks
            file_list = file_list[:2]
            graph_dict[element][stage] = []
            for i in range(0, len(file_list), chunk_size):
                chunk_files = file_list[i:i+chunk_size]
                chunk_df = cudf.concat([cudf.read_parquet(f, columns=["triplet_index", "edge_emb"]) for f in chunk_files], ignore_index=True)
                graph_dict[element][stage].append(chunk_df)
        else:
            # For nodes and edges enrichment, read and concatenate all dataframes in the folder
            # This includes the nodes embedding, which is small enough to read in one go
            graph_dict[element][stage] = cudf.concat([cudf.read_parquet(f) for f in file_list], ignore_index=True)

nodes enrichment
['../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/nodes/enrichment/molecular_function.parquet.gzip', '../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/nodes/enrichment/disease.parquet.gzip', '../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/nodes/enrichment/drug.parquet.gzip', '../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/nodes/enrichment/biological_process.parquet.gzip', '../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/nodes/enrichment/cellular_component.parquet.gzip', '../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/nodes/enrichment/gene_protein.parquet.gzip']
nodes embedding
['../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimod

In [5]:
# Merge nodes embedding into nodes enrichment
graph_dict["nodes"] = graph_dict["nodes"]["enrichment"].merge(
    graph_dict["nodes"]["embedding"],
    how="left",
    on="node_id"
)

In [6]:
# Check head
graph_dict["nodes"].head(5)

Unnamed: 0,node_index,primekg_node_index,node_id,node_name,node_type,desc,feat,desc_emb,feat_emb
0,76769,119040,polysome binding_(119040),polysome binding,molecular_function,polysome binding belongs to molecular_function...,Binding to a polysome.,"[-0.026719015, -0.005082723, -0.015734224, 2.2...","[-0.0372445248067379, -0.14702573418617249, -0..."
1,76770,119041,ribosomal small subunit binding_(119041),ribosomal small subunit binding,molecular_function,ribosomal small subunit binding belongs to mol...,Binding to a small ribosomal subunit.,"[-0.010651515, -0.015877694, -0.015149737, -0....","[-0.1250441074371338, -0.4755474328994751, -0...."
2,76771,119042,adiponectin binding_(119042),adiponectin binding,molecular_function,adiponectin binding belongs to molecular_funct...,"Binding to adiponectin, a protein hormone prod...","[-0.023844853, -0.011522252, -0.013281639, 0.0...","[-0.01977725885808468, -0.28609418869018555, -..."
3,76772,119043,dynein light intermediate chain binding_(119043),dynein light intermediate chain binding,molecular_function,dynein light intermediate chain binding belong...,Binding to a light intermediate chain of the d...,"[-0.008497865, -0.0017590033, -0.036013458, -0...","[0.013446025550365448, -0.2218088060617447, -0..."
4,76773,119044,translation elongation factor binding_(119044),translation elongation factor binding,molecular_function,translation elongation factor binding belongs ...,"Binding to a translation elongation factor, an...","[-0.018522592, -0.02598443, -0.014937574, -0.0...","[0.01569276861846447, -0.428425669670105, 0.03..."


In [12]:
# graph_dict["edges"]["enrichment"] = graph_dict["edges"]["enrichment"].merge(
#     cudf.DataFrame({
#         "triplet_index": graph_dict["edges"]["enrichment"].triplet_index,
#         "feat_emb": None
#     }),
#     how="left",
#     on="triplet_index"
# )
graph_dict["edges"]["enrichment"].drop(columns=["feat_emb_x", "feat_emb_y", "feat_emb"], inplace=True)
graph_dict["edges"]["enrichment"]

Unnamed: 0,triplet_index,primekg_head_index,primekg_tail_index,head_id,tail_id,display_relation,edge_type,edge_type_str,head_index,tail_index,feat
0,18192,40,59559,CDK2_(40),PPP1R35_(59559),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,40,51424,CDK2 (gene/protein) has a direct relationship ...
1,18193,3423,2355,TRPC1_(3423),H2BC9_(2355),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,3407,2348,TRPC1 (gene/protein) has a direct relationship...
2,18194,919,10320,COL4A2_(919),MSR1_(10320),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,919,10225,COL4A2 (gene/protein) has a direct relationshi...
3,18195,1766,2522,HHEX_(1766),TP53BP2_(2522),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,1762,2513,HHEX (gene/protein) has a direct relationship ...
4,18196,4536,9522,INSR_(4536),SOCS7_(9522),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,4509,9438,INSR (gene/protein) has a direct relationship ...
...,...,...,...,...,...,...,...,...,...,...,...
3904605,3904159,45719,13459,polyamine deacetylation_(45719),HDAC10_(13459),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,38488,13286,polyamine deacetylation (biological_process) h...
3904606,3901757,111735,4343,negative regulation of nuclear-transcribed mRN...,TENT4B_(4343),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,69694,4320,negative regulation of nuclear-transcribed mRN...
3904607,3901759,51712,10309,lysine biosynthetic process via aminoadipic ac...,AASDHPPT_(10309),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,44221,10215,lysine biosynthetic process via aminoadipic ac...
3904608,3897225,48211,5543,mRNA 5'-splice site recognition_(48211),SFSWAP_(5543),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,40871,5508,mRNA 5'-splice site recognition (biological_pr...


In [11]:
graph_dict["edges"]["embedding"][0].loc[0, "edge_emb"]

[-0.022698749,
 -0.0013575292,
 0.006256367,
 -0.021244548,
 -0.02613595,
 0.026083069,
 -0.010615664,
 -0.008460803,
 0.001647543,
 0.0012030205,
 -0.0023184584,
 0.023835668,
 0.007561843,
 -0.000353222,
 -0.030776171,
 -0.008678934,
 0.020504227,
 -0.0077865827,
 -0.018296488,
 -0.02583189,
 -0.0035727064,
 0.041272856,
 0.008890453,
 -0.006239842,
 -0.013451355,
 0.02731253,
 -0.014502346,
 -0.031992413,
 -0.021720467,
 -0.0025481558,
 0.019340867,
 0.0023729908,
 -0.0051491917,
 -0.005397067,
 0.021826228,
 -0.036196373,
 -0.0020722358,
 -0.0049211467,
 0.0022176558,
 -0.017119907,
 0.014158625,
 -0.022897048,
 0.005922562,
 0.014634545,
 -0.02681017,
 -0.011488184,
 0.047433376,
 -0.03201885,
 -0.018084968,
 0.02457599,
 0.015216226,
 0.012836625,
 0.024417348,
 -0.008586393,
 0.023624148,
 -0.02642679,
 0.021826228,
 -0.008050983,
 -0.018534446,
 -0.008910283,
 -0.030643972,
 -0.011864955,
 0.0016739831,
 0.018283267,
 -0.020411687,
 0.011732754,
 -1.7041413e-05,
 0.02784133,
 -

In [17]:
[[]] * graph_dict["edges"]["enrichment"].shape[0]

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],


In [18]:
# Make a merged dataframe with additional column
graph_dict["edges"]["enrichment"] = graph_dict["edges"]["enrichment"].merge(
    cudf.DataFrame({
        "triplet_index": graph_dict["edges"]["enrichment"].triplet_index,
        "feat_emb": [[]] * graph_dict["edges"]["enrichment"].shape[0]
    }),
    how="left",
    on="triplet_index"
)
graph_dict["edges"]["enrichment"]

Unnamed: 0,triplet_index,primekg_head_index,primekg_tail_index,head_id,tail_id,display_relation,edge_type,edge_type_str,head_index,tail_index,feat,feat_emb
0,10400,4800,10583,PPIL3_(4800),GINS4_(10583),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,4771,10485,PPIL3 (gene/protein) has a direct relationship...,[]
1,10401,611,2085,APP_(611),TFPT_(2085),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,611,2080,APP (gene/protein) has a direct relationship o...,[]
2,10402,4801,9789,CELA1_(4801),KRBA1_(9789),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,4772,9704,CELA1 (gene/protein) has a direct relationship...,[]
3,10403,4409,6169,MAP3K10_(4409),PKP3_(6169),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,4386,6128,MAP3K10 (gene/protein) has a direct relationsh...,[]
4,10404,2447,173,PIK3R3_(2447),RAB11A_(173),ppi,"[gene/protein, ppi, gene/protein]",gene/protein|ppi|gene/protein,2439,173,PIK3R3 (gene/protein) has a direct relationshi...,[]
...,...,...,...,...,...,...,...,...,...,...,...,...
3904605,3901758,51712,13472,lysine biosynthetic process via aminoadipic ac...,AASS_(13472),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,44221,13299,lysine biosynthetic process via aminoadipic ac...,[]
3904606,3901749,105815,8669,translational attenuation_(105815),YBX2_(8669),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,64048,8599,translational attenuation (biological_process)...,[]
3904607,3901754,45686,5310,response to D-galactose_(45686),NAMPT_(5310),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,38457,5280,response to D-galactose (biological_process) h...,[]
3904608,3901757,111735,4343,negative regulation of nuclear-transcribed mRN...,TENT4B_(4343),interacts with,"[biological_process, interacts with, gene/prot...",biological_process|interacts with|gene/protein,69694,4320,negative regulation of nuclear-transcribed mRN...,[]


In [24]:
graph_dict["edges"]["enrichment"]['feat_emb'].list.len() == 0

0          True
1          True
2          True
3          True
4          True
           ... 
3904605    True
3904606    True
3904607    True
3904608    True
3904609    True
Name: feat_emb, Length: 3904610, dtype: bool

In [23]:
graph_dict["edges"]["enrichment"]['feat_emb']

0          []
1          []
2          []
3          []
4          []
           ..
3904605    []
3904606    []
3904607    []
3904608    []
3904609    []
Name: feat_emb, Length: 3904610, dtype: list

In [28]:
graph_dict["edges"]["enrichment"].drop(columns=["feat_emb_x", "feat_emb_y"], inplace=True)

In [29]:
# Make a merged dataframe with additional column
graph_dict["edges"]["enrichment"] = graph_dict["edges"]["enrichment"].merge(
    cudf.DataFrame({
        "triplet_index": graph_dict["edges"]["enrichment"].triplet_index,
        "feat_emb": [[]] * graph_dict["edges"]["enrichment"].shape[0]
    }),
    how="left",
    on="triplet_index"
)
# graph_dict["edges"]["enrichment"]

for i, emb_df in enumerate(graph_dict["edges"]["embedding"]):
    # Merge the embeddings into the feature column dataframe
    enrichment_df = graph_dict["edges"]["enrichment"].merge(
        emb_df,
        on="triplet_index",
        how="left",
    )

    # Create mask for rows where feat_emb is empty list and triplet_index exists in current emb_df
    mask = (
        enrichment_df['feat_emb'].list.len() == 0
    ) & (
        enrichment_df['triplet_index'].isin(emb_df['triplet_index'])
    )

    # Assign edge_emb to feat_emb only where mask is True
    enrichment_df.loc[mask, 'feat_emb'] = enrichment_df.loc[mask, 'edge_emb']

    # Drop the edge_emb column
    enrichment_df.drop(columns=['edge_emb'], inplace=True)

    # Update enrichment in graph_dict
    graph_dict["edges"]["enrichment"] = enrichment_df

    # Clean up
    del emb_df
    graph_dict["edges"]["embedding"][i] = None


# Merge the feature column with the edges enrichment dataframe
# graph_dict["edges"]["enrichment"] = graph_dict["edges"]["enrichment"].merge(
#     feat_col,
#     on="triplet_index",
#     how="left",
# )

# Store the edges enrichment
# graph_dict["edges"] = graph_dict["edges"]["enrichment"]

ValueError: Can not set <cudf.core.column.lists.ListColumn object at 0x743aa94085c0>
[
  null,
  null,
  ...
  null,
  null
]
dtype: list into ListColumn

In [18]:
import numpy as np
np.arange(10) + 1250000

array([1250000, 1250001, 1250002, 1250003, 1250004, 1250005, 1250006,
       1250007, 1250008, 1250009])

In [46]:
graph_dict["edges"]["enrichment"][graph_dict["edges"]["enrichment"].triplet_index.isin(np.arange(10) + 1350000)]

Unnamed: 0,triplet_index,primekg_head_index,primekg_tail_index,head_id,tail_id,display_relation,edge_type,edge_type_str,head_index,tail_index,feat,feat_emb
1349376,1350000,15787,17305,Coumarin_(15787),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,15466,16817,Coumarin (drug) has a direct relationship of d...,"[-0.005971643, -0.024854947, 0.020295516, -0.0..."
1349377,1350001,19704,17305,Farnesyl thiopyrophosphate_(19704),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,18897,16817,Farnesyl thiopyrophosphate (drug) has a direct...,"[-0.007006694, -0.019030526, 0.004950598, -0.0..."
1349378,1350002,15531,17305,Nimesulide_(15531),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,15227,16817,Nimesulide (drug) has a direct relationship of...,"[-0.016136318, -0.030427722, 0.01814166, -0.02..."
1349379,1350003,20451,17305,Benoxaprofen_(20451),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,19575,16817,Benoxaprofen (drug) has a direct relationship ...,"[-0.017075982, -0.02360424, 0.016603898, -0.02..."
1349380,1350004,15532,17305,Metamizole_(15532),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,15228,16817,Metamizole (drug) has a direct relationship of...,"[-0.01116664, -0.03029234, 0.006278739, -0.030..."
1349381,1350005,14893,17305,Iproniazid_(14893),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,14622,16817,Iproniazid (drug) has a direct relationship of...,"[-0.002655919, -0.017228309, 0.02556113, -0.03..."
1349382,1350006,20295,17305,Methapyrilene_(20295),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,19433,16817,Methapyrilene (drug) has a direct relationship...,"[-0.015132139, -0.023252474, -0.00335064, -0.0..."
1349383,1350007,19172,17305,Nialamide_(19172),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,18503,16817,Nialamide (drug) has a direct relationship of ...,"[-0.006733189, -0.031606145, 0.030228442, -0.0..."
1349384,1350008,14550,17305,Nomifensine_(14550),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,14325,16817,Nomifensine (drug) has a direct relationship o...,"[-0.01785563, -0.029522736, 0.014774763, -0.01..."
1349385,1350009,15533,17305,Prenylamine_(15533),Protriptyline_(17305),synergistic interaction,"[drug, synergistic interaction, drug]",drug|synergistic interaction|drug,15229,16817,Prenylamine (drug) has a direct relationship o...,"[-0.010561401, -0.021869097, 0.012813612, -0.0..."


In [35]:
graph_dict["edges"]["enrichment"][graph_dict["edges"]["enrichment"].triplet_index.isin(np.arange(10) + 1350000)].iloc[0].feat_emb.to_arrow().to_pylist()[0]

'[-0.005971643, -0.024854947, 0.020295516, -0.035641562, -0.021815326, 0.04142491, 0.001438272, -0.008977639, 0.000143218, -0.014364222, 0.009448377, 0.03045, 0.012864587, -0.002957241, -0.023133392, -0.003138811, 0.035910554, -0.014498719, 0.003638129, -0.005591691, -0.011741541, 0.013281525, 0.010672295, -0.01631442, -0.006829058, 0.021169743, -0.007161937, -0.04519081, -0.020308966, 0.007942016, 0.01406833, 0.004979732, 0.004619953, -0.0209411, 0.006634038, -0.02095455, 0.024155565, 0.025715724, 0.00550763, -0.007135037, -0.005292436, -0.004788074, 0.012467822, 0.014149028, -0.012111407, -0.01005361, 0.031741165, -0.013449647, -0.038923275, 0.020255167, 0.026361307, 0.01827807, -0.009266806, 0.000652728, 0.02238021, -0.007262809, 0.02548708, 0.016005078, -0.012777164, -0.018802606, -0.010282255, -0.007182111, 0.006489454, 0.014216276, 0.002108232, 0.000594727, -0.013738814, 0.008231183, 0.003628042, -0.003927297, 0.039246067, 0.035157375, 0.015736086, 0.003796163, 0.020026524, 0.007

In [15]:
file_list

['../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/edges/embedding/edges_1250000.parquet.gzip',
 '../../../../AIAgents4Pharma/aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal/edges/embedding/edges_1350000.parquet.gzip']

In [None]:
# # Make a simple dataframe
# feat_col = cudf.DataFrame(
#     {
#         "triplet_index": graph_dict["edges"]["enrichment"].triplet_index,
#         "feat_emb": None
#     }
# )

# # Loop over a set of embeddings chunks
# for i, emb_df in enumerate(graph_dict["edges"]["embedding"]):
#     # Merge the embeddings into the feature column dataframe
#     feat_col = feat_col.merge(
#         emb_df,
#         on="triplet_index",
#         how="left",
#     )

#     # Fill missing embeddings with edge embeddings
#     mask = (feat_col['feat_emb'].isna()) & (feat_col['triplet_index'].isin(emb_df.triplet_index))
#     feat_col.loc[mask, 'feat_emb'] = feat_col.loc[mask, 'edge_emb']

#     # Drop the edge_emb column
#     feat_col = feat_col.drop(columns=['edge_emb'])

# # Merge the feature column with the edges enrichment dataframe
# graph_dict["edges"]["enrichment"] = graph_dict["edges"]["enrichment"].merge(
#     feat_col,
#     on="triplet_index",
#     how="left",
# )

# # Store the edges enrichment
# graph_dict["edges"] = graph_dict["edges"]["enrichment"]