# Generate JSON-LD for a graph given its GraphML definition

In [None]:
import networkx as nx

# Read the gene graph from its GraphML file on disk.
# The Windows path below is spelled with doubled forward slashes (//) as separators.

graph_path = 'C://Users//donbr//github//nsclc-pathways//ts_sl_gene_graph.graphml'
G = nx.read_graphml(graph_path)

# View that yields one (node_id, attribute_dict) pair per node in the graph
nodes_data = G.nodes(data=True)

# Keep only the first 5 pairs, enough to see the node naming and attribute keys
nodes_sample = [*nodes_data][:5]

# Bare expression: shown as the cell result when run in a notebook
nodes_sample


In [None]:
import networkx as nx
import json

# Load the GraphML file
# graphml_file_path = '/mnt/data/ts_sl_gene_graph.graphml'
# G = nx.read_graphml(graphml_file_path)
# pubmed may also be useful - 	https://identifiers.org/pubmed:16333295

# Define the JSON-LD context for linked data.
# Each term maps a short key used in the gene entries below to an IRI.
# NOTE(review): "relationship" is declared here, but this cell only exports
# nodes; no edge data is written. Confirm whether edges should be exported.
context = {
    "name": "http://schema.org/name",
    "symbol": "http://identifiers.org/symbol/",
    "entrezId": "http://identifiers.org/ncbigene/",
    "hgncId": "http://identifiers.org/hgnc/",
    "ensemblGeneId": "http://identifiers.org/ensembl/",
    "uniprotId": "http://identifiers.org/uniprot/",
    "iupharId": "http://identifiers.org/iuphar/",
    "keggId": "http://identifiers.org/kegg.genes/",
    "omimId": "http://identifiers.org/omim/",
    "orphanetId": "http://identifiers.org/orphanet/",
    "location": "http://schema.org/position",
    "relationship": "http://schema.org/relatedTo"
}

# URL-valued gene properties, listed in the order they are emitted.
# Row layout: (JSON-LD key, GraphML node attribute, URL prefix, picker).
# The picker extracts the identifier from the attribute text; None means the
# whole text is the identifier.
_URL_FIELDS = (
    ("entrezId", "entrez_id", "http://identifiers.org/ncbigene/", None),
    ("ensemblGeneId", "ensembl_gene_id", "http://identifiers.org/ensembl/", None),
    # Only the first entry of the comma-separated list is linked
    ("uniprotId", "uniprot_ids", "http://identifiers.org/uniprot/",
     lambda text: text.split(',')[0]),
    # Keep what follows the last ':' in the attribute text
    ("iupharId", "iuphar",
     "https://www.guidetopharmacology.org/GRAC/ObjectDisplayForward?objectId=",
     lambda text: text.split(':')[-1]),
    # The KEGG gene link is built from the Entrez id with an "hsa:" prefix
    ("keggId", "entrez_id", "http://identifiers.org/kegg.genes/hsa:", None),
    ("omimId", "omim_id", "https://omim.org/entry/", None),
    # Keep what precedes the first '.' in the attribute text
    ("orphanetId", "orphanet", "http://www.orpha.net/ORDO/Orphanet_",
     lambda text: text.split('.')[0]),
)


def _identifier_url(node_data, attribute, prefix, pick=None):
    """Build ``prefix + identifier`` for one node attribute.

    Args:
        node_data: Attribute dictionary of a single graph node.
        attribute: Name of the node attribute holding the identifier.
        prefix: URL text placed in front of the identifier.
        pick: Optional callable that extracts the identifier from the
            attribute text; when None the whole text is used.

    Returns:
        The URL string, or None when the attribute is absent or yields an
        empty identifier (so no dangling prefix-only URL is produced).
    """
    raw = node_data.get(attribute)
    if raw is None:
        return None
    # str() lets numeric-typed GraphML attributes go through the same
    # text handling (split/strip) as string-typed ones.
    identifier = str(raw).strip()
    if pick is not None:
        identifier = pick(identifier).strip()
    if not identifier:
        return None
    return prefix + identifier


# Prepare the JSON-LD graph structure
json_ld_graph = []

# Iterate through each node in the GraphML file (each gene)
for node_id, node_data in G.nodes(data=True):
    # Use the node_id directly as the gene symbol (e.g., "TYMS")
    gene_symbol = node_id

    gene_entry = {}

    # The HGNC id (text after the last ':') identifies the entry when available
    hgnc_url = _identifier_url(
        node_data, 'hgnc_id', "http://identifiers.org/hgnc/",
        lambda text: text.split(':')[-1],
    )
    if hgnc_url:
        gene_entry["@id"] = hgnc_url

    # Always add the symbol (based on node ID)
    gene_entry["symbol"] = gene_symbol

    # Plain-valued attributes are copied through unchanged when present
    if 'name' in node_data:
        gene_entry["name"] = node_data['name']

    # Cross-reference links, skipped when the source attribute is missing/empty
    for json_key, attribute, prefix, pick in _URL_FIELDS:
        url = _identifier_url(node_data, attribute, prefix, pick)
        if url:
            gene_entry[json_key] = url

    if 'location' in node_data:
        gene_entry["location"] = node_data['location']

    # "symbol" is always set, so every node yields an entry
    json_ld_graph.append(gene_entry)

# Create the final JSON-LD structure
json_ld_data = {
    "@context": context,
    "@graph": json_ld_graph
}

# Convert the JSON-LD structure to a string
json_ld_output = json.dumps(json_ld_data, indent=2)

# Output the JSON-LD to a file; the explicit encoding keeps the result
# independent of the platform's default text encoding
output_file_path = 'gene_network.jsonld'
with open(output_file_path, 'w', encoding='utf-8') as f:
    f.write(json_ld_output)

print(f"JSON-LD output with HGNC-approved symbols saved to {output_file_path}")
