In [1]:
import os
import openai
import tiktoken
import itertools

import xml.etree.ElementTree as ET
import networkx as nx
import matplotlib.pyplot as plt


import json
import networkx as nx

In [2]:
# Path to the roles list
path_to_id = ["protocolSection","identificationModule","nctId"]
path_to_interventions = ["protocolSection", "armsInterventionsModule", "interventions"]
path_to_description = ["protocolSection","descriptionModule","briefSummary"]

In [3]:
# Function to retrieve a value from a JSON structure based on a given path
def get_value_from_path(data, path):
    """
    Retrieve the value from a JSON structure based on a given path.

    :param data: The JSON data (could be a dictionary, list, or basic data type).
    :param path: A list of keys/indices that define the path to the desired value.
    :return: The value at the specified path, or None if the path doesn't exist.
    """
    current = data

    try:
        for key in path:
            if isinstance(current, dict):
                current = current[key]
            elif isinstance(current, list) and isinstance(key, int):
                current = current[key]
            else:
                return None  # Path leads to an unexpected type
        return current
    except (KeyError, IndexError):
        return None  # If the key/index does not exist

In [4]:
import os
import json

# Define the directory containing the JSON files
# directory_path = "../CancerDrugCombo/datasets/drug-comb-nct/"
directory_path = "../datasets/drug-comb-nct/"

count = 0

nct_drug_combos = {}

# Iterate over each file in the directory
for filename in os.listdir(directory_path):
    # Check if the file is a JSON file
    if filename.endswith(".json"):
        file_path = os.path.join(directory_path, filename)  # Get the full path to the file
        
        # Open and read the JSON file
        with open(file_path, 'r') as json_file:
            # Parse the JSON content
            data = json.load(json_file)
            
            # Get the roles list
            desc = get_value_from_path(data, path_to_description)
            nctid = get_value_from_path(data, path_to_id)
            
            # print(nctid, desc)
            
            nct_drug_combos[nctid] = desc

In [5]:
len(nct_drug_combos)

2496

In [6]:
list(nct_drug_combos.values())[10]

'The purpose of this study is to see if adding a regimen of individualized psychotherapy can help bipolar I patients who are on lithium.\n\nWhile having a manic or depressed episode patients will be assigned randomly (like tossing a coin) to receive appropriate medication either with or without additional individual psychotherapy. If a patient responds well, he/she will again be assigned randomly to receive further preventative treatment in which medication will be managed either with continued medication clinic visits alone or with additional individual psychotherapy (the patient may not receive the same additional treatment this time). Patient response to treatment will be evaluated throughout the study. If manic/depressive symptoms return at any point during the study, the patient will be treated with appropriate medication and will continue the study.\n\nAn individual may be eligible for this study if he/she:\n\nHas Bipolar I disorder, is experiencing a manic or depressed episode a

In [7]:
handle = open('../resources/drug.names.txt')
drug_lines = handle.readlines()

In [8]:
# extract drug mentions from clinical trials
# produce a dictionary of trial ids and their mentions as a list
nct_id_drugs = {}
for nct_id, desc in nct_drug_combos.items():
    found_drugs = []
    for drug in drug_lines:
        if " " not in drug.strip():
            if drug.strip().lower() in desc and drug.strip().lower() not in found_drugs:
                found_drugs.append(drug.strip().lower())
    if len(found_drugs) > 1:
        nct_id_drugs[nct_id] = found_drugs

In [9]:
# construct combos from dictionary and form a graph

drug_combination_graph = nx.Graph()


for nct_id, found_drugs in nct_id_drugs.items():
    # Generate all possible pairs of combinations
    pairs = list(itertools.combinations(found_drugs, 2))

    # Print the pairs
    for pair in pairs:
        drug_combination_graph.add_edge(pair[0], pair[1], label=nct_id)

In [10]:
print(drug_combination_graph)

Graph with 301 nodes and 1407 edges


In [11]:
# for edge in drug_combination_graph.edges(data=True):
#     print(edge)

In [12]:
# Specify the filename for the pickle file
filename = '../outputs/nct-combo-graph-06152024.pickle'

# Write the graph to a pickle file
nx.write_gpickle(drug_combination_graph, filename)

print(f"Graph has been written to {filename}")

Graph has been written to ../outputs/nct-combo-graph-06152024.pickle


In [13]:
import pickle

# Step 2: Open the pickle file in read-binary mode
with open("../outputs/nct-combo-graph.pickle", "rb") as file:
    # Step 3: Load the graph from the pickle file
    drug_combination_graph = pickle.load(file)

# Now the graph is loaded into memory and you can use it
print(drug_combination_graph)


Graph with 301 nodes and 1407 edges


In [14]:
list_nodes = drug_combination_graph.nodes()

In [15]:
len(list_nodes)

301

In [17]:
file_path = "../resources/nct-combo-members-06152024.txt"
with open(file_path, "w") as file:
    for drug in list_nodes:
        file.write(drug + "\n")    
    