In [6]:
import os

import pandas as pd
import requests

In [2]:
# Input workbook; its sheets are read further down with pd.read_excel
excel_file = "loinc_dataset-v2.xlsx"

# API Base URL (example, update as needed)
api_url = "https://loinc.regenstrief.org/searchapi/loincs"

# Authentication credentials (HTTP basic auth as a (username, password) tuple).
# They are read from the environment so that no secret is stored in the
# notebook or in version control. Set them before starting the kernel:
#   export LOINC_USERNAME=...
#   export LOINC_PASSWORD=...
# NOTE(review): the password that used to be hardcoded here has been exposed
# and should be rotated.
auth = (
    os.environ.get("LOINC_USERNAME", ""),
    os.environ.get("LOINC_PASSWORD", ""),
)
if not all(auth):
    # Fail visibly at configuration time instead of deep inside the API loops.
    print("WARNING: LOINC_USERNAME / LOINC_PASSWORD are not set; "
          "LOINC API requests will most likely fail authentication.")

In [3]:
# Function to get ranking for a LOINC code
def get_loinc_data(loinc_num, timeout=30):
    """
    Looks up the COMMON_TEST_RANK of a LOINC code through the search API.

    Args:
        loinc_num: LOINC code; it is sent verbatim as the search query.
        timeout: Seconds passed to `requests.get` as its timeout, so a stalled
            connection raises instead of blocking the notebook indefinitely.

    Returns:
        The COMMON_TEST_RANK of the first search result, "No Rank Found" when
        that result has no such field, or "Not Found" when the response status
        is not 200 or the response contains no results.
    """
    params = {"query": loinc_num, "rows": 1}  # only the first hit is inspected
    response = requests.get(api_url, params=params, auth=auth, timeout=timeout)

    # Any non-200 status (including authentication failures) is reported the
    # same way as an empty result set.
    if response.status_code != 200:
        return "Not Found"

    data = response.json()
    if "Results" in data and len(data["Results"]) > 0:
        # Extract the COMMON_TEST_RANK
        return data["Results"][0].get("COMMON_TEST_RANK", "No Rank Found")
    return "Not Found"

In [4]:
# LOINC numbers returned by successful searches, keyed by (query text, row limit).
# check_query is applied once per row with the same query for a whole sheet, so
# caching turns one API call per row into one API call per query.
# The cache lives for the kernel session; re-running this cell clears it.
_QUERY_RESULT_CACHE = {}


# Function to check if a LOINC code appears in the search results of a query
def check_query(loinc_code, query_search, rows=800, timeout=30):
    """
    Tells whether a LOINC code is among the search results for a query.

    Args:
        loinc_code: LOINC number to look for (compared against LOINC_NUM).
        query_search: Free-text query sent to the search API.
        rows: Maximum number of results requested from the API.
        timeout: Seconds passed to `requests.get` as its timeout.

    Returns:
        1 if any returned result has LOINC_NUM equal to `loinc_code`, else 0.
        A non-200 response also yields 0.
    """
    cache_key = (query_search, rows)
    matching_codes = _QUERY_RESULT_CACHE.get(cache_key)

    if matching_codes is None:
        params = {"query": query_search, "rows": rows}
        response = requests.get(api_url, params=params, auth=auth, timeout=timeout)  # API call with authentication

        if response.status_code != 200:
            # Failures are deliberately not cached so a later call can retry.
            return 0

        data = response.json()
        matching_codes = {result.get("LOINC_NUM") for result in data.get("Results", [])}
        _QUERY_RESULT_CACHE[cache_key] = matching_codes

    return 1 if loinc_code in matching_codes else 0

In [5]:
# Load every sheet of the workbook as {sheet name: DataFrame};
# the first two rows of each sheet are skipped
excel_sheets = pd.read_excel(excel_file, sheet_name=None, skiprows=2)

# Annotated sheets, keyed by sheet name
result_dict = {}

# The sheet name doubles as the search query for the rows of that sheet
for sheet_name, df in excel_sheets.items():
    print(f"Processing sheet: {sheet_name}")

    # LOINC codes come from the 'loinc_num' column and are passed on as strings
    loinc_codes = df["loinc_num"].astype(str)

    # Value returned by get_loinc_data for each code
    df["rank"] = loinc_codes.apply(get_loinc_data)

    # 1 when the code shows up in the search results for the sheet name, else 0
    df["inSearch"] = loinc_codes.apply(lambda code: check_query(code, sheet_name))

    result_dict[sheet_name] = df

# Write all annotated sheets into one new workbook, one sheet per query
with pd.ExcelWriter("./loinc_ranks_query.xlsx") as writer:
    for name, frame in result_dict.items():
        frame.to_excel(writer, sheet_name=name, index=False)

print("LOINC rankings saved to loinc_ranks_query.xlsx")

Processing sheet: glucose in blood
Processing sheet: bilirubin in plasma
Processing sheet: White blood cells count
LOINC rankings saved to loinc_ranks_query.xlsx


## Adding more terms

## More general code: adding related and unrelated documents to every sheet

In [15]:
def get_related_loincs(query, num_results=10, timeout=30):
    """
    Fetches relevant documents from the LOINC API based on a search query
    (the callers in this notebook pass sheet names).

    Args:
        query: Free-text query sent to the search API.
        num_results: Maximum number of results requested from the API.
        timeout: Seconds passed to `requests.get` as its timeout.

    Returns:
        A DataFrame with the columns loinc_num, long_common_name, component,
        system, property, rank and inSearch (always 1), one row per result.
        When the response status is not 200 or there are no results, the frame
        is empty but still has these columns, so callers can safely select
        `["loinc_num"]` from it.
    """
    columns = [
        "loinc_num",
        "long_common_name",
        "component",
        "system",
        "property",
        "rank",
        "inSearch",
    ]
    params = {"query": query, "rows": num_results}
    response = requests.get(api_url, params=params, auth=auth, timeout=timeout)

    results = []
    if response.status_code == 200:
        data = response.json()
        for result in data.get("Results", []):
            results.append({
                "loinc_num": result.get("LOINC_NUM", "Unknown"),
                "long_common_name": result.get("LONG_COMMON_NAME", "Unknown"),
                "component": result.get("COMPONENT", "Unknown"),
                "system": result.get("SYSTEM", "Unknown"),
                "property": result.get("PROPERTY", "Unknown"),
                "rank": result.get("COMMON_TEST_RANK", "No Rank Found"),
                "inSearch": 1  # Mark as related
            })

    # Passing `columns` keeps the layout stable even when `results` is empty.
    return pd.DataFrame(results, columns=columns)

In [16]:
def get_unrelated_loincs(existing_loincs, num_results=20, timeout=30):
    """
    Fetches documents that are NOT in the list of already used LOINC IDs.

    The documents come from a fixed wildcard search ("*"); they are taken in
    the order the API returns them, not sampled at random.

    Args:
        existing_loincs: Iterable of LOINC numbers to exclude.
        num_results: Maximum number of rows to return. Five times as many
            results are requested so that enough remain after the exclusion.
        timeout: Seconds passed to `requests.get` as its timeout.

    Returns:
        A DataFrame with the columns loinc_num, long_common_name, component,
        system, property, rank and inSearch (always 0), holding at most
        `num_results` rows. The frame is empty (with the same columns) when the
        response status is not 200 or nothing qualifies.
    """
    columns = [
        "loinc_num",
        "long_common_name",
        "component",
        "system",
        "property",
        "rank",
        "inSearch",
    ]
    # Materialise once so membership tests are cheap for any iterable input.
    excluded = frozenset(existing_loincs)

    params = {"query": "*", "rows": num_results * 5}  # over-fetch, then filter
    response = requests.get(api_url, params=params, auth=auth, timeout=timeout)

    results = []
    if response.status_code == 200:
        data = response.json()
        for result in data.get("Results", []):
            # Stop scanning as soon as the requested number of rows is collected.
            if len(results) >= num_results:
                break

            loinc_id = result.get("LOINC_NUM", "Unknown")

            # Ensure it's not in the related documents list
            if loinc_id in excluded:
                continue

            results.append({
                "loinc_num": loinc_id,
                "long_common_name": result.get("LONG_COMMON_NAME", "Unknown"),
                "component": result.get("COMPONENT", "Unknown"),
                "system": result.get("SYSTEM", "Unknown"),
                "property": result.get("PROPERTY", "Unknown"),
                "rank": result.get("COMMON_TEST_RANK", "No Rank Found"),
                "inSearch": 0  # Mark as NOT related
            })

    # Passing `columns` keeps the layout stable even when `results` is empty.
    return pd.DataFrame(results, columns=columns)


In [17]:
def add_related_and_unrelated_rows_to_sheets(file_path):
    """
    Reads the Excel file and adds up to 10 related and up to 20 unrelated
    documents to each sheet.

    The sheet name is used as the search query for the related documents.
    The workbook at `file_path` is rewritten in place, so calling this function
    again on the same file appends another batch of rows to every sheet.

    Args:
        file_path: Path of the Excel workbook to read and overwrite. Every
            sheet must contain a `loinc_num` column.

    Raises:
        KeyError: If a sheet has no `loinc_num` column.
    """
    # Read all sheets from the Excel file
    excel_sheets = pd.read_excel(file_path, sheet_name=None)

    # Dictionary to store updated data
    updated_sheets = {}

    # Iterate through each sheet in the Excel file
    for sheet_name, df in excel_sheets.items():
        print(f"Adding documents to sheet: {sheet_name}")

        # Get 10 relevant documents based on the sheet name (query)
        df_related = get_related_loincs(sheet_name, num_results=10)

        # LOINC numbers that must not reappear among the unrelated documents
        existing_loincs = set(df["loinc_num"].astype(str))
        # The related frame can lack the column when the search produced no
        # rows; skip it then instead of failing with a KeyError.
        if "loinc_num" in df_related.columns:
            existing_loincs |= set(df_related["loinc_num"].astype(str))

        # Get 20 unrelated documents ensuring no duplicates
        df_unrelated = get_unrelated_loincs(existing_loincs, num_results=20)

        # Combine original data with new documents
        df_updated = pd.concat([df, df_related, df_unrelated], ignore_index=True)

        # Save updated sheet data
        updated_sheets[sheet_name] = df_updated

    # Save the updated Excel file with all new rows
    with pd.ExcelWriter(file_path, engine="openpyxl", mode="w") as writer:
        for sheet_name, df in updated_sheets.items():
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    print("Related and unrelated documents have been successfully added.")


In [18]:
# Run the function to update the file with all sheets.
# NOTE: the workbook is read and then overwritten in place, so re-running this
# cell appends the related/unrelated rows to every sheet again.
add_related_and_unrelated_rows_to_sheets("loinc_ranks_query.xlsx")

Adding documents to sheet: glucose in blood
Adding documents to sheet: bilirubin in plasma
Adding documents to sheet: White blood cells count
Related and unrelated documents have been successfully added.


## Adding a new query sheet 

In [30]:
def add_new_query_sheet(file_path, new_query="cholesterol in blood", num_related=10, num_unrelated=90):
    """
    Adds a new query and its related/unrelated documents as a sheet in the Excel file.

    The workbook at `file_path` is read, the sheet for `new_query` is built
    from the search API, and the whole workbook is written back in place.
    If a sheet named `new_query` already exists it is replaced, so the function
    can be re-run for the same query.

    Args:
        file_path: Path of the Excel workbook to read and overwrite.
        new_query: Search query; also used as the name of the new sheet.
            NOTE(review): Excel limits sheet names to 31 characters - confirm
            longer queries are not needed before passing them here.
        num_related: Number of related documents requested for the query.
        num_unrelated: Maximum number of unrelated documents to add.
    """
    # Read all existing sheets
    excel_sheets = pd.read_excel(file_path, sheet_name=None)

    print(f"Adding new query: {new_query}")

    # Fetch data for the new query
    df_related = get_related_loincs(new_query, num_results=num_related)

    # LOINC numbers of the related documents; these are excluded from the
    # unrelated ones. The frame can lack the column when the search produced
    # no rows, in which case nothing is excluded.
    existing_loincs = set()
    if "loinc_num" in df_related.columns:
        existing_loincs = set(df_related["loinc_num"].astype(str))

    # Get the unrelated documents ensuring no duplicates
    df_unrelated = get_unrelated_loincs(existing_loincs, num_results=num_unrelated)

    # Combine related and unrelated documents into the new sheet
    df_new_sheet = pd.concat([df_related, df_unrelated], ignore_index=True)

    # Register the sheet in the mapping rather than writing it separately:
    # a new name is appended after the existing sheets, an existing name is
    # replaced, and each sheet name is written exactly once.
    excel_sheets[new_query] = df_new_sheet

    # Rewrite the workbook with all sheets, including the new one
    with pd.ExcelWriter(file_path, engine="openpyxl", mode="w") as writer:
        for sheet_name, df in excel_sheets.items():
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"New sheet '{new_query}' added successfully.")

In [31]:
# Run the function to add a new query sheet. The function's built-in query
# ("cholesterol in blood") is used; no random selection takes place.
add_new_query_sheet("loinc_ranks_query.xlsx")

Adding new query: cholesterol in blood
New sheet 'cholesterol in blood' added successfully.
