In [3]:
import os
import openai
from scipy.spatial import distance
import plotly.express as px

In [4]:
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment
load_dotenv()

# Hand the OpenAI API key found in the environment to the openai module
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [5]:
import pandas as pd

# Load the concept/synonym table from the CSV file and preview its first rows.
concepts_syn_df = pd.read_csv(filepath_or_buffer="concept_synonyms_CVsubset.csv")
concepts_syn_df.head()

Unnamed: 0,general_ancestor_concept_name,general_ancestor_concept_id,descendant_concept_id,concept_name,rc,dbc,drc,ddbc,max_levels_of_separation,concept_id,concept_synonym_name
0,Disorder of cardiovascular system,134057,443784,Vascular disorder,186458,13,1408753835,22,1,443784,Angiopathy
1,Disorder of cardiovascular system,134057,443784,Vascular disorder,186458,13,1408753835,22,1,443784,Disorder of blood vessel
2,Disorder of cardiovascular system,134057,443784,Vascular disorder,186458,13,1408753835,22,1,443784,Vascular disease
3,Disorder of cardiovascular system,134057,443784,Vascular disorder,186458,13,1408753835,22,1,443784,Disorder of blood vessel (disorder)
4,Disorder of cardiovascular system,134057,44784217,Cardiac arrhythmia,84417590,19,809155077,22,2,44784217,Arrhythmia


# Term Description Task

In [6]:
from openai import OpenAI
def generate_term_description(term, model="gpt-4o-mini", temperature=0.3):
    """Ask an OpenAI chat model for a short description of a medical term.

    Sends a fixed system prompt (instructions plus one worked example) and a
    user prompt containing ``term`` to the chat completions endpoint through
    the module-level ``openai`` client, which must already be imported and
    have its API key configured.

    Args:
        term: The medical term to describe; it is interpolated into the user
            prompt inside single quotes.
        model: Name of the chat model to query. Defaults to "gpt-4o-mini",
            the model this notebook originally hard-coded.
        temperature: Sampling temperature passed to the API. Defaults to 0.3,
            a low value chosen for clearer, more focused responses.

    Returns:
        The message content of the first choice in the API response.
    """
    # User message: the actual request, carrying the term to describe.
    prompt = f"""
    Provide a clear and concise description of the following medical term:
    
    Term: '{term}'
    """

    # System message: role, required output parts and one example description.
    # The text is sent verbatim, including its indentation.
    system_prompt = """You are a helpful assistant that answers questions about medical coding 
                        and terminologies for people who are trying to determine which codes
                        and terms are best to use for claims and insurance. 
                        
                        You will be given terms to define, and for each term, please 
                        provide a clear and concise description suitable for a medical coder.
                        
                        Include the following in your description:
                        - a list of approximate synonyms
                        - A brief clinical description of the term
                        - Ensure that the description is brief (no more than 3-4 sentences) and clear enough for healthcare professionals to understand.

                        An example of a good description is that of "acute and subacute endocarditis". these are the approximate synonyms: 
                            - Acute endocarditis, Acute endocarditis (infection of heart valve), Acute nonbacterial endocarditis, Endocarditis (infection of heart valve), subacute, Subacute endocarditis, Subacute endocarditis (infection of heart valve)
                        Here is the clinical information: Acute inflammation of the endocardium. Bacteria is the usual etiologic agent, and the distinction between "acute" and "subacute" has traditionally been made based on the pathogenic organism and clinical presentation.

                        Make sure the language is appropriate for clinicians and medical coders, and is easy to comprehend. 
    """

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
    )

    # Only the first choice is used; its message content is the description.
    return response.choices[0].message.content


In [15]:
# Requires generate_term_description and concepts_syn_df from the cells above.
#
# concepts_syn_df holds one row per synonym, so concept_name repeats. Ask the
# API once per distinct concept instead of once per row: this avoids sending
# the same term many times and guarantees every row of a concept gets the
# same description. Missing names are skipped rather than sent to the API.
unique_terms = concepts_syn_df["concept_name"].dropna().unique()
description_by_term = {
    term: generate_term_description(term) for term in unique_terms
}

# Row-aligned list of descriptions (NaN where concept_name is missing),
# then stored as a new column on concepts_syn_df.
descriptions = concepts_syn_df["concept_name"].map(description_by_term).tolist()
concepts_syn_df["Description"] = descriptions


In [16]:
# Two-column frame pairing each concept name with its generated description.
# Built from concepts_syn_df, the frame that carries the "Description" column;
# the previously referenced `sampled_terms` is not defined in this notebook.
description_df = concepts_syn_df[["concept_name", "Description"]].copy()

In [11]:
# Show the description stored in the first row (selected by position)
first_description = concepts_syn_df["Description"].iat[0]
print(first_description)

Approximate synonyms: 
- Athlete's heart syndrome, Athletic heart, Exercise-induced cardiac remodeling, Physiologic cardiac hypertrophy

Clinical description: Athlete's heart refers to the physiological adaptations of the heart that occur in response to prolonged and intense physical training. This condition is characterized by an increase in heart size, particularly the left ventricle, along with enhanced cardiac function and efficiency. It is important to differentiate athlete's heart from pathological conditions, as these changes are typically benign and reversible with reduced training intensity.
