In [77]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment before any
# model client is constructed.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI
# comes from — confirm against the project's .env.
load_dotenv()

True

In [78]:
def generate_description(diagnosis_category, diagnosis_sub_category, treatment_category, treatment_sub_category, temperature):
    """Ask the chat model for a doctor's-viewpoint narrative of one visit.

    A ChatOpenAI client is created for this call with the given temperature and
    invoked with a fixed system instruction followed by a human message that
    embeds the four category values verbatim. No filtering of empty/NaN values
    happens here; the system instruction asks the model to ignore them.

    Args:
        diagnosis_category: Value placed after "Diagnosis Category:".
        diagnosis_sub_category: Value placed after "Diagnosis Sub Category:".
        treatment_category: Value placed after "Treatment Category:".
        treatment_sub_category: Value placed after "Treatment Sub Category:".
        temperature: Forwarded unchanged to ``ChatOpenAI(temperature=...)``.

    Returns:
        Whatever ``invoke`` returns; callers in this notebook read its
        ``.content`` attribute for the generated text.
    """
    llm = ChatOpenAI(temperature=temperature)

    # The instruction lines are joined with a newline followed by twelve spaces,
    # and the final empty entry yields a trailing newline + indent. That layout
    # (and the trailing spaces on some lines) is part of the prompt text, so it
    # must be kept intact to send the same characters to the model.
    instruction_lines = (
        "You are programmed to create believable narratives of interactions between patients and doctors within a hospital setting, strictly from the doctor's viewpoint. ",
        "Your responses should draw from specified variables: Diagnosis Category, Diagnosis Sub Category, Treatment Category, and Treatment Sub Category. ",
        "Exclude any inputs that are empty or contain NaN.",
        "Do not invent patient identifiers such as names, ages, or genders. ",
        "Invent plausible symptoms that align with the given diagnosis and treatment. ",
        "Your output should be a straightforward text description.",
        "Omit explanations or justifications.",
        "",
    )
    system_text = ("\n" + " " * 12).join(instruction_lines)

    user_text = f'Diagnosis Category: {diagnosis_category}, Diagnosis Sub Category:{diagnosis_sub_category}, Treatment Category:{treatment_category}, Treatment Sub Category:{treatment_sub_category}. Generate description.'

    conversation = [
        SystemMessage(content=system_text),
        HumanMessage(content=user_text),
    ]
    return llm.invoke(conversation)

In [79]:
import pandas as pd

# Source visit table. Later cells index it by row label and read the
# 'Visit ID', diagnosis and treatment columns from it.
source_csv_path = 'California_PBAC_RAG_v1.csv'
df = pd.read_csv(filepath_or_buffer=source_csv_path)
# Bare expression so the notebook renders the frame as this cell's output.
df


Unnamed: 0,Visit ID,Visit Date,Patient ID,Diagnosis Category,Diagnosis Sub Category,Treatment Category,Treatment Sub Category,New Patient,Consulting Physician,Patient Name,Patient Gender,Patient Age Range,Patient Blood Type,Patient Insurance Number,Patient Phone,Patient Address,Patient Occupation,Patient Emergency Contact
0,VQC513203,2017-01-01,PZM508653,Chronic Pain,Vertebral Disc Problem,Pharmacy/Prescription Drugs,Non-FDA Approved Use,True,Dr. Jerry Daniels,Michelle Fisher,Female,41-50,O-,G264037622,15786233257,Pärtzeltweg 2\n22301 Neunburg vorm Wald,"Surveyor, quantity","Jennifer Bailey, 015680180768."
1,VVC435406,2017-01-01,PSN036517,Endocrine/ Metabolic,Hormone Deficiency,Pharmacy/Prescription Drugs,Hormones,True,Dr. Michelle Lamb,Brooke Davis,Female,31-40,O+,Y133547589,16238255302,Kira-Gorlitz-Allee 8\n67100 Rosenheim,Copy,"Dennis Carlson, 015182104709."
2,VKT437745,2017-01-01,PPD253419,Pediatrics,Delayed Speech,Rehabilitation Services - Outpatient,Speech Therapy,True,Dr. Michelle Lamb,Andrew Graves,Male,0-10,O+,F115599209,17550508440,Thiesstr. 3/5\n68745 Bremen,Claims inspector/assessor,"Veronica Harris, 015034485673."
3,VJG208744,2017-01-01,PBN488954,Central Nervous System/ Neuromuscular,,Pharmacy/Prescription Drugs,Non-FDA Approved Use,True,Dr. James Barber,Cody Carpenter,Male,51-64,AB+,W720918648,15210588122,Schmidtkeallee 53\n54913 Siegen,Tourism officer,"Michelle Graham, 017438105819."
4,VAF235393,2017-01-01,PJG173047,Cancer,Breast Cancer,Cancer Treatment,Surgery,True,Dr. James Barber,Christopher White,Female,31-40,A+,Q198981012,16561784363,Ida-Fliegner-Ring 7/6\n53118 Wolgast,Engineering geologist,"Peter Stout, 015017527431."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11684,VYV246792,2018-12-30,PBM172577,Pediatrics,Delayed Speech,Rehabilitation Services - Outpatient,Speech Therapy,False,Dr. James Barber,Kenneth Smith,Male,0-10,B+,H797815344,17316183697,Steinbergstr. 5/9\n43481 Ebermannstadt,"Librarian, public","Steven Caldwell, 015315613798."
11685,VBO082144,2018-12-30,PVI648420,Mental,Depression,Electrical/ Thermal/ Radiofreq. Interventions,,False,Dr. Alexandria Gaines,Kristin Jones,Female,51-64,AB+,Z982649988,17953227317,Beckerplatz 0\n46198 Beeskow,Site engineer,"Scott Simpson, 016581707368."
11686,VKA771318,2018-12-30,PDB455996,Endocrine/ Metabolic,Thyroid Problems,Acute Medical Services - Outpatient,Emergency Room,False,Dr. Michelle Lamb,Christopher Williams,Female,51-64,AB+,Private,16951975501,Fiebigplatz 5\n26886 Fürstenwalde,"Chemist, analytical","James Farrell, 015303912796."
11687,VQZ422704,2018-12-30,PFS771409,Genetic,Chromosomal Anomalies,Pharmacy/Prescription Drugs,Hormones,False,Dr. James Barber,Pamela Mcgee MD,Female,0-10,A+,L605331052,15918361501,Mielcarekplatz 1/9\n83357 Melle,Scientific laboratory technician,"Michael Weiss, 017716313028."


In [80]:
import csv
import os

filename = 'California_PBAC_RAG_Descriptions.csv'

# Seed the output file with its header row the first time this cell runs.
# An existing file is left untouched, so re-running the cell keeps any rows
# that were already appended.
if not os.path.exists(filename):
    header_columns = ["Visit ID", "Description"]
    with open(filename, 'w', newline='') as out_file:
        csv.writer(out_file).writerow(header_columns)

In [81]:
filename = 'California_PBAC_RAG_Descriptions.csv'
def append_row(visit_id, description):
    """Append one (Visit ID, Description) record to the descriptions CSV.

    The file named by the module-level ``filename`` is opened in append mode,
    one CSV row is written, and the file is closed again, so every generated
    description is on disk as soon as this returns.

    Args:
        visit_id: Value written to the "Visit ID" column.
        description: Value written to the "Description" column. Commas, quotes
            and newlines are quoted by the csv module.
    """
    row_data = {"Visit ID": visit_id, "Description": description}

    # Explicit UTF-8: without it open() uses the platform's locale encoding,
    # while pandas.read_csv (used to read this file back) decodes as UTF-8 by
    # default. Generated text can contain non-ASCII characters, so both sides
    # must agree regardless of the machine's locale.
    with open(filename, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(row_data))
        writer.writerow(row_data)

In [82]:
# Testing temp. range
def test_temp():
    """Try temperatures 0.1 through 0.7 on the first seven rows of ``df``.

    Row ``i`` is generated with temperature ``(i + 1) / 10``. Each response is
    printed, and its text is also appended to the descriptions CSV through
    ``append_row`` — so running this leaves rows 0-6 recorded in that file.
    """
    for i in range(7):
        # A numeric value rather than a string such as '0.1', so ChatOpenAI
        # receives the same type the batch-generation cell passes (floats).
        temperature = (i + 1) / 10
        response = generate_description(
            df.loc[i, 'Diagnosis Category'],
            df.loc[i, 'Diagnosis Sub Category'],
            df.loc[i, 'Treatment Category'],
            df.loc[i, 'Treatment Sub Category'],
            temperature,
        )
        print(f'Row {i}, with temperature: {temperature}.\n', response, '\n')
        append_row(df.loc[i, 'Visit ID'], response.content)
#test_temp()

In [85]:
import random
import time

# Sampling temperatures; one is picked at random for every generated row.
temperatures = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# Read the already-generated descriptions once and keep their Visit IDs in a
# set. Re-reading the whole (growing) CSV on every iteration is quadratic
# work; the set is kept current below as new rows are appended.
df_descriptions = pd.read_csv('California_PBAC_RAG_Descriptions.csv')
completed_visit_ids = set(df_descriptions['Visit ID'])

# Hard-coded batch window: row labels 3000 through 6999 of df.
for i in range(3000, 7000):
    visit_id = df['Visit ID'][i]
    if visit_id in completed_visit_ids:
        # Already has a description (earlier run or earlier iteration): skip
        # without calling the model, which makes the cell safe to resume.
        continue

    temperature = random.choice(temperatures)
    response = generate_description(
        df['Diagnosis Category'][i],
        df['Diagnosis Sub Category'][i],
        df['Treatment Category'][i],
        df['Treatment Sub Category'][i],
        temperature,
    )
    append_row(visit_id, response.content)
    completed_visit_ids.add(visit_id)
    # Short pause between API calls.
    time.sleep(0.2)
    