In [40]:
import os
import pandas as pd
from ai21 import AI21Client
from ai21.models.chat import ChatMessage, ResponseFormat
from multiprocessing import Pool, cpu_count
import numpy as np
import time
import random

In [88]:


# Retrieve the API key from environment variable
api_key = os.getenv('AI21LABS_API_KEY')

# Check if the API key is set
if not api_key:
    raise ValueError("API key not found. Please set the AI21_API_KEY environment variable.")

# Set up the AI21 client
client = AI21Client(api_key=api_key)

def generate_clinical_summary(row):
    """Generate a clinical summary based on the provided row of data."""
    # Build context string from the DataFrame row
    context = "\n".join(f"{col}: {row[col]}" for col in df.columns if pd.notna(row[col]))
    
    # Define the prompt
    prompt = f"""You are a helpful assistant that generates clinical notes using medical terminology.
    Utilize reasoning to analyze the following structured data and provide a detailed, narrative clinical note
    reflecting typical communication in clinical notes without suggesting solutions. Use paragraph breaks to separate different aspects of the note.
    Data: {context}

    <<Sample output of Clinical Note>>:
    The patient, with ID 10005817, was administered a single dose of IV antibiotics as per the treatment plan. The administration occurred on December 16, 2132, at 19:50, with a total volume of 500 ml delivered.

    The patient weighs 91 kg and was under the care of caregiver ID 4793. The status of the medication administration is noted as "Finished Running."
    
    Please provide a similar style of clinical note for the given data."""

    # Create the chat completion
    response = client.chat.completions.create(
        model="jamba-1.5-large",
        messages=[ChatMessage(role="user", content=prompt)],
        n=1,
        max_tokens=1024,
        temperature=0.4,
        top_p=1,
        response_format=ResponseFormat(type="text"),
    )


    # Extract and return the generated summary
    return response.choices[0].message.content


In [6]:
# Load the CSV file into a DataFrame
df = pd.read_csv('datasets/mimic/icu/inputevents.csv')

In [8]:
df1= pd.read_csv('datasets/mimic/icu/procedureevents.csv')

In [14]:
# Find a subject_id that is present in both df and df1
subject_id = df['subject_id'].iloc[0]

# Filter df1 for the same subject_id
df_filtered = df[df['subject_id'] == subject_id]
df1_filtered = df1[df1['subject_id'] == subject_id]

# Now you can proceed with your processing

In [17]:
# Save df_filtered to a CSV file
df_filtered.to_csv('datasets/single_patient/inputevents_filtered.csv', index=False)
df1_filtered.to_csv('datasets/single_patient/procedureevents_filtered.csv', index=False)

In [48]:
num_processes = 10

In [71]:
generate_clinical_summary(df_filtered.iloc[1])

'**Clinical Note:**\n\nThe patient, with ID 10005817, was administered a dose of antibiotics as per the treatment plan. The administration occurred on December 15, 2132, at 20:15, with the medication recorded as finished running by 20:16. The total volume delivered was 1.0 dose.\n\nThe patient weighs 91.0 kg and was under the care of caregiver ID 92805. The status of the medication administration is noted as "FinishedRunning."'

In [52]:
# Define the number of parallel processes

# Split the DataFrame into chunks
df_chunks = np.array_split(df_filtered, num_processes)

# Function to process a chunk of the DataFrame
def process_chunk(chunk):
    # Introduce a random delay
    time.sleep(random.uniform(0, 1))
    
    for index, row in chunk.iterrows():
        chunk.at[index, 'clinical_summary'] = generate_clinical_summary(row)
        print(".", end="", flush=True)
    #chunk['clinical_summary'] = chunk.apply(generate_clinical_summary, axis=1)
    return chunk

# Use multiprocessing to apply the function to all rows of the DataFrame
with Pool(num_processes) as pool:
    results = pool.map(process_chunk, df_chunks)

# Combine the results
df_processed = pd.concat(results)


  return bound(*args, **kwds)


....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [55]:
# Save the updated DataFrame to a new CSV file
output_file = 'datasets/single_patient/inputevents_filtered_with_summary.csv'
df_processed.to_csv(output_file, index=False)

print(f"Clinical summaries generated and saved to {output_file}")

Clinical summaries generated and saved to datasets/single_patient/inputevents_filtered_with_summary.csv


In [78]:
df1_filtered.keys()

Index(['subject_id', 'hadm_id', 'stay_id', 'caregiver_id', 'starttime',
       'endtime', 'storetime', 'itemid', 'value', 'valueuom', 'location',
       'locationcategory', 'orderid', 'linkorderid', 'ordercategoryname',
       'ordercategorydescription', 'patientweight', 'isopenbag',
       'continueinnextdept', 'statusdescription', 'ORIGINALAMOUNT',
       'ORIGINALRATE'],
      dtype='object')

In [89]:
# Create an "amount" column populated with 0
df1_filtered1 = df1_filtered.copy()
df1_filtered1['amount'] = 0
generate_clinical_summary(df1_filtered.iloc[0])

KeyError: 'amount'

In [90]:
row = df1_filtered.iloc[0]
context = "\n".join(f"{col}: {row[col]}" for col in df1_filtered.columns if pd.notna(row[col]))

NameError: name 'row' is not defined

In [82]:
generate_clinical_summary(df1_filtered.iloc[0])

KeyError: 'amount'

In [64]:
df1_filtered_chunks = np.array_split(df1_filtered, num_processes)


In [65]:
# Use multiprocessing to apply the function to all rows of the DataFrame
with Pool(num_processes) as pool:
    results = pool.map(process_chunk, df1_filtered_chunks)

# Combine the results
df_processed = pd.concat(results)

KeyError: 'amount'

In [None]:
# Save the updated DataFrame to a new CSV file
output_file = 'datasets/single_patient/procedureevents_filtered_with_summary.csv'
df_processed.to_csv(output_file, index=False)

print(f"Clinical summaries generated and saved to {output_file}")