In [1]:
import numpy as np
import pandas as pd
from TabuLLM.embed import TextColumnTransformer
from TabuLLM.cluster import SphericalKMeans
# Load the raw table; the path is relative to the notebook's working directory.
df = pd.read_csv('../../data/raw.csv')

# Embed the free-text 'diagnoses' column. A one-column DataFrame (not a Series)
# is handed to the transformer.
# NOTE(review): model_type='st' presumably selects a sentence-transformers
# model — confirm against the TabuLLM documentation.
text_embedder = TextColumnTransformer(model_type='st')
embeddings = text_embedder.fit_transform(df.loc[:, ['diagnoses']])

# Cluster the embeddings into a fixed number of groups.
n_clusters = 10
clusterer = SphericalKMeans(n_clusters=n_clusters)
cluster_labels = clusterer.fit_predict(embeddings)

# Sanity check: the distinct labels must be exactly 0 .. n_clusters-1,
# i.e. every cluster received at least one row.
assert np.array_equal(np.unique(cluster_labels), np.arange(n_clusters))

  from tqdm.autonotebook import tqdm, trange


In [2]:
from TabuLLM.explain import generate_prompt
# One diagnosis text per row of df, as a plain Python list.
diagnosis_texts = list(df['diagnoses'])

# generate_prompt returns two pieces; later cells send the first as the
# "system" message and the second as the "user" message of the chat request.
prompt_instruction, prompt_body = generate_prompt(
    text_list=diagnosis_texts,
    cluster_labels=cluster_labels,
    prompt_observations='pediatric cardiopulmonary bypass surgeries',
    prompt_texts='planned procedures',
)

In [3]:
from pydantic import BaseModel

# Structured description of a single group; instances are nested inside
# MultipleGroupLabels.groups, which is used as the LLM response format.
# NOTE: documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its generated JSON schema as
# `description`, which would alter the schema sent with the request.
class GroupLabel(BaseModel):
    number: int  # group number; presumably matches the numbering used in the prompt — TODO confirm
    description_short: str  # short label for the group
    description_long: str  # longer free-text description of the group

# Top-level structured response: the complete list of group labels.
# Used as `response_format` in the chat-completions parse request.
# NOTE: documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its generated JSON schema as
# `description`, which would alter the schema sent with the request.
class MultipleGroupLabels(BaseModel):
    groups: list[GroupLabel]

    def to_df(self):
        """Return the groups as a DataFrame with one row per group.

        Columns are the ``GroupLabel`` fields in declaration order; rows are
        sorted by ``number`` and re-indexed from 0.

        The column list is passed explicitly so that an empty ``groups`` list
        produces an empty frame with the expected columns. Without it,
        ``pd.DataFrame([])`` has no columns and ``sort_values('number')``
        raises ``KeyError``.

        Returns:
            pandas.DataFrame: the sorted table of group labels.
        """
        records = [group.model_dump() for group in self.groups]
        columns = list(GroupLabel.model_fields)
        return (
            pd.DataFrame(records, columns=columns)
            .sort_values('number')
            .reset_index(drop=True)
        )

from openai import OpenAI
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment before any credentials are read.
load_dotenv()

# OpenAI client; the key comes from the environment and is never hard-coded.
# The lookup yields None when the variable is unset.
openai_api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=openai_api_key)

# Vertex AI settings are read from the environment; each lookup yields None
# when the variable is unset.
google_project_id = os.environ.get('VERTEXAI_PROJECT')
google_location = os.environ.get('VERTEXAI_LOCATION')

import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig

# Initialise the Vertex AI SDK with the project/location read above.
vertexai.init(
    project=google_project_id,
    location=google_location,
)

In [5]:
# Ask the model to label the clusters, with the reply parsed directly into a
# MultipleGroupLabels instance (structured output).
completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": prompt_instruction},
        {"role": "user", "content": prompt_body},
    ],
    response_format=MultipleGroupLabels,
)

# `parsed` is None when the model declines to answer (the reason is then in
# `refusal`). Fail with an explicit message instead of an opaque
# AttributeError on `None.to_df()`.
message = completion.choices[0].message
if message.parsed is None:
    raise RuntimeError(
        f"No structured group labels were returned; model refusal: {message.refusal!r}"
    )

# One row per group, sorted by group number; displayed as the cell output.
groups = message.parsed.to_df()
groups

Unnamed: 0,number,description_short,description_long
0,1,Cardiomyopathies and Heart Failure,This group encompasses surgeries related to va...
1,2,Tetralogy of Fallot and Related Complications,This group includes surgeries for Tetralogy of...
2,3,Atrial Septal Defects (ASD),Surgeries in this group primarily address atri...
3,4,Mitral and Tricuspid Valve Disorders,This group focuses on surgeries addressing abn...
4,5,Transposition of the Great Arteries,Surgeries related to the transposition of the ...
5,6,Aortic Valve and Outflow Tract Congenital Issues,This group involves surgeries for congenital a...
6,7,Atrioventricular Septal Defects (AVSD),The focus here is on atrioventricular septal d...
7,8,Ventricular Septal Defects (VSD) and Related A...,This group includes surgeries for ventricular ...
8,9,Conotruncal Anomalies and Pulmonary Venous Con...,Surgeries here address conotruncal defects suc...
9,10,Perimembranous VSD and Associated Conditions,This group focuses on perimembranous ventricul...
