In [24]:
import os
import torch
import torchaudio
import soundfile as sf
from pyannote.audio import Pipeline
from groq import Groq
from supabase import create_client
from sentence_transformers import SentenceTransformer

In [None]:
# --- CONFIGURATION ---
# Credentials are read from environment variables so that real secrets never
# have to be typed into (and saved with) the notebook. The "YOUR_KEY"
# placeholder is kept as the fallback so the cell still runs when a variable
# is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "YOUR_KEY")
SUPABASE_URL = os.environ.get("SUPABASE_URL", "YOUR_KEY")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "YOUR_KEY")
HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", "YOUR_KEY")


In [31]:

class ClinicalIRSystem:
    """Speaker-aware indexing and summarization of recorded clinical interviews.

    The class wires together:
      * a pyannote diarization pipeline (who spoke when),
      * Groq-hosted Whisper transcription (what was said),
      * the ``all-MiniLM-L6-v2`` SentenceTransformer (one embedding per segment),
      * the Supabase table ``clinical_segments`` (storage and retrieval),
      * a Groq chat model (summary generation).

    The module-level constants GROQ_API_KEY, SUPABASE_URL, SUPABASE_KEY and
    HF_AUTH_TOKEN must be defined before the class is instantiated.
    """

    # Maximum number of rows sent to Supabase in a single insert request.
    _INSERT_BATCH_SIZE = 100

    def __init__(self):
        """Create the API clients and load the embedding and diarization models."""
        print("--- Initializing Clinical IR System (v4.0.4) ---")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # 1. AI Clients
        self.groq_client = Groq(api_key=GROQ_API_KEY)
        self.supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
        
        # 2. Embedding Model (all-MiniLM-L6-v2)
        self.embed_model = SentenceTransformer('all-MiniLM-L6-v2', device=self.device)
        
        # 3. Diarization Pipeline
        self.diarization_pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-community-1", 
            token=HF_AUTH_TOKEN
        ).to(self.device)

    @staticmethod
    def _match_speaker(diar_segments, start, end):
        """Pick the diarization label that best covers the span [start, end].

        The label with the largest summed overlap wins. When nothing overlaps
        with positive duration (for example a zero-length span), the label of
        the first turn containing the span's midpoint is used instead.

        Args:
            diar_segments: List of dicts with "start", "end" and "speaker" keys.
            start: Span start time.
            end: Span end time.

        Returns:
            The chosen speaker label, or "UNKNOWN" when no turn matches.
        """
        overlap_by_speaker = {}
        for d_seg in diar_segments:
            overlap = min(end, d_seg['end']) - max(start, d_seg['start'])
            if overlap > 0:
                label = d_seg['speaker']
                overlap_by_speaker[label] = overlap_by_speaker.get(label, 0.0) + overlap

        if overlap_by_speaker:
            return max(overlap_by_speaker, key=overlap_by_speaker.get)

        # No positive overlap: fall back to midpoint containment.
        midpoint = (start + end) / 2
        for d_seg in diar_segments:
            if d_seg['start'] <= midpoint <= d_seg['end']:
                return d_seg['speaker']
        return "UNKNOWN"

    def process_audio_file(self, audio_path, role_mapping):
        """Processes audio: Diarization -> Transcription -> Indexing

        Every call inserts new rows into ``clinical_segments``; nothing in this
        method removes or de-duplicates rows from earlier calls.

        Args:
            audio_path: Path of the audio file to read with ``soundfile`` and to
                upload for transcription.
            role_mapping: Dict mapping diarization labels (e.g. "SPEAKER_00") to
                role names. Labels missing from the dict are stored as "OTHER".
        """
        print(f"Step 1: Reading {audio_path}...")
        # always_2d=True yields (frames, channels) even for mono files, so one
        # unconditional transpose gives (channels, frames) without having to
        # guess the layout from the array shape.
        data, samplerate = sf.read(audio_path, dtype="float32", always_2d=True)
        waveform = torch.from_numpy(data).T.contiguous()

        audio_payload = {"waveform": waveform, "sample_rate": samplerate}

        print("Step 2: Identifying speakers (Diarization)...")
        diar_output = self.diarization_pipeline(audio_payload)

        diar_segments = [
            {"start": turn.start, "end": turn.end, "speaker": speaker}
            for turn, speaker in diar_output.exclusive_speaker_diarization
        ]

        print("Step 3: Transcribing with Groq Whisper-v3[cite: 27]...")
        with open(audio_path, "rb") as file:
            transcription = self.groq_client.audio.transcriptions.create(
                # Send only the base name so local directory names are not
                # included in the upload; the file extension is preserved.
                file=(os.path.basename(audio_path), file.read()),
                model="whisper-large-v3",
                response_format="verbose_json"
            )

        print("Step 4: Speaker-Aware Indexing to Supabase[cite: 56]...")
        rows = []
        for w_seg in (transcription.segments or []):
            text = w_seg['text'].strip()
            if not text:
                # An empty segment has nothing worth embedding or retrieving.
                continue

            current_speaker = self._match_speaker(diar_segments, w_seg['start'], w_seg['end'])
            rows.append({
                "content": text,
                "speaker_role": role_mapping.get(current_speaker, "OTHER"),
                "embedding": None,  # filled in below, once, for all rows
                "metadata": {"start": w_seg['start'], "end": w_seg['end']}
            })

        if not rows:
            return

        # Encode all texts in one call instead of one model call per segment.
        embeddings = self.embed_model.encode([row["content"] for row in rows]).tolist()
        for row, embedding in zip(rows, embeddings):
            row["embedding"] = embedding

        # Insert in chunks: far fewer round trips than one request per row,
        # while keeping each request body bounded.
        for offset in range(0, len(rows), self._INSERT_BATCH_SIZE):
            chunk = rows[offset:offset + self._INSERT_BATCH_SIZE]
            self.supabase.table("clinical_segments").insert(chunk).execute()

    def get_full_transcript(self):
        """Retrieves every segment in chronological order for the full transcript

        All rows of ``clinical_segments`` are read; no filter is applied.

        Returns:
            One "[ROLE]: text" line per stored segment, each terminated by a
            newline, or an empty string when there are no rows.
        """
        print("\n--- FETCHING FULL SPEAKER-SEPARATED TRANSCRIPT ---")
        # Fetching all segments ordered by metadata->start time
        response = (
            self.supabase.table("clinical_segments")
            .select("speaker_role, content, metadata")
            .order("metadata->start", desc=False)
            .execute()
        )

        lines = [
            f"[{record['speaker_role']}]: {record['content']}"
            for record in (response.data or [])
        ]
        for line in lines:
            print(line)
        return "".join(line + "\n" for line in lines)

    def generate_clinical_summary(self, transcript):
        """Summarization Engine grounded in retrieved segments

        Args:
            transcript: Transcript text, e.g. the return value of
                ``get_full_transcript``.

        Returns:
            The summary text produced by the chat model, or a fixed notice when
            the transcript is empty (the model is not called in that case).
        """
        print("\n--- GENERATING LLM SUMMARY ---")

        # With no transcript there is nothing to ground the summary in, so do
        # not ask the model to produce clinical content at all.
        if not transcript or not transcript.strip():
            return "No transcript segments available to summarize."

        prompt = f"""
        Summarize the following clinical interview. 
        Focus on Patient concerns and Clinician observations.
        
        TRANSCRIPT:
        {transcript}
        
        SUMMARY FORMAT:
        1. Patient Reported Symptoms:
        2. Clinician Observations/Questions:
        3. Follow-up Plan:
        """

        completion = self.groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}]
        )
        return completion.choices[0].message.content

# --- EXECUTION ---
if __name__ == "__main__":
    AUDIO_FILE = "audio.wav"
    ROLES = {"SPEAKER_00": "CLINICIAN", "SPEAKER_01": "PATIENT"}
    # Ingestion inserts new rows into Supabase each time it runs, so it is
    # opt-in. Set to True to (re)index AUDIO_FILE before fetching the transcript.
    RUN_INGESTION = False

    bot = ClinicalIRSystem()

    # 1. Ingest Audio
    if RUN_INGESTION:
        bot.process_audio_file(AUDIO_FILE, ROLES)

    # 2. Get Full Transcript
    full_transcript = bot.get_full_transcript()

    # 3. Generate Summary
    summary = bot.generate_clinical_summary(full_transcript)
    print("\n--- CLINICAL SUMMARY ---\n", summary)

--- Initializing Clinical IR System (v4.0.4) ---


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 875.40it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m



--- FETCHING FULL SPEAKER-SEPARATED TRANSCRIPT ---
[CLINICIAN]: Hello, this is Dr. Smith. We are reviewing the patient's record for today's follow-up on their hypertension.
[OTHER]: Thanks, doctor. I've been taking the medication, but I notice some dizziness in the morning, usually right after I wake up.
[OTHER]: Dizziness can be a side effect. Let's check your blood pressure now.
[CLINICIAN]: It looks like 140 over 90, which is still a bit high.
[OTHER]: Thank you.

--- GENERATING LLM SUMMARY ---

--- CLINICAL SUMMARY ---
 Here is the summary of the clinical interview:

1. Patient Reported Symptoms:
   - Dizziness in the morning, usually right after waking up
   - The patient reports taking their hypertension medication as prescribed

2. Clinician Observations/Questions:
   - The patient's blood pressure is 140 over 90, which is still high
   - The clinician notes that dizziness can be a side effect of the medication

3. Follow-up Plan:
   - None explicitly stated in the transcript, 