In [1]:
import pyaudio
import wave
import webrtcvad
import threading
import time

class AudioRecorder:
    """Record microphone audio on a background thread, keeping only voiced frames.

    Fixed-size frames are read from a PyAudio input stream; each frame is
    passed to a ``webrtcvad.Vad`` instance and appended to ``buffer`` only
    when the detector classifies it as speech. ``save_file`` writes the
    collected frames to a WAV file.
    """

    def __init__(self, sample_rate=16000, frame_duration=30):
        """Configure the recorder.

        Args:
            sample_rate: Capture rate in Hz, also passed to the VAD.
            frame_duration: Length of each frame in milliseconds.
        """
        self.sample_rate = sample_rate
        self.frame_duration = frame_duration
        # Number of samples per frame (rate * milliseconds / 1000).
        self.frame_size = int(sample_rate * frame_duration / 1000)
        # Mode 3 -- presumably the most aggressive filtering; see py-webrtcvad docs.
        self.vad = webrtcvad.Vad(3)
        self.channels = 1
        self.format = pyaudio.paInt16
        self.buffer = []
        self.is_recording = False
        self.start_time = None
        self.end_time = None
        # Handle to the background capture thread so stop() can wait for it.
        self._thread = None

    def _record_loop(self):
        """Capture frames until ``is_recording`` is cleared, then release audio resources."""
        pa = pyaudio.PyAudio()
        stream = None
        try:
            stream = pa.open(format=self.format,
                             channels=self.channels,
                             rate=self.sample_rate,
                             input=True,
                             frames_per_buffer=self.frame_size)
            while self.is_recording:
                pcm = stream.read(self.frame_size, exception_on_overflow=False)
                if self.vad.is_speech(pcm, self.sample_rate):
                    self.buffer.append(pcm)
        finally:
            # Always record the end time and release the device, even when
            # opening or reading the stream raised.
            self.end_time = time.time()
            if stream is not None:
                stream.stop_stream()
                stream.close()
            pa.terminate()

    def start(self):
        """Start recording until stopped manually.

        Calling this while a capture thread is still alive is a no-op, so two
        threads never feed the same buffer.
        """
        if self._thread is not None and self._thread.is_alive():
            print("Recording already in progress.")
            return

        self.is_recording = True
        self.buffer = []
        self.start_time = time.time()
        # Clear any end time left over from a previous run.
        self.end_time = None

        self._thread = threading.Thread(target=self._record_loop, daemon=True)
        self._thread.start()
        print("Recording started...")

    def stop(self, timeout=2.0):
        """Stop recording and wait for the capture thread to finish.

        Args:
            timeout: Maximum number of seconds to wait for the capture thread
                to exit; ``None`` waits indefinitely.
        """
        self.is_recording = False
        worker = self._thread
        # Joining ensures the last frame is appended and end_time is set
        # before the caller goes on to save_file() / get_conversation_time().
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout)
        print("Recording stopped.")

    def save_file(self, filename="speech.wav"):
        """Write the captured speech frames to ``filename`` as a WAV file.

        Prints "No speech detected" and writes nothing when the buffer is empty.
        """
        # Snapshot so a still-running capture thread cannot change the data mid-write.
        frames = list(self.buffer)
        if not frames:
            print("No speech detected")
            return

        with wave.open(filename, "wb") as wf:
            wf.setnchannels(self.channels)
            # Module-level helper: avoids creating a PyAudio instance that is
            # never terminated just to look up the sample width.
            wf.setsampwidth(pyaudio.get_sample_size(self.format))
            wf.setframerate(self.sample_rate)
            wf.writeframes(b"".join(frames))
        print(f"Saved audio to {filename}")

    def get_conversation_time(self):
        """Get total recording time in seconds (rounded to 2 decimals), or 0 if unavailable."""
        if self.start_time is None or self.end_time is None:
            return 0
        return round(self.end_time - self.start_time, 2)

  import pkg_resources


In [2]:
# Create a recorder with the default settings (16 kHz, 30 ms frames) and begin
# capturing on a background thread.
recorder = AudioRecorder()
recorder.start()
# NOTE(review): nothing in this cell waits 50 seconds; recording continues
# until recorder.stop() is called from a later cell.
print("Recording for 50 seconds...")

Recording started...
Recording for 50 seconds...


In [3]:
# Stop capturing, then write whatever voiced audio was collected to disk.
recorder.stop()
recorder.save_file("my_recording.wav")
# save_file() writes nothing when no speech frames were captured, so only
# report success when the buffer actually held audio.
if recorder.buffer:
    print("Done! Saved as my_recording.wav")

Recording stopped.
Saved audio to my_recording.wav
Done! Saved as my_recording.wav


In [17]:
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()

class SpeechProcessor:
    """Transcribe an audio file through the OpenAI audio transcription endpoint."""

    def __init__(self, filepath: str):
        """Remember the path of the audio file to transcribe."""
        self.filepath = filepath

    async def transcribe_audio(self):
        """Send the audio file to the transcription model and return the text.

        The API key is read from the ``OPENAI_API_KEY`` environment variable.

        Returns:
            The ``text`` attribute of the transcription response.

        Note:
            The method is a coroutine, but the client call inside it is
            synchronous, so it blocks the event loop while the request runs.
        """
        # Local import: the visible cells use os.getenv without importing os.
        import os

        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        # Context manager guarantees the file handle is closed even if the
        # request raises.
        with open(self.filepath, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="gpt-4o-mini-transcribe",
                file=audio_file,
                response_format="json",
                language="en",
                # NOTE(review): prompt text kept byte-for-byte as originally
                # written (including its typos and embedded whitespace) so
                # the request is unchanged.
                prompt=(
                    "While converting an audio file to text correct the words.\n"
                    "           and check they are medically correct or not and complete the ords dont add extra text.\"\n"
                    "            "
                ),
            )
        return transcription.text

In [18]:
filepath = "D:/AxisMD/voice/my_recording.wav"
transcripter = SpeechProcessor(filepath)
text = await transcripter.transcribe_audio()
print(f"here is the text{text}")

here is the textDermatologists are medical doctors specializing in skin, hair, and nails. Dermatologists also handle cosmetic disorders like hair loss and scars. Your dermatologist will examine you and order tests to make a diagnosis and treat a condition with medication or a procedure.


In [4]:
import json
from typing import List, Dict, Optional

class ClinicalPromptBuilder:
    """Render an LLM prompt that turns a dictated transcript into a structured clinical note."""

    def __init__(self, base_template: Dict, audio_transcript:str):
        """Store the note template and the raw transcript.

        Args:
            base_template: Dict describing the target JSON schema. The keys
                ``specialty``, ``subspecialty`` and ``output_style`` are read
                when building the prompt; missing keys render as ``None``.
            audio_transcript: Raw transcript text to embed in the prompt.
        """
        self.base_template = base_template
        self.audio_transcript = audio_transcript

    def build_prompt(self):
        """
        Builds a clinical note generation prompt for the LLM.

        Returns:
            The prompt text with surrounding whitespace stripped. The
            template is only read, never modified.
        """
        template = self.base_template
        specialty = template.get("specialty")
        subspecialty = template.get("subspecialty")
        output_style = template.get("output_style")
        schema_json = json.dumps(template, indent=2)

        # A space now separates "specialty" from its value; previously the two
        # were fused (e.g. "specialtyNone").
        prompt = f"""
        This is dictated by a doctor in the specialty {specialty} and subspecialty {subspecialty}. 
        to a voice assistant to generate a clinical note. While dictating, there might be 
        transcription errors (wrong drug names, conditions, or procedures).

        Raw transcript:
        \"\"\"{self.audio_transcript}\"\"\"

        Your task:
        1. Correct transcription mistakes using appropriate medical context.
        2. Extract structured information into the JSON schema:
        {schema_json}

        Rules:
        - Only include information explicitly stated or implied in the transcript.
        - Insert findings into the relevant sections (including physical exam objectives).
        - Assign correct ICD codes and CPT codes.
        - If insufficient content, return:
        {{
        "error": "Insufficient or unrelated content"
        }}
        - Format based on requested style: {output_style}.
        - Output must be ONLY the JSON object, no extra text.
        """
        return prompt.strip()

In [22]:
# Empty schema: every template lookup in build_prompt() yields None and the
# embedded JSON schema renders as "{}".
schema = {}

# NOTE(review): `prompt` holds the builder object (not the prompt text), and
# `format` -- the rendered prompt string -- shadows the built-in format() for
# the rest of the kernel session.
prompt = ClinicalPromptBuilder(schema, text)
format = prompt.build_prompt()

In [23]:
# Show the rendered prompt; `format` here is the string produced by
# build_prompt() in the previous cell, not the built-in function.
print(format)

This is dictated by a doctor in the specialtyNone and subspecialty None. 
        to a voice assistant to generate a clinical note. While dictating, there might be 
        transcription errors (wrong drug names, conditions, or procedures).

        Raw transcript:
        """Dermatologists are medical doctors specializing in skin, hair, and nails. Dermatologists also handle cosmetic disorders like hair loss and scars. Your dermatologist will examine you and order tests to make a diagnosis and treat a condition with medication or a procedure."""

        Your task:
        1. Correct transcription mistakes using appropriate medical context.
        2. Extract structured information into the JSON schema:
        {}

        Rules:
        - Only include information explicitly stated or implied in the transcript.
        - Insert findings into the relevant sections (including physical exam objectives).
        - Assign correct ICD codes and CPT codes.
        - If insufficient content, r