In [28]:
import threading
import numpy as np
import pyaudio
import whisper
import noisereduce as nr  # noise reduction package
import json
import os
from datetime import datetime
from openai import OpenAI  # Import the OpenAI client class

# ---------------------------
# Configuration File Handling
# ---------------------------
CONFIG_FILE = "config.json"


def load_or_create_config():
    """
    Load the configuration dictionary stored in CONFIG_FILE.

    A missing file is treated as an empty configuration. A file that cannot
    be read, is not valid JSON, or does not contain a JSON object is reported
    on stdout and also treated as an empty configuration.

    If the resulting configuration has no non-empty "api_key" entry, the user
    is prompted for one on standard input and the updated configuration is
    written back to CONFIG_FILE (a failure to write is reported, not raised).

    Returns:
        dict: The configuration. "api_key" holds the stored or entered key
        (an empty string if the user entered nothing at the prompt).
    """
    config = {}
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # First run: nothing to load, fall through to the prompt below.
        pass
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading config file: {e}")
    else:
        if isinstance(loaded, dict):
            config = loaded
        else:
            # Valid JSON that is not an object (e.g. a list) has no .get();
            # treat it like a corrupt file instead of crashing.
            print(
                "Error reading config file: expected a JSON object, "
                f"got {type(loaded).__name__}"
            )

    # Check if the API key exists and is non-empty.
    if not config.get("api_key"):
        api_key = input("Please enter your OpenAI API key: ").strip()
        config["api_key"] = api_key
        try:
            with open(CONFIG_FILE, "w", encoding="utf-8") as f:
                json.dump(config, f, indent=4)
            print(f"Config file '{CONFIG_FILE}' updated with the API key.")
        except OSError as e:
            print(f"Error saving config file: {e}")

    return config

# Load the configuration once, when this module/cell is executed. This may
# prompt on standard input if no API key has been stored yet.
config = load_or_create_config()
# API key handed to the OpenAI client in analyze_conversation().
API_KEY = config.get("api_key")

# ---------------------------
# Global variables for session data
# ---------------------------
# Texts recognized during this run; process_audio_in_real_time() appends to it.
session_transcriptions = []
# Guards session_transcriptions, which is appended to from worker threads.
session_lock = threading.Lock()
# Naive local time (ISO 8601) captured when this code is executed; stored as
# the "timestamp" of the session record written by log_session().
session_timestamp = datetime.now().isoformat()

# ---------------------------
# GPT‑4 Analysis Function
# ---------------------------
def _parse_analysis_payload(raw_text):
    """
    Decode the model's reply into a dictionary.

    The reply is first parsed as-is. If that fails (for example because the
    JSON is wrapped in Markdown code fences or surrounded by extra text), the
    span from the first "{" to the last "}" is parsed instead.

    Raises:
        ValueError: If the reply is not text, contains no parseable JSON, or
            the JSON is not an object.
    """
    if not isinstance(raw_text, str):
        raise ValueError("model returned no text content")
    try:
        payload = json.loads(raw_text)
    except ValueError:
        start = raw_text.find("{")
        end = raw_text.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON object found in model reply") from None
        payload = json.loads(raw_text[start:end + 1])
    if not isinstance(payload, dict):
        raise ValueError(
            f"expected a JSON object, got {type(payload).__name__}"
        )
    return payload


def analyze_conversation(transcriptions):
    """
    Send the whole transcription to GPT-4 and return its analysis.

    Args:
        transcriptions: Iterable of recognized text strings; they are joined
            with newlines to form the meeting transcription.

    Returns:
        dict: Always contains the keys "meeting_title", "summary" and
        "discussed_tasks". Keys missing from the model's reply are filled
        with defaults ("Unknown", [], []). If the request or the parsing of
        the reply fails, the error is printed and the defaults are returned.
    """
    fallback = {"meeting_title": "Unknown", "summary": [], "discussed_tasks": []}
    conversation_text = "\n".join(transcriptions)
    prompt = f"""
You are an expert meeting analyst. Analyze the following meeting transcription and provide:
1. A meeting title (subject) summarizing the conversation.
2. A summary of the conversation in bullet points.
3. Any tasks discussed during the conversation (if any).

Format your answer as a JSON object with the following keys: "meeting_title", "summary", "discussed_tasks".

Meeting transcription:
\"\"\"{conversation_text}\"\"\"
"""
    try:
        # Initialize the client using the provided API key and base URL.
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=API_KEY,
        )
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an expert meeting analyst."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.5,
            max_tokens=500
        )
        analysis = _parse_analysis_payload(response.choices[0].message.content)
    except Exception as e:
        # Best-effort boundary around a remote call: the session must still
        # be logged even when the analysis is unavailable.
        print(f"Error analyzing conversation: {e}")
        return fallback
    # Guarantee the documented keys even if the model omitted some of them.
    return {**fallback, **analysis}

# ---------------------------
# Logging Function
# ---------------------------
def log_session(session_timestamp, transcriptions, analysis, log_file="transcription_log.json"):
    """
    Append one session record to a JSON log file.

    The log file holds a JSON array of records. If the file does not exist a
    new log is created. If it exists but cannot be read, is not valid JSON,
    or does not contain an array, that is reported and a new log is started
    (the previous content is overwritten).

    Args:
        session_timestamp: Value stored under "timestamp".
        transcriptions: Recognized texts, stored under "raw_records".
        analysis: Dictionary providing "meeting_title", "summary" and
            "discussed_tasks"; missing keys (or a non-dictionary value) fall
            back to "Unknown", [] and [].
        log_file: Path of the JSON log file.

    Errors while writing the file are printed, not raised.
    """
    if not isinstance(analysis, dict):
        # A malformed analysis must not prevent the raw records being saved.
        analysis = {}
    record = {
        "timestamp": session_timestamp,
        "meeting_title": analysis.get("meeting_title", "Unknown"),
        "raw_records": transcriptions,
        "summary": analysis.get("summary", []),
        "discussed_tasks": analysis.get("discussed_tasks", [])
    }

    data = []
    try:
        with open(log_file, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # No previous log: start with an empty one.
        pass
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading log file, starting new log: {e}")
    else:
        if isinstance(loaded, list):
            data = loaded
        else:
            # Valid JSON of the wrong shape cannot be appended to.
            print(
                "Error reading log file, starting new log: expected a JSON "
                f"array, got {type(loaded).__name__}"
            )

    data.append(record)

    try:
        with open(log_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)
        print(f"Session log saved to {log_file}")
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: a record value that json cannot serialize.
        print(f"Error saving session log: {e}")

# ---------------------------
# Audio Processing Function
# ---------------------------
# Serializes Whisper inference: batches are processed on separate threads
# that all share one model instance.
_transcribe_lock = threading.Lock()


def process_audio_in_real_time(model, audio_frames, rate):
    """
    Denoise and transcribe one batch of audio and record the recognized text.

    Args:
        model: Loaded Whisper model; its transcribe() method is called.
        audio_frames: List of raw byte chunks holding 16-bit integer samples.
        rate: Sample rate in Hz, passed to the noise reducer.

    The recognized text is printed and, when non-empty, appended to the
    global session_transcriptions list under session_lock. Any error is
    printed and swallowed so that one bad batch does not stop the session.
    """
    try:
        print("Processing audio...")
        raw_audio = b"".join(audio_frames)
        if not raw_audio:
            # Nothing was captured; there is nothing to transcribe.
            return
        # Scale 16-bit integer samples to floats in [-1.0, 1.0).
        audio_array = np.frombuffer(raw_audio, dtype=np.int16).astype(np.float32) / 32768.0
        audio_denoised = nr.reduce_noise(y=audio_array, sr=rate)
        # NOTE(review): overlapping transcribe() calls on the shared model
        # coincided with a tensor reshape error in a recorded run; running
        # one inference at a time is presumed to avoid it - confirm.
        with _transcribe_lock:
            result = model.transcribe(audio_denoised, fp16=False, language="en")
        recognized_text = result.get("text", "").strip()
        print(f"Recognized Text: {recognized_text}")
        if not recognized_text:
            # Silence yields empty text; keep it out of the session log.
            return
        with session_lock:
            session_transcriptions.append(recognized_text)
    except Exception as e:
        print(f"Error processing audio: {e}")

# ---------------------------
# Real-Time Speech Recognition Function
# ---------------------------
def real_time_speech_recognition(chunk_size=1024, fmt=pyaudio.paInt16, channels=1, rate=16000, batch_seconds=5):
    """
    Continuously read audio from the microphone and transcribe it in batches.

    Args:
        chunk_size: Number of frames read from the stream per call.
        fmt: PyAudio sample format of the input stream.
        channels: Number of input channels.
        rate: Sample rate in Hz.
        batch_seconds: Approximate length, in seconds, of each batch handed
            to process_audio_in_real_time().

    Each full batch is processed on its own thread so that capture is not
    blocked. The loop runs until Ctrl+C (KeyboardInterrupt) or a stream
    error. On exit the stream is closed, all worker threads are waited for,
    and any audio captured since the last full batch is processed, so the
    caller sees every transcription of the session.

    Errors raised while loading the model or opening the stream propagate to
    the caller.
    """
    print("Starting real-time speech recognition. Press Ctrl+C to stop.")
    # Load the model before opening the microphone so no audio is captured
    # (and left unread) while the model is still loading.
    print("Loading Whisper model...")
    model = whisper.load_model("small.en")

    # Number of chunks per batch, computed once.
    frames_per_batch = max(1, int(rate / chunk_size * batch_seconds))
    audio_frames = []
    workers = []

    audio_interface = pyaudio.PyAudio()
    try:
        stream = audio_interface.open(format=fmt, channels=channels, rate=rate, input=True, frames_per_buffer=chunk_size)
    except Exception:
        # Do not leak the PyAudio instance when the device cannot be opened.
        audio_interface.terminate()
        raise

    try:
        while True:
            audio_data = stream.read(chunk_size, exception_on_overflow=False)
            audio_frames.append(audio_data)
            if len(audio_frames) >= frames_per_batch:
                worker = threading.Thread(
                    target=process_audio_in_real_time,
                    args=(model, audio_frames, rate)
                )
                worker.start()
                # Rebind instead of clearing: the worker owns the old list.
                audio_frames = []
                # Forget finished workers so the list stays short.
                workers = [w for w in workers if w.is_alive()]
                workers.append(worker)
    except KeyboardInterrupt:
        print("Stopping real-time speech recognition.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        try:
            stream.stop_stream()
            stream.close()
        finally:
            audio_interface.terminate()
        # Wait for in-flight batches so their text is recorded before the
        # caller analyzes and logs the session.
        for worker in workers:
            worker.join()
        # Process the tail shorter than one batch instead of dropping it.
        if audio_frames:
            process_audio_in_real_time(model, audio_frames, rate)

# ---------------------------
# Main Execution
# ---------------------------
if __name__ == "__main__":
    # Run the recognition loop, then analyze and log whatever was transcribed,
    # even if recognition failed part-way through.
    try:
        real_time_speech_recognition()
    except Exception as e:
        print(f"Failed to start speech recognition: {e}")
    finally:
        # Take a snapshot under the lock and release it right away: the
        # analysis is a network call and must not block worker threads that
        # are still trying to append their results.
        with session_lock:
            final_transcriptions = list(session_transcriptions)
        if final_transcriptions:
            analysis = analyze_conversation(final_transcriptions)
            log_session(session_timestamp, final_transcriptions, analysis)
        else:
            print("No transcriptions to log for this session.")

Config file 'config.json' updated with the API key.
Starting real-time speech recognition. Press Ctrl+C to stop.
Loading Whisper model...
Processing audio...
Recognized Text: You
Processing audio...
Recognized Text: you
Processing audio...
Recognized Text: You
Processing audio...
Recognized Text: you
Processing audio...
Recognized Text: You
Processing audio...
Recognized Text: between the machine, the dishwasher, and the person.
Processing audio...
Processing audio...
Error processing audio: cannot reshape tensor of 0 elements into shape [1, 0, 12, -1] because the unspecified dimension size -1 can be any value and is ambiguous
Processing audio...
Processing audio...
Recognized Text: I was just, I was also like, I think I was also like, you were washing dishes.
Recognized Text: 
Recognized Text: It was a dishwasher. So you were like the time an adult. You were like the machine. You were washing dishes.
Processing audio...
Recognized Text: I had no friends in the restaurant, they were al