In [None]:
from pyannote.audio import Pipeline

In [None]:
from dotenv import load_dotenv

In [None]:
import os
from pathlib import Path

In [None]:

# Merge variables from a local .env file into the process environment, then
# read the Hugging Face access token (None when the variable is not set).
load_dotenv()
hugging_face_token = os.environ.get("HUGGING_FACE_TOKEN")

In [None]:
# WAV file passed to diarize_audio further down.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
audio_path = Path("/Users/phapman/Desktop/DT_decision_making.wav")

In [None]:
# Displays True/False: whether the WAV file is present at the configured path.
audio_path.exists()

In [None]:
import torch

In [None]:
def diarize_audio(audio_path: str):
    """
    Run the pretrained pyannote speaker-diarization pipeline on one audio file.

    The pipeline is loaded with the module-level ``hugging_face_token`` and is
    re-created on every call.

    Args:
        audio_path: Location of the audio file; passed unchanged to the pipeline.

    Returns:
        A list with one dict per speaker turn, each holding ``"start"`` and
        ``"end"`` (taken from the turn) and ``"speaker"`` (the track label).

    Raises:
        RuntimeError: If the pretrained pipeline could not be loaded.
    """
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1", use_auth_token=hugging_face_token
    )
    # from_pretrained may return None instead of raising (e.g. gated model or
    # rejected token). Fail here with an actionable message rather than the
    # opaque "'NoneType' object is not callable" on the next line.
    if pipeline is None:
        raise RuntimeError(
            "Could not load 'pyannote/speaker-diarization-3.1'. Check that "
            "HUGGING_FACE_TOKEN is set and that the model's user conditions "
            "have been accepted on Hugging Face."
        )

    diarization = pipeline(audio_path)
    return [
        {"start": turn.start, "end": turn.end, "speaker": speaker}
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]

In [None]:
import torch

In [None]:
# Run local diarization on the WAV file; the cell output is the returned list.
diarize_audio(audio_path)

In [None]:
import os
import requests
from pathlib import Path
from dotenv import load_dotenv
from tnh_scholar.utils import slugify
import json
from typing import Dict, Any

In [None]:
from tnh_scholar.logging_config import get_child_logger
# Notebook-specific logger used by the API helper functions defined below.
logger = get_child_logger("pyannote_testing_notebook")

In [None]:
# Refresh the environment from .env and read the pyannote.ai API token
# (None when the variable is not set).
load_dotenv()
api_token = os.environ.get("PYANNOTEAI_API_TOKEN")

In [None]:
# MP3 file uploaded to pyannote.ai further down.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
input_path = Path("/Users/phapman/Desktop/DT_decision_making.mp3")

In [None]:
# Displays True/False: whether the MP3 file is present at the configured path.
input_path.exists()

In [None]:
def get_pyannote_upload_url(input_path: Path, media_url: str, api_token: str) -> str:
    """
    Ask Pyannote.ai for a pre-signed URL to upload media to.

    Args:
        input_path: Not read by this function; present in the signature only.
        media_url: Value sent as the ``"url"`` field of the request body.
        api_token: Token placed in the Bearer authorization header.

    Returns:
        Whatever ``get_response_status`` returns for this request.

    Raises:
        requests.exceptions.HTTPError: Re-raised after the response body has
            been printed.
    """
    endpoint = "https://api.pyannote.ai/v1/media/input"
    payload = {"url": media_url}
    request_headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }

    try:
        upload_url = get_response_status(endpoint, payload, request_headers)
    except requests.exceptions.HTTPError as http_err:
        # Surface the server's explanation before propagating the failure.
        print(f"HTTP Error Response Body: {http_err.response.text}")
        raise
    return upload_url

def get_response_status(url, body, headers, timeout=30):
    """
    POST a JSON body and return the ``"url"`` field of the JSON reply.

    Args:
        url: Endpoint to POST to.
        body: Object sent as the JSON request body.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block forever on an unresponsive server.

    Returns:
        The value stored under ``"url"`` in the parsed JSON response.

    Raises:
        requests.exceptions.HTTPError: If the response status is 4XX/5XX.
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If the JSON response has no ``"url"`` field.
    """
    response = requests.post(url, json=body, headers=headers, timeout=timeout)
    # Log before raise_for_status so failed requests are still traceable.
    logger.debug(f"Status Code: {response.status_code}")
    logger.debug(f"Response Content: {response.text}")
    logger.debug(f"Request Body: {body}")
    response.raise_for_status()
    return response.json()["url"]

# # Test the function (media_url is the required second argument)
# try:
#     presigned_url = get_pyannote_upload_url(
#         Path("dt-decision-making.mp3"),
#         "media://default/dt-decision-making.mp3",
#         api_token,
#     )
# except Exception as e:
#     print(f"Error type: {type(e)}")
#     print(f"Error details: {str(e)}")

In [None]:
def upload_audio_to_pyannote(input_path: Path, bucket: str = None, api_token: str = None) -> str:
    """
    Upload an audio file to Pyannote.ai through a pre-signed URL.

    Args:
        input_path: Path to the audio file to upload.
        bucket: First path component of the media URL; ``"default"`` is used
            when omitted or empty.
        api_token: API token; when omitted or empty, ``PYANNOTEAI_API_TOKEN``
            is read from the environment after loading ``.env``.

    Returns:
        str: Media URL (``media://<bucket>/<slugified-stem><suffix>``) under
        which the file was uploaded.

    Raises:
        ValueError: If no API token is supplied or found in the environment.
        OSError: If the local file cannot be opened.
        requests.exceptions.HTTPError: If requesting the pre-signed URL or the
            upload itself returns a 4XX/5XX status.
    """
    # Resolve the API token, falling back to the environment.
    if not api_token:
        load_dotenv()
        api_token = os.getenv("PYANNOTEAI_API_TOKEN")
    if not api_token:
        raise ValueError("Pyannote API token not found")

    # Use default bucket name if not specified
    if not bucket:
        bucket = "default"

    media_url = f"media://{bucket}/{slugify(input_path.stem)}{input_path.suffix}"

    # Open the file before asking for a pre-signed URL, so a missing or
    # unreadable file fails immediately without a wasted API request.
    with open(input_path, "rb") as f:
        presigned_url = get_pyannote_upload_url(input_path, media_url, api_token)
        response = requests.put(presigned_url, data=f)
        response.raise_for_status()

    return media_url



In [None]:
# Upload the local MP3 (default bucket, token from the environment) and keep
# the returned media:// URL for later cells.
media_url = upload_audio_to_pyannote(input_path)

In [None]:
# Submit a diarization job for the uploaded file. The media URL returned by
# upload_audio_to_pyannote is reused rather than a hand-typed copy, so the
# request always points at the object that was actually uploaded.
body = {
    "url": media_url,
}

url = "https://api.pyannote.ai/v1/diarize"
headers = {
    "Authorization": "Bearer {0}".format(api_token),
    "Content-Type": "application/json",
}

response = requests.post(url, json=body, headers=headers)
response.raise_for_status()
print(response.json())


In [None]:

# Hard-coded job identifier.
# NOTE(review): presumably copied from an earlier diarize response — verify it
# still refers to a job that exists.
jobId = "3291e52c-a49f-4169-982e-679f956c3c6f"

# Look the job up by id and print the raw response body.
url = f"https://api.pyannote.ai/v1/jobs/{jobId}"
headers = {"Authorization": f"Bearer {api_token}"}
response = requests.get(url, headers=headers)
print(response.text)

In [None]:
# Show the raw body of the most recent response as the cell output.
response.text

In [None]:
def get_diarization_result(job_id: str, api_token: str, timeout: float = 30.0) -> Dict[str, Any]:
    """
    Fetch and parse the status/result of a diarization job from the pyannote.ai API.

    Args:
        job_id: The job identifier returned by pyannote.ai
        api_token: Authentication token for the API
        timeout: Seconds to wait for the server before giving up

    Returns:
        Dict containing the parsed JSON response

    Raises:
        requests.RequestException: If the request fails, times out, or returns
            a 4XX/5XX status
        ValueError: If the response body is not valid JSON (the concrete type
            is a JSON decode error, which subclasses ValueError)
    """
    url = f"https://api.pyannote.ai/v1/jobs/{job_id}"
    headers = {"Authorization": f"Bearer {api_token}"}

    # Transport and HTTP-status failures.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raises an exception for 4XX/5XX status codes
    except requests.RequestException as e:
        logger.error(f"API request failed: {e}")
        raise

    # Parsing is handled separately: the decode error raised by response.json()
    # is itself a RequestException in current requests releases, so a shared
    # try block would report it as a failed request and never reach a JSON
    # handler. ValueError covers every JSON decode error variant.
    try:
        result = response.json()
    except ValueError as e:
        logger.error(f"Failed to parse API response as JSON: {e}")
        raise

    logger.debug(f"Successfully retrieved diarization result for job {job_id}")
    return result

In [None]:
# Fetch the job via the helper; the cell output is the parsed response dict.
get_diarization_result(jobId, api_token)