In [None]:
import os
import tempfile

import librosa
import nest_asyncio
import numpy as np
import uvicorn
import whisper
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from langdetect import LangDetectException, detect
from nltk import word_tokenize, pos_tag
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
from yake import KeywordExtractor

In [None]:
# Patch asyncio to allow re-entrant event loops; presumably needed so that
# uvicorn can be started from inside the notebook's already-running loop.
nest_asyncio.apply()

# Models are loaded once, when this cell runs, and shared as module-level
# globals by the functions and the request handler defined in later cells.
whisper_model = whisper.load_model("base")  # speech-to-text model ("base" checkpoint)
sentiment_pipeline = pipeline("sentiment-analysis")  # no model name given, so the library default is used
# top_k=1 asks the emotion classifier for only its highest-scoring label.
emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
keyword_extractor = KeywordExtractor()  # YAKE keyword extractor with default settings
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # sentence-embedding model

In [None]:
# FastAPI application object; the route handler in a later cell is registered
# on it with the @app.post decorator, and run_api() serves it.
app = FastAPI()

def extract_cognitive_features(text, audio, sr):
    """Compute simple fluency and prosody statistics for one utterance.

    Args:
        text: Transcript of the utterance.
        audio: Array of audio samples (the caller in this file passes the
            array returned by ``librosa.load``).
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        dict with JSON-serialisable values:
            num_sentences: number of non-empty segments after splitting the
                transcript on ``'.'``.
            num_words: number of tokens produced by ``word_tokenize`` on the
                lower-cased transcript.
                # NOTE(review): the tokenizer may emit punctuation tokens,
                # which would be counted as words here - confirm.
            speech_rate_wps: tokens per second of audio (0.0 when the audio
                has no duration).
            pauses_per_sentence: number of samples that are exactly zero,
                divided by the sentence count (a crude silence proxy).
            hesitation_count: number of filler tokens (uh, um, hmm, er, ah).
            pitch_variability: standard deviation of the YIN pitch track
                (0.0 when there is no audio to analyse).
    """
    hesitation_tokens = {'uh', 'um', 'hmm', 'er', 'ah'}
    samples = np.asarray(audio)

    words = word_tokenize(text.lower())
    hesitations = sum(1 for w in words if w in hesitation_tokens)
    sentences = [s.strip() for s in text.split('.') if s.strip()]

    # Vectorised count of exact-zero samples; avoids materialising the whole
    # signal as a Python list.
    zero_samples = int(np.count_nonzero(samples == 0))
    pauses_per_sentence = round(zero_samples / max(len(sentences), 1), 2)

    # Guard against empty audio / missing sample rate instead of dividing by zero.
    duration_sec = len(samples) / sr if sr else 0.0
    if duration_sec > 0:
        speech_rate = round(len(words) / duration_sec, 2)  # words per second
        # Pass the real sampling rate: librosa.yin otherwise assumes 22050 Hz,
        # which skews the pitch estimate for audio loaded at its native rate.
        pitch = librosa.yin(samples, fmin=75, fmax=300, sr=sr)
        # float() converts the numpy scalar so the value is JSON-serialisable.
        pitch_var = round(float(np.std(pitch)), 2)
    else:
        speech_rate = 0.0
        pitch_var = 0.0

    return {
        "num_sentences": len(sentences),
        "num_words": len(words),
        "speech_rate_wps": speech_rate,
        "pauses_per_sentence": pauses_per_sentence,
        "hesitation_count": hesitations,
        "pitch_variability": pitch_var
    }

In [None]:
def detect_recall_issues(text):
    """Report the top keywords of a transcript and those not backed by a noun.

    The transcript is tokenised and POS-tagged; tokens whose tag starts with
    ``"NN"`` are treated as nouns. The first five entries returned by the
    module-level ``keyword_extractor`` are taken as the important keywords.
    A keyword is reported as missing when none of its whitespace-separated
    words matches a noun token, compared case-insensitively, so multi-word
    keyphrases and differently-cased keywords are not flagged spuriously.

    Args:
        text: Transcript to analyse.

    Returns:
        dict with:
            important_keywords: up to five keyword strings, in extractor order.
            missing_keywords: the subset of those keywords containing no noun.
    """
    tokens = word_tokenize(text)
    # Lower-cased set: O(1) membership and case-insensitive matching.
    noun_tokens = {word.lower() for word, tag in pos_tag(tokens) if tag.startswith("NN")}
    keywords = [kw[0] for kw in keyword_extractor.extract_keywords(text)[:5]]
    missing_keywords = [
        kw for kw in keywords
        if not any(part in noun_tokens for part in kw.lower().split())
    ]
    return {
        "important_keywords": keywords,
        "missing_keywords": missing_keywords
    }

In [None]:
@app.post("/analyze-cognition/")
async def analyze_cognition(file: UploadFile = File(...)):
    """Analyse an uploaded speech recording and return the results as JSON.

    The upload is written to a temporary file, decoded with librosa,
    transcribed with the module-level Whisper model, and the transcript is
    passed through language detection, sentiment and emotion classification,
    sentence embedding, and the two feature helpers defined in this notebook.

    Args:
        file: Uploaded audio file (multipart form field ``file``).

    Returns:
        JSONResponse with the keys ``transcription``, ``language``,
        ``sentiment``, ``emotion``, ``duration_sec``, ``cognitive_features``,
        ``recall_issues`` and ``text_embedding``. An empty upload yields a
        400 response with a ``detail`` message. ``language`` is ``"unknown"``
        when the transcript has no detectable language features.

    Note:
        The model calls below are made without ``await``, so they run
        synchronously inside this coroutine.
    """
    payload = await file.read()
    if not payload:
        # Fail fast with a client error instead of an opaque decoder crash.
        return JSONResponse(status_code=400, content={"detail": "Uploaded file is empty."})

    # delete=False because librosa and Whisper reopen the file by path.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(payload)

        y, sr = librosa.load(tmp_path, sr=None)  # sr=None keeps the native sampling rate
        duration = librosa.get_duration(y=y, sr=sr)
        result = whisper_model.transcribe(tmp_path)
        text = result["text"]

        try:
            lang = detect(text)
        except LangDetectException:
            # Raised for empty / feature-less text (e.g. silent recordings).
            lang = "unknown"

        # truncation=True keeps long transcripts within the classifiers'
        # maximum input length instead of raising.
        sentiment = sentiment_pipeline(text, truncation=True)
        emotion = emotion_pipeline(text, truncation=True)
        embedding = embedder.encode([text])[0]

        audio_features = extract_cognitive_features(text, y, sr)
        recall_issues = detect_recall_issues(text)
    finally:
        # Always remove the temp file, even when a step above raises.
        os.remove(tmp_path)

    return JSONResponse(content={
        "transcription": text,
        "language": lang,
        "sentiment": sentiment,
        "emotion": emotion,
        "duration_sec": round(float(duration), 2),
        "cognitive_features": audio_features,
        "recall_issues": recall_issues,
        "text_embedding": embedding.tolist()
    })

In [None]:
def run_api(host="0.0.0.0", port=8000):
    """Serve the module-level FastAPI ``app`` with uvicorn.

    Args:
        host: Interface to bind to. The default ``"0.0.0.0"`` listens on all
            interfaces; pass ``"127.0.0.1"`` to restrict to local access.
        port: TCP port to listen on (default 8000).
    """
    uvicorn.run(app, host=host, port=port)