# In [30]:
import numpy as np
import sounddevice as sd
import librosa
import torch
from transformers import pipeline

# -------------------------
# 1. Text-based emotion using Hugging Face
# -------------------------
# Hugging Face text-classification pipeline, constructed once at module load
# and reused for every text-emotion query.
text_model = pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    framework="pt",  # use PyTorch
)

def get_text_emotion(text):
    """Classify the emotion expressed in a piece of text.

    Args:
        text: Input string. ``None``, an empty string, or a whitespace-only
            string is treated as "no text".

    Returns:
        The ``'label'`` of the first result returned by ``text_model``, or
        ``None`` when there is no text to classify.
    """
    # Guard against None as well as blank input: the rest of the file uses
    # None to mean "no value", so calling .strip() on it must not crash.
    if text is None or not text.strip():
        return None
    # truncation=True keeps over-long inputs within the model's maximum
    # sequence length instead of letting the forward pass raise.
    result = text_model(text, truncation=True)
    return result[0]['label']

# -------------------------
# 2. Audio-based emotion using pitch/energy heuristic
# -------------------------
def get_audio_emotion(duration=3, sr=16000):
    """Record a short clip and guess an emotion from its pitch and energy.

    Args:
        duration: Recording length in seconds.
        sr: Sampling rate in Hz, used both for recording and for pitch
            estimation.

    Returns:
        ``"happy"``, ``"sad"``, or ``"neutral"`` according to simple
        threshold rules on mean pitch and mean signal energy.
    """
    print("Recording audio...")
    audio = sd.rec(int(duration * sr), samplerate=sr, channels=1)
    sd.wait()  # block until the recording has finished
    y = audio.flatten()

    # Extract pitch and energy
    try:
        # Pass the actual sampling rate: librosa.yin otherwise assumes its
        # default of 22050 Hz and the estimated frequencies are mis-scaled
        # relative to the Hz thresholds below.
        pitch = librosa.yin(y, fmin=50, fmax=300, sr=sr).mean()
    except Exception:
        # Best-effort fallback when pitch estimation fails. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        pitch = 0
    energy = (y ** 2).mean()

    # Simple heuristic rules
    if pitch > 180 and energy > 0.01:
        return "happy"
    if pitch < 100 and energy < 0.005:
        return "sad"
    return "neutral"

# -------------------------
# 3. Fuse text + audio emotions
# -------------------------
def fuse_emotions(text_emotion, audio_emotion):
    """Combine the text-based and audio-based emotion labels into one.

    If only one label is available (the other is ``None``), that label is
    returned; if both are ``None`` the result is ``None``. When both are
    present, the audio label is returned (when the two agree this is the
    same value as the text label).
    """
    both_present = text_emotion is not None and audio_emotion is not None
    if both_present:
        if text_emotion == audio_emotion:
            return text_emotion
        # On disagreement the audio label takes precedence.
        return audio_emotion
    # At most one label is available: fall back to whichever one exists.
    if text_emotion is None:
        return audio_emotion
    return text_emotion

# -------------------------
# 4. Main program
# -------------------------
def main():
    """Interactively detect an emotion from text, audio, or both.

    Prompts for an input mode, runs the matching detector(s), prints each
    intermediate result, and finally prints the fused emotion. An
    unrecognised mode is reported and nothing else is run.
    """
    mode = input("Choose input mode (text/audio/both): ").strip().lower()

    # Reject unknown modes up front; otherwise both branches below are
    # skipped and a misleading "Final Detected Emotion: None" is printed.
    if mode not in ("text", "audio", "both"):
        print(f"Unknown input mode: {mode!r}. Expected 'text', 'audio', or 'both'.")
        return

    text_emotion = None
    audio_emotion = None

    if mode in ("text", "both"):
        text_input = input("Enter text: ")
        text_emotion = get_text_emotion(text_input)
        print(f"Text Emotion: {text_emotion}")

    if mode in ("audio", "both"):
        audio_emotion = get_audio_emotion()
        print(f"Audio Emotion: {audio_emotion}")

    final_emotion = fuse_emotions(text_emotion, audio_emotion)
    print(f"Final Detected Emotion: {final_emotion}")

# Run the interactive program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


# Output:
# Device set to use cpu
#
#
# Text Emotion: anger
# Final Detected Emotion: anger
