In [None]:
!pip install -q transformers datasets torchaudio accelerate soundfile pillow


In [None]:
from datasets import load_dataset
import numpy as np

# Stream the LibriSpeech "clean" validation split and take only its first
# example (streaming=True, so the split is iterated rather than indexed).
dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True)
sample = next(iter(dataset))

# The "audio" field is consumed through its get_all_samples() decoder API,
# which yields the waveform (.data) and its sample rate (.sample_rate).
audio_decoder = sample["audio"]
samples = audio_decoder.get_all_samples()
sampling_rate = samples.sample_rate

# Collapse any multi-dimensional waveform by averaging over axis 0
# (stereo → mono), then store it as float32.
audio_array = samples.data.numpy()
audio_array = (
    audio_array.mean(axis=0) if audio_array.ndim > 1 else audio_array
).astype(np.float32)

print("Audio ready:", audio_array.shape, sampling_rate)


README.md: 0.00B [00:00, ?B/s]

Resolving data files:   0%|          | 0/48 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/48 [00:00<?, ?it/s]

Audio ready: (105440,) 16000


In [None]:
import torch
from transformers import pipeline

# Run on the first CUDA GPU when one is available; -1 selects the CPU so the
# cell also works on machines without a GPU.
asr_device = 0 if torch.cuda.is_available() else -1

asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    device=asr_device,
)

# Hand the pipeline the sampling rate together with the waveform. A bare
# array carries no rate information, so the pipeline cannot resample it if
# the audio rate ever differs from what the model expects.
voice_text = asr_pipeline(
    {"raw": audio_array, "sampling_rate": sampling_rate}
)["text"]
print("Voice Text:", voice_text)


Loading weights:   0%|          | 0/212 [00:00<?, ?it/s]

Wav2Vec2ForCTC LOAD REPORT from: facebook/wav2vec2-base-960h
Key                        | Status  | 
---------------------------+---------+-
wav2vec2.masked_spec_embed | MISSING | 

Notes:
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Voice Text: HE WAS IN A FEVERED STATE OF MIND OWING TO THE BLIGHT HIS WIFE'S ACTION THREATENED TO CAST UPON HIS ENTIRE FUTURE


In [None]:
# Written customer message (email / chat). The leading and trailing newlines
# are part of the value.
text_input = (
    "\n"
    "My order arrived late and the screen is cracked.\n"
    "I want a refund immediately.\n"
)

# Caption describing the customer's image (from BLIP or simulated).
image_description = "Image shows a smartphone with a cracked screen."


In [None]:
# Merge the three modalities into one labelled text block. Each section is a
# heading line followed by its content; sections are separated by a blank
# line and the whole block is wrapped in a leading and trailing newline.
sections = [
    ("Customer Text:", text_input),
    ("Voice Transcription:", voice_text),
    ("Image Description:", image_description),
]
combined_text = (
    "\n"
    + "\n\n".join(f"{heading}\n{content}" for heading, content in sections)
    + "\n"
)

print(combined_text)



Customer Text:

My order arrived late and the screen is cracked.
I want a refund immediately.


Voice Transcription:
HE WAS IN A FEVERED STATE OF MIND OWING TO THE BLIGHT HIS WIFE'S ACTION THREATENED TO CAST UPON HIS ENTIRE FUTURE

Image Description:
Image shows a smartphone with a cracked screen.



In [None]:
import torch
from transformers import pipeline

# Run on the first CUDA GPU when one is available; -1 selects the CPU so the
# cell also works on machines without a GPU.
intent_device = 0 if torch.cuda.is_available() else -1

intent_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=intent_device,
)

# Candidate intents the combined customer text is scored against.
intent_labels = [
    "refund request",
    "delivery delay",
    "product damage",
    "technical issue",
    "praise",
    "general complaint",
]

intent_result = intent_classifier(
    combined_text,
    candidate_labels=intent_labels,
)

# The first entry of "labels" is used as the detected intent (the zero-shot
# pipeline is documented to return labels ordered by score).
intent = intent_result["labels"][0]
print("Detected Intent:", intent)


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/515 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Detected Intent: refund request


In [None]:
import torch

# Run on the first CUDA GPU when one is available; -1 selects the CPU so the
# cell also works on machines without a GPU.
emotion_device = 0 if torch.cuda.is_available() else -1

emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    device=emotion_device,
)

# truncation=True clips inputs that exceed the model's maximum length instead
# of letting a long transcript raise inside the model; short inputs are
# unaffected.
emotion_output = emotion_classifier(combined_text, truncation=True)
emotion = emotion_output[0]["label"]

print("Detected Emotion:", emotion)


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/105 [00:00<?, ?it/s]

RobertaForSequenceClassification LOAD REPORT from: j-hartmann/emotion-english-distilroberta-base
Key                             | Status     |  | 
--------------------------------+------------+--+-
roberta.embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Detected Emotion: sadness


In [None]:
import re

urgent_keywords = [
    "refund", "immediately", "now", "asap", "unacceptable", "worst"
]

# Match keywords as whole words (optionally with a simple s/ed/ing ending)
# rather than as raw substrings, so that e.g. "now" no longer fires on
# "know", "snow" or "unknown" while "refunded" still counts.
urgent_pattern = re.compile(
    r"\b(?:"
    + "|".join(re.escape(keyword) for keyword in urgent_keywords)
    + r")(?:s|ed|ing)?\b"
)

# Keyword hits take precedence; otherwise a strongly negative emotion raises
# the level to "medium"; everything else stays "low".
if urgent_pattern.search(combined_text.lower()):
    urgency = "high"
elif emotion in ["anger", "fear", "disgust"]:
    urgency = "medium"
else:
    urgency = "low"

print("Urgency Level:", urgency)


Urgency Level: high


In [None]:
import re

# One regex per aspect, searched in the lower-cased combined text.
# "late" is matched as a whole word so it does not fire on "later",
# "related" or "template"; "crack", "broken" and "support" stay plain
# substring matches, so inflections such as "cracked" are still caught.
aspect_patterns = {
    "delivery": r"\blate\b",
    "product_quality": r"crack|broken",
    "customer_support": r"support",
}

# Lower-case once instead of once per check.
combined_text_lower = combined_text.lower()

# Every aspect whose pattern occurs is recorded as "negative".
aspect_sentiment = {
    aspect: "negative"
    for aspect, pattern in aspect_patterns.items()
    if re.search(pattern, combined_text_lower)
}

print("Aspect Sentiment:", aspect_sentiment)


Aspect Sentiment: {'delivery': 'negative', 'product_quality': 'negative'}


In [None]:
def agent_decision(intent, emotion, urgency):
    """Choose the next support action from the detected signals.

    Args:
        intent: Detected intent label; checked for the substrings
            "refund" and "technical".
        emotion: Detected emotion label.
        urgency: Urgency level string; only "high" is treated specially.

    Returns:
        "escalate_to_human" when urgency is "high" and the emotion is
        "anger" or "disgust"; otherwise "generate_return_label" if the
        intent mentions "refund", "search_knowledge_base" if it mentions
        "technical", and "auto_reply" in every other case.
    """
    # Escalation outranks every intent-based route.
    needs_human = urgency == "high" and emotion in ["anger", "disgust"]
    if needs_human:
        return "escalate_to_human"

    # Intent routes in priority order: the first matching keyword wins.
    intent_routes = (
        ("refund", "generate_return_label"),
        ("technical", "search_knowledge_base"),
    )
    for keyword, routed_action in intent_routes:
        if keyword in intent:
            return routed_action

    return "auto_reply"

# Route this ticket using the signals computed in the earlier cells.
action = agent_decision(intent=intent, emotion=emotion, urgency=urgency)
print("Agent Action:", action)


Agent Action: generate_return_label


In [None]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# flan-t5 is an encoder-decoder (T5ForConditionalGeneration) model. Loading it
# through the causal-LM "text-generation" pipeline is unsupported and only
# echoed the prompt back, so it is loaded with the seq2seq model class and
# generate() is called directly.
RESPONSE_MODEL_NAME = "google/flan-t5-base"

# Use the first CUDA GPU when available, otherwise fall back to the CPU.
response_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

response_tokenizer = AutoTokenizer.from_pretrained(RESPONSE_MODEL_NAME)
response_model = AutoModelForSeq2SeqLM.from_pretrained(RESPONSE_MODEL_NAME).to(
    response_device
)
response_model.eval()


def response_generator(prompt, max_new_tokens=150, do_sample=False):
    """Generate a reply for ``prompt`` with the seq2seq response model.

    Args:
        prompt: Instruction text fed to the encoder.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: Whether to sample; False gives deterministic decoding.

    Returns:
        A one-element list ``[{"generated_text": <reply>}]``, so the result
        is indexed the same way as a pipeline result.
    """
    encoded = response_tokenizer(
        prompt, return_tensors="pt", truncation=True
    ).to(response_device)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_ids = response_model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
        )
    reply = response_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return [{"generated_text": reply}]


prompt = f"""
You are a customer support assistant.

Customer emotion: {emotion}
Urgency level: {urgency}
Intent: {intent}
Issues: {aspect_sentiment}

Respond empathetically and professionally.
"""

response = response_generator(
    prompt,
    max_new_tokens=150,
    do_sample=False
)[0]["generated_text"]

print("AI Response:")
print(response)


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/282 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'AfmoeForCausalLM', 'ApertusForCausalLM', 'ArceeForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'BltForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'CwmForCausalLM', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'DogeForCausalLM', 'Dots1ForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'Ernie4_5ForCausalLM', 'Ernie4_5_MoeForCausalLM', 'Exaone4ForCausalLM', 'FalconForCausalLM', 'FalconH1ForCausalLM', 'FalconMambaForCausa

AI Response:

You are a customer support assistant.

Customer emotion: sadness
Urgency level: high
Intent: refund request
Issues: {'delivery': 'negative', 'product_quality': 'negative'}

Respond empathetically and professionally.



In [None]:
# Summarise every signal produced by the earlier cells, one per line,
# in the order they were computed.
summary_fields = [
    ("Intent", intent),
    ("Emotion", emotion),
    ("Urgency", urgency),
    ("Aspect Sentiment", aspect_sentiment),
    ("Agent Action", action),
    ("Response", response),
]

print("\n--- FINAL SYSTEM OUTPUT ---")
for field_label, field_value in summary_fields:
    print(f"{field_label}:", field_value)



--- FINAL SYSTEM OUTPUT ---
Intent: refund request
Emotion: sadness
Urgency: high
Aspect Sentiment: {'delivery': 'negative', 'product_quality': 'negative'}
Agent Action: generate_return_label
Response: 
You are a customer support assistant.

Customer emotion: sadness
Urgency level: high
Intent: refund request
Issues: {'delivery': 'negative', 'product_quality': 'negative'}

Respond empathetically and professionally.

