# Multilingual Service Request Analyzer (Fixed)
**Streamlined NLP Pipeline with Transformer-based Intent Classification**

Features:
1. Whisper ASR for audio processing
2. NLLB-200 for multilingual translation, with IndicTrans2 as a fallback
3. BERT-style sentence-embedding intent classification (all-MiniLM-L6-v2), with a keyword fallback
4. Simplified architecture

---

In [None]:
# Install required packages
import subprocess
import sys

# Third-party packages the rest of this file imports (pip requirement specifiers).
packages = [
    'torch',
    'transformers>=4.21.0',
    'openai-whisper',
    'sentence-transformers',
    'scipy',
]

for package in packages:
    try:
        # Invoke pip through the running interpreter so the packages are
        # installed into the environment this code executes in.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package])
    except (subprocess.CalledProcessError, OSError):
        # pip exited non-zero or could not be launched; report it and move on
        # to the next package. Ctrl-C / SystemExit are deliberately not caught.
        print(f"Failed: {package}")
    else:
        print(f"{package} installed")

print("Installation complete!")

In [None]:
import torch
import whisper
import re
import numpy as np
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device once: CUDA when torch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
# Load Models
print("Loading models...")

# Bind every model handle up front so later code can test each one for
# availability regardless of which loading branch below succeeds.
whisper_model = None
translator = None
tokenizer_trans = None
model_trans = None
intent_model = None

# Whisper for ASR
try:
    whisper_model = whisper.load_model("base")
    print("Whisper loaded")
except Exception as exc:
    print(f"Whisper failed: {exc}")

# Translation: try the NLLB-200 pipeline first, then fall back to IndicTrans2.
try:
    translation_model = "facebook/nllb-200-distilled-600M"
    translator = pipeline(
        "translation",
        model=translation_model,
        # transformers pipelines take a device index: 0 = first GPU, -1 = CPU
        device=0 if device == "cuda" else -1,
    )
    print("Translation model loaded")
except Exception:
    try:
        # Fallback to IndicTrans2 (ships custom model code, hence trust_remote_code)
        tokenizer_trans = AutoTokenizer.from_pretrained(
            "ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True
        )
        model_trans = AutoModelForSeq2SeqLM.from_pretrained(
            "ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True
        )
        model_trans.to(device)
        translator = None
        print("IndicTrans2 loaded")
    except Exception as exc:
        # Reset both handles so a half-loaded tokenizer/model pair is never used.
        tokenizer_trans = None
        model_trans = None
        translator = None
        print(f"Translation unavailable: {exc}")

# Sentence transformer for intent classification
try:
    intent_model = SentenceTransformer('all-MiniLM-L6-v2')
    print("Intent classifier loaded")
except Exception as exc:
    print(f"Intent classifier failed: {exc}")

In [None]:
# Intent catalogue: service category -> free-text description of typical requests.
SERVICE_INTENTS = {
    'Plumbing': "Water leakage, pipe repair, tap fixing, drain cleaning, toilet problems, plumber service",
    'Electrical': "Power outage, light not working, electrical wiring, switch repair, electrician needed",
    'Cleaning': "House cleaning, garbage removal, sweeping, mopping, sanitization service",
    'Repair': "Fix broken items, maintenance service, repair work, technician needed",
    'Transport': "Car service, bike repair, taxi booking, delivery service, driver needed",
    'Healthcare': "Doctor consultation, medical help, nurse needed, hospital visit",
    'Home Services': "Cooking, food preparation, laundry, gardening, painting work",
    'General': "General help, assistance needed, support required, other services"
}

# Encode every category description once; stays None when no embedding model
# is available.
intent_embeddings = None
if intent_model:
    intent_embeddings = {
        label: intent_model.encode(blurb)
        for label, blurb in SERVICE_INTENTS.items()
    }
    print("Intent embeddings computed")

In [None]:
# Core Functions

def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the loaded Whisper model.

    Returns the stripped transcript text. If no Whisper model is loaded, the
    placeholder string "Audio transcription unavailable" is returned. Any
    exception raised while transcribing is returned as an "Error: ..." string
    instead of being propagated.
    """
    if whisper_model:
        try:
            transcript = whisper_model.transcribe(audio_path)["text"]
            return transcript.strip()
        except Exception as err:
            return f"Error: {err}"
    return "Audio transcription unavailable"

def detect_language(text):
    """Guess a language code for *text* from the Unicode scripts it contains.

    Returns 'hi' when any character in U+0900-U+097F is present, otherwise
    'ta' when any character in U+0B80-U+0BFF is present, otherwise 'en'.
    """
    # Checked in order: the first script found decides the result.
    script_checks = (
        (r'[\u0900-\u097F]', 'hi'),
        (r'[\u0B80-\u0BFF]', 'ta'),
    )
    for pattern, code in script_checks:
        if re.search(pattern, text) is not None:
            return code
    return 'en'

def translate_text(text, source_lang):
    """Translate *text* from *source_lang* into English.

    English input ('en') is returned unchanged. Otherwise the NLLB pipeline is
    used when it is loaded; failing that, the IndicTrans2 tokenizer/model pair
    is used when both are loaded. If neither backend is available, or any
    exception occurs, the original text is returned (errors are printed).
    """
    if source_lang == 'en':
        return text

    try:
        if translator:  # NLLB pipeline
            nllb_codes = {'hi': 'hin_Deva', 'ta': 'tam_Taml'}
            # Unknown language codes are treated as Hindi.
            src_code = nllb_codes.get(source_lang, 'hin_Deva')
            outputs = translator(text, src_lang=src_code, tgt_lang='eng_Latn')
            return outputs[0]['translation_text']

        if not (tokenizer_trans and model_trans):
            return text  # No translation backend available

        # IndicTrans2: tokenize, move tensors to the model's device, generate.
        encoded = tokenizer_trans(
            text, return_tensors="pt", padding=True, truncation=True, max_length=256
        )
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

        with torch.no_grad():
            generated = model_trans.generate(**encoded, max_length=256, num_beams=5)

        decoded = tokenizer_trans.decode(generated[0], skip_special_tokens=True)
        return decoded.strip()

    except Exception as err:
        print(f"Translation error: {err}")
        return text

def classify_intent(text):
    """Pick the service category whose description embedding best matches *text*.

    Returns a tuple ``(best_intent, confidence, similarities)`` where
    ``similarities`` maps every category to ``1 - cosine distance`` between the
    text embedding and that category's embedding, floored at 0, and
    ``confidence`` is the best category's value. Falls back to
    ``classify_intent_fallback`` when the embedding model or the precomputed
    embeddings are missing, or when scoring raises.
    """
    if not (intent_model and intent_embeddings):
        # No embedding model or no cached vectors: use keyword matching.
        return classify_intent_fallback(text)

    try:
        query_vec = intent_model.encode(text)

        # Cosine similarity per category, clamped so it is never negative.
        similarities = {
            label: max(0, 1 - cosine(query_vec, ref_vec))
            for label, ref_vec in intent_embeddings.items()
        }

        top_label = max(similarities, key=similarities.get)
        return top_label, similarities[top_label], similarities

    except Exception as err:
        print(f"Intent classification error: {err}")
        return classify_intent_fallback(text)

def classify_intent_fallback(text):
    """Classify *text* by case-insensitive keyword substring matching.

    Each category scores the fraction of its keywords found in the text.
    Returns ``(best_intent, confidence, scores)`` where ``scores`` holds only
    the categories with at least one hit and ``confidence`` is twice the best
    fraction, capped at 1.0. With no hits at all the result is
    ``("General", 0.5, {"General": 0.5})``.
    """
    keyword_table = {
        'Plumbing': ['water', 'pipe', 'leak', 'tap', 'drain', 'toilet'],
        'Electrical': ['light', 'power', 'electricity', 'wire', 'switch'],
        'Cleaning': ['clean', 'wash', 'dirty', 'sweep', 'garbage'],
        'Repair': ['fix', 'broken', 'repair', 'damage'],
        'Transport': ['car', 'bike', 'taxi', 'drive', 'delivery'],
        'Healthcare': ['doctor', 'medical', 'health', 'medicine'],
        'Home Services': ['cook', 'food', 'laundry', 'garden']
    }

    lowered = text.lower()
    hit_ratios = {}
    for label, vocabulary in keyword_table.items():
        hits = [term for term in vocabulary if term in lowered]
        if hits:
            hit_ratios[label] = len(hits) / len(vocabulary)

    if not hit_ratios:
        return "General", 0.5, {"General": 0.5}

    winner = max(hit_ratios, key=hit_ratios.get)
    # Doubling lets a category reach full confidence at half of its keywords.
    return winner, min(hit_ratios[winner] * 2, 1.0), hit_ratios

def get_urgency(text):
    """Rate the urgency of *text* by counting urgency cue words.

    Matching is a case-insensitive substring test against a fixed cue list.
    Returns "High" for two or more distinct cues, "Medium" for exactly one,
    and "Low" for none.
    """
    cues = ('urgent', 'emergency', 'asap', 'quickly', 'broken', 'leak')
    lowered = text.lower()
    hits = len([cue for cue in cues if cue in lowered])
    # Index by hit count, saturating at 2 so any larger count maps to "High".
    levels = ("Low", "Medium", "High")
    return levels[min(hits, 2)]

In [None]:
# Main Processing Function

def analyze_request(text_input=None, audio_path=None):
    """Run the full analysis pipeline and return a formatted text report.

    Args:
        text_input: The request as text. Ignored when ``audio_path`` is given.
        audio_path: Optional path to an audio file; when truthy it is
            transcribed with ``transcribe_audio`` and the transcript replaces
            ``text_input``.

    Returns:
        A multi-line report string with the original text, detected language,
        the English translation (for non-English input), the predicted service
        type with its confidence, the top match and a recommendation. Returns
        "No input provided" when the input is missing or only whitespace.
    """
    # Audio, when supplied, takes precedence over any text passed in.
    if audio_path:
        text_input = transcribe_audio(audio_path)

    if not text_input or not text_input.strip():
        return "No input provided"

    original_text = text_input.strip()

    # Language detection and translation
    lang = detect_language(original_text)
    lang_names = {'hi': 'Hindi', 'ta': 'Tamil', 'en': 'English'}

    if lang != 'en':
        translated_text = translate_text(original_text, lang)
    else:
        translated_text = original_text

    # Intent classification (always performed on the English text)
    intent, confidence, scores = classify_intent(translated_text)

    # Format results
    result = f"""
SERVICE REQUEST ANALYSIS
{'='*40}

Original: {original_text}
Language: {lang_names.get(lang, 'Unknown')}
"""

    if lang != 'en':
        result += f"Translation: {translated_text}\n"

    result += f"""
Service Type: {intent}
Confidence: {confidence:.1%}

Top Match:
"""

    # Show the top match, scaled relative to the best score. The best score
    # can be 0 when every similarity was clamped to zero, so guard the
    # division instead of raising ZeroDivisionError.
    best_score = max(scores.values()) if scores else 0
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)[:1]
    for service, score in sorted_scores:
        percentage = (score / best_score) * 100 if best_score > 0 else 0
        result += f"  • {service}: {percentage:.0f}%\n"

    # Urgency is intentionally not part of the report at the moment.
    result += f"\nRecommendation: Contact {intent.lower()} service provider"
    return result

In [None]:
# # Testing

# def run_tests():
#     """Run test cases"""
#     test_cases = [
#         "My kitchen tap is leaking water everywhere urgently",
#         "The bedroom light is not working",
#         "Need house cleaning service",
#         "Car engine repair needed",
#         "मुझे डॉक्टर चाहिए"  # "I need a doctor" in Hindi
#     ]
    
#     print("RUNNING TESTS\n")
    
#     for i, test in enumerate(test_cases, 1):
#         print(f"Test {i}: {test}")
#         result = analyze_request(test)
#         print(result)
#         print("\n" + "-"*50 + "\n")

# # Run tests
# run_tests()

# print("\n🎉 SYSTEM READY!")
# print("\nUsage:")
# print("• analyze_request('your text here')")
# print("• analyze_request(audio_path='path/to/audio.wav')")

In [None]:
# Demo

def demo():
    """Run an interactive loop that analyzes requests typed by the user.

    Each non-empty line is passed to ``analyze_request`` and the report is
    printed. The loop ends when the user types 'quit', 'exit' or 'q', presses
    Ctrl-C, or when input can no longer be read (end of input or an unusable
    stdin). Errors raised while analyzing a request are printed and the loop
    continues with the next request.
    """
    print("\nINTERACTIVE DEMO")
    print("Enter requests (type 'quit' to exit):\n")

    while True:
        # Read one request. A failure here is terminal: retrying a broken or
        # exhausted input stream would otherwise spin forever.
        try:
            user_input = input("Request: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nGoodbye!")
            break
        except Exception as e:
            print(f"Error: {e}")
            break

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("Goodbye!")
            break

        if not user_input:
            continue

        # Analysis errors are reported but do not end the session.
        try:
            result = analyze_request(user_input)
            print(result)
            print("\n" + "-"*40 + "\n")
        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except Exception as e:
            print(f"Error: {e}")
# Start the interactive demo loop as soon as this code is executed.
demo()