## 1. Setup

In [None]:
# Install the Airavata Python SDK with its "notebook" extra: quietly (-q),
# bypassing the pip cache, and reinstalling even if it is already present.
%pip install -q --no-cache-dir --force-reinstall airavata-python-sdk[notebook]
# NOTE(review): importing this module presumably registers the % magics used
# below — confirm against the SDK documentation.
import airavata_jupyter_magic
# NOTE(review): the remaining magics appear to (1) log in, (2) request a
# runtime named "hpc_cpu" described by cybershuttle.yml, choosing among the
# resources listed in --use, and (3) route execution to that runtime.
# The unit of --walltime is not visible here — confirm.
%authenticate
%request_runtime hpc_cpu --file=cybershuttle.yml --walltime=60 --use=NeuroData25VC1:cloud,expanse:shared,anvil:shared
%switch_runtime hpc_cpu

## 2. The Program

In [None]:
import gradio as gr
from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer, MarianMTModel, MarianTokenizer
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# =========================================== TRANSLATE FUNCTION ===========================================
# Function to get the model name based on the source and target language
def get_model_name(source_language, target_language):
    """Build the Helsinki-NLP OPUS-MT model identifier for a language pair.

    Args:
        source_language: Code of the language to translate from (e.g. "en").
        target_language: Code of the language to translate to (e.g. "es").

    Returns:
        A string of the form "Helsinki-NLP/opus-mt-<source>-<target>".
    """
    name_template = "Helsinki-NLP/opus-mt-{}-{}"
    return name_template.format(source_language, target_language)

# Function to perform translation
# Already-loaded (tokenizer, model) pairs keyed by model name, so each language
# pair is loaded once per session instead of on every button click.
_TRANSLATION_MODELS = {}


def _load_translation_model(model_name):
    """Return the cached (tokenizer, model) pair for *model_name*, loading it on first use."""
    if model_name not in _TRANSLATION_MODELS:
        _TRANSLATION_MODELS[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _TRANSLATION_MODELS[model_name]


def translate_text(input_text, source_language='en', target_language='es'):
    """Translate *input_text* between two languages with a MarianMT model.

    Args:
        input_text: Text to translate.
        source_language: Language code of the input (default "en").
        target_language: Language code of the output (default "es").

    Returns:
        The translated text. Empty/whitespace-only input yields "" and an
        identical source/target pair returns the input unchanged; neither case
        loads a model.

    Raises:
        Whatever ``from_pretrained`` raises when no model can be loaded for the
        requested pair (transformers reports a missing model as ``OSError``).
    """
    if not input_text or not input_text.strip():
        return ""

    # There is no "xx-xx" OPUS-MT model; translating a language into itself
    # is the identity.
    if source_language == target_language:
        return input_text

    model_name = get_model_name(source_language, target_language)
    tokenizer, model = _load_translation_model(model_name)

    # Tokenize without padding: a single sequence needs none, and padding to
    # 512 tokens only makes the encoder process hundreds of pad positions.
    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which is forwarded to generate() explicitly.
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Allow the output to be as long as the (truncated) input may be; a lower
    # cap silently cuts off translations of longer passages.
    translated_ids = model.generate(
        **encoded,
        max_length=512,
        num_beams=4,
        early_stopping=True,
    )

    return tokenizer.decode(translated_ids[0], skip_special_tokens=True)

# =========================================== SUMMARIZE FUNCTION ===========================================
# Shared T5 checkpoint, loaded once when this cell runs. `tokenizer` and
# `model` are module-level globals read by summarize_text() and
# answer_question() below.
model_name = "t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def summarize_text(input_text):
    """Summarize *input_text* with the shared T5 model.

    Args:
        input_text: Text to summarize; at least 15 whitespace-separated words.

    Returns:
        The generated summary, or a human-readable message when the input is
        too short (including empty/None) or the model merely echoed the input.
    """
    original_text = (input_text or "").strip()
    if len(original_text.split()) < 15:  # Minimum words needed for good summary
        return "Input too short - please provide at least 15-20 words for meaningful summarization."

    # T5 selects its task from a text prefix.
    prompt = "summarize: " + original_text

    # No padding: a single sequence needs none, and padding to 512 tokens only
    # makes the encoder process pad positions. Calling the tokenizer (rather
    # than .encode) also yields the attention mask passed to generate().
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    summary_ids = model.generate(
        **encoded,
        max_length=100,
        min_length=30,
        # NOTE(review): in transformers' beam search a length_penalty > 0
        # promotes *longer* sequences, so 3.0 does not favour shorter
        # summaries. Value kept to avoid changing outputs — revisit.
        length_penalty=3.0,
        num_beams=6,
        early_stopping=True,
        no_repeat_ngram_size=3,  # Never repeat the same 3-gram
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Compare against the text without the task prefix: an echo of the input
    # is not a summary.
    if summary.lower() == original_text.lower():
        return "Summary failed (input may be too short or unclear). Try with longer text."

    return summary

# =========================================== Q&A FUNCTION ===========================================
def answer_question(context, question):
    """Answer *question* from *context* with the shared T5 model.

    Args:
        context: Background text that should contain the answer.
        question: Question about the context.

    Returns:
        The first sentence of the generated answer (trailing period removed),
        or a fallback message when either input is empty or the model
        produced nothing.
    """
    fallback = "I couldn't find an answer in the context."

    context = (context or "").strip()
    question = (question or "").strip()
    if not context or not question:
        # Nothing to answer from / nothing asked: skip the model entirely.
        return fallback

    # Reading-comprehension input format T5 was trained with (SQuAD task).
    prompt = f"question: {question} context: {context}"

    # No padding: a single sequence needs none. Calling the tokenizer (rather
    # than .encode) also yields the attention mask passed to generate().
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Deterministic beam search. `temperature` is deliberately not passed: it
    # only takes effect when sampling (do_sample=True) and is otherwise ignored.
    answer_ids = model.generate(
        **encoded,
        max_length=100,
        min_length=5,            # Avoid empty answers
        num_beams=5,
        early_stopping=True,
        repetition_penalty=2.5,  # Reduce repeated phrases
        # NOTE(review): length_penalty > 0 promotes longer sequences in
        # transformers' beam search; value kept to avoid changing outputs.
        length_penalty=1.5,
        no_repeat_ngram_size=3,  # Never repeat the same 3-gram
    )

    answer = tokenizer.decode(
        answer_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )

    # Keep only the first sentence. Splitting on ". " (period + space) instead
    # of every "." keeps decimals and abbreviations such as "3.14" intact.
    answer = answer.partition(". ")[0].strip().rstrip(".").strip()

    return answer if answer else fallback

# =========================================== CLASSIFY FUNCTION ===========================================
# Transformer sentiment pipeline, created once when this cell runs.
# classify_sentiment() takes element [0] of its result and looks up the
# entries labelled 'POS' and 'NEG'.
# NOTE(review): `return_all_scores` is deprecated in recent transformers
# releases in favour of `top_k=None`; the nesting of the result may differ, so
# check how classify_sentiment() indexes it before switching — confirm.
hf_classifier = pipeline(
    "text-classification",
    model="finiteautomata/bertweet-base-sentiment-analysis",
    return_all_scores=True
)
# VADER analyzer; classify_sentiment() reads the 'pos' and 'neg' entries of
# its polarity_scores() output.
vader = SentimentIntensityAnalyzer()

def classify_sentiment(input_text):
    """Label *input_text* as POSITIVE or NEGATIVE with a strength qualifier.

    The transformer classifier's POS/NEG scores are blended with VADER's
    pos/neg scores (weights 0.7 and 0.3). The winning side's blended score is
    scaled by (1 + its VADER score), capped at 0.99, and mapped to a strength
    word.

    Returns:
        A string such as "Very POSITIVE! (Confidence: 85%)", or
        "Analysis error: <message>" if anything raises along the way.
    """
    try:
        label_scores = hf_classifier(input_text, truncation=True)[0]

        def score_of(label):
            # First entry carrying the requested label.
            return next(entry['score'] for entry in label_scores if entry['label'] == label)

        hf_pos = score_of('POS')
        hf_neg = score_of('NEG')

        lexicon = vader.polarity_scores(input_text)

        # Weighted blend: 70% transformer, 30% VADER.
        blended_pos = (hf_pos * 0.7) + (lexicon['pos'] * 0.3)
        blended_neg = (hf_neg * 0.7) + (lexicon['neg'] * 0.3)

        # Ties fall to NEGATIVE.
        if blended_pos > blended_neg:
            sentiment, base_confidence, intensity = "POSITIVE", blended_pos, lexicon['pos']
        else:
            sentiment, base_confidence, intensity = "NEGATIVE", blended_neg, lexicon['neg']

        adjusted_confidence = min(base_confidence * (1 + intensity), 0.99)

        # (lower bound, strength word, suffix), highest band first.
        bands = (
            (0.9, "Extremely", "!"),
            (0.8, "Very", "!"),
            (0.7, "Strongly", ""),
            (0.6, "Fairly", ""),
            (0.5, "Moderately", " (somewhat)"),
            (0.4, "Somewhat", " (mildly)"),
            (0, "Slightly", " (barely)"),
        )
        strength, suffix = next(
            (word, mark) for floor, word, mark in bands
            if adjusted_confidence >= floor
        )

        return f"{strength} {sentiment}{suffix} (Confidence: {adjusted_confidence:.0%})"

    except Exception as e:
        return f"Analysis error: {str(e)}"

# =========================================== MAIN FUNCTION ===========================================
# Language mapping dictionary: language code -> display name.
# run_task() passes the selected codes on to translate_text(), which uses them
# to build the translation model name.
# NOTE(review): not every source/target combination of these codes is
# guaranteed to have a published model under that name — verify the pairs.
LANGUAGE_MAP = {
    "en": "English",
    "es": "Español (Spanish)",
    "fr": "Français (French)",
    "de": "Deutsch (German)",
    "it": "Italiano (Italian)",
    "pt": "Português (Portuguese)",
    "ja": "日本語 (Japanese)",
    "zh": "中文 (Chinese)"
}

# Dropdown labels in "<code> - <name>" form; run_task() recovers the code by
# taking the text before the first space.
language_options = [f"{code} - {name}" for code, name in LANGUAGE_MAP.items()]

def validate_input(task, input_text):
    """Check that *input_text* is usable for *task*.

    Args:
        task: Name of the selected task.
        input_text: Raw text from the input box.

    Returns:
        True when the input is acceptable.

    Raises:
        gr.Error: If the text is empty/whitespace-only, or if the task is
            "Answer Question" and the stripped text is a single line.
    """
    stripped = input_text.strip()
    if stripped == "":
        raise gr.Error("Input text cannot be empty!")

    # Fewer than two lines is the same as "contains no newline".
    if task == "Answer Question" and "\n" not in stripped:
        raise gr.Error(
            "For 'Answer Question', input must have:\n"
            "Line 1: Context (text with the answer)\n"
            "Line 2: Question"
        )

    return True

def run_task(task, input_text, source_lang, target_lang):
    """Validate the input and dispatch it to the function for *task*.

    Args:
        task: One of "Summarize", "Translate", "Answer Question", "Classify".
        input_text: Raw text from the input box. For "Answer Question" the
            first non-blank line is the context and the second non-blank line
            is the question.
        source_lang: Dropdown label "<code> - <name>" of the source language.
        target_lang: Dropdown label "<code> - <name>" of the target language.

    Returns:
        The result string of the selected task, or a prompt to pick a valid
        task when *task* is not recognised.

    Raises:
        Whatever validate_input() raises for unusable input.
    """
    validate_input(task, input_text)

    if task == "Summarize":
        return summarize_text(input_text)

    if task == "Translate":
        # Labels look like "en - English"; the code is the first token.
        source_code = source_lang.split(" ")[0]
        target_code = target_lang.split(" ")[0]
        return translate_text(input_text, source_code, target_code)

    if task == "Answer Question":
        # Skip blank lines so that "context, empty line, question" does not
        # send an empty question to the model. validate_input() has already
        # ensured there are at least two non-blank lines.
        lines = [line.strip() for line in input_text.split("\n") if line.strip()]
        context, question = lines[0], lines[1]
        return answer_question(context, question)

    if task == "Classify":
        return classify_sentiment(input_text)

    return "Please select a valid task."

def show_translation_fields(task):
    """Return a Gradio update that is visible only when *task* is "Translate"."""
    is_translate = task == "Translate"
    return gr.update(visible=is_translate)

def update_help_text(task):
    """Return the tip line to display for the selected *task*."""
    if task == "Answer Question":
        return "Tip: Enter context (line 1) and question (line 2)."
    if task == "Translate":
        return "Tip: Enter text and select source and target languages."
    # Every other task takes plain text.
    return "Tip: Enter text and click 'Run Task'."

# Build the Gradio UI: task selector, tip line, input box, language pickers
# (hidden unless translating), run button and result box.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI NLP Tool")

    # Task selector; "Summarize" is preselected, so the initial tip below is
    # the generic one.
    task = gr.Dropdown(["Summarize", "Translate", "Answer Question", "Classify"], label="Select Task", value="Summarize")
    help_message = gr.Markdown("Tip: Enter text and click 'Run Task'.")

    input_text = gr.Textbox(label="Enter Input Text", lines=8, placeholder="Enter your text here...")
    
    # Language pickers start hidden; show_translation_fields() toggles the row.
    # Defaults are the first two entries of language_options.
    with gr.Row(visible=False) as language_row:
        source_lang = gr.Dropdown(language_options, label="From", value=language_options[0])
        target_lang = gr.Dropdown(language_options, label="To", value=language_options[1])

    run_btn = gr.Button("Run Task")
    output_text = gr.Textbox(label="Result", lines=8)

    # Interactions
    # Changing the task updates both the language row's visibility and the tip.
    task.change(fn=show_translation_fields, inputs=task, outputs=language_row)
    task.change(fn=update_help_text, inputs=task, outputs=help_message)
    # The button sends all four inputs to run_task() and shows its return value.
    run_btn.click(fn=run_task, inputs=[task, input_text, source_lang, target_lang], outputs=output_text)

# Start the app.
demo.launch()