In [None]:
!pip install -q flask flask_cors pyngrok transformers accelerate bitsandbytes shap lime scikit-learn numpy

In [None]:
pip install shap==0.46.0 "numpy<2" "scikit-learn>=1.3,<1.6"

-----------------------------------------

In [None]:
# ============================================================
# Kaggle Backend for Press Conference Simulator
# ============================================================
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, Any, List
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ============================================================
# 0️⃣ Optional dependencies
# ============================================================
# SHAP and LIME are treated as optional. If importing either one fails for
# any reason, the corresponding name is bound to None so that later code can
# test `shap is None` / `LimeTextExplainer is None` instead of crashing here.
# NOTE(review): `except Exception` (rather than ImportError) presumably also
# covers broken installs such as binary/version mismatches — confirm intent.
try:
    import shap
except Exception:
    shap = None
try:
    from lime.lime_text import LimeTextExplainer
except Exception:
    LimeTextExplainer = None


# ============================================================
# 1️⃣ Explainability helpers
# ============================================================
def _split_sentences(text: str) -> List[str]:
    parts = [p.strip() for p in text.replace("!", ".").replace("?", ".").split(".")]
    return [p for p in parts if p]


def _semantic_explain(speech: str, question: str) -> str:
    """Rank speech sentences by TF-IDF cosine similarity to the question.

    The speech is split with ``_split_sentences``; the sentences plus the
    question are vectorised together with an English stop-word list, and the
    (at most) two sentences most similar to the question are reported.

    Args:
        speech: Opening speech text.
        question: Generated journalist question.

    Returns:
        A human-readable, multi-line summary string. Fallback messages are
        returned when there is nothing to split or nothing to compare.
    """
    segments = _split_sentences(speech)
    if not segments:
        return "No speech segments to analyze."
    corpus = segments + [question]
    vec = TfidfVectorizer(stop_words="english")
    try:
        X = vec.fit_transform(corpus)
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary ends up empty
        # (e.g. every document consists only of stop words or punctuation).
        # Report "no signal" instead of aborting the whole explanation run.
        return "No similarity signal detected."
    # Last row is the question; all preceding rows are speech sentences.
    sims = cosine_similarity(X[-1], X[:-1]).ravel()
    if sims.size == 0:
        return "No similarity signal detected."
    # Indices of the two highest similarities, best first.
    order = np.argsort(sims)[::-1][:2]
    tops = [f"‚Ä¢ \"{segments[i]}\" (sim={sims[i]:.2f})" for i in order if sims[i] > 0]
    return "Likely influential speech parts:\n" + ("\n".join(tops) if tops else "No strong matches.")


def _avg_logprob_for_target(model, tokenizer, context: str, target: str) -> float:
    """Computes mean log-prob of generating target given context."""
    import torch
    model_device = next(model.parameters()).device
    with torch.no_grad():
        full = context + ("\n" if context and not context.endswith("\n") else "") + target
        enc = tokenizer(full, return_tensors="pt")
        input_ids = enc["input_ids"].to(model_device)
        attn_mask = enc.get("attention_mask", None)
        if attn_mask is not None:
            attn_mask = attn_mask.to(model_device)
        ctx_ids = tokenizer(context, return_tensors="pt")["input_ids"].to(model_device)
        ctx_len = ctx_ids.shape[1]
        outputs = model(input_ids=input_ids, attention_mask=attn_mask)
        logits = outputs.logits
        logprobs = logits.log_softmax(dim=-1)
        target_token_ids = input_ids[:, ctx_len:]
        prev_positions = logprobs[:, ctx_len - 1: -1, :] if ctx_len > 0 else logprobs[:, :-1, :]
        seq = min(prev_positions.shape[1], target_token_ids.shape[1])
        tok_logprobs = prev_positions[:, -seq:, :].gather(dim=-1, index=target_token_ids[:, :seq].unsqueeze(-1)).squeeze(-1)
        return float(tok_logprobs.mean().item())


def _shap_explain(speech: str, question: str, model, tokenizer) -> str:
    """Attribute the likelihood of ``question`` to tokens of ``speech`` with SHAP.

    A SHAP text explainer perturbs the (truncated) speech and scores each
    variant with ``_avg_logprob_for_target``. Absolute SHAP values are
    filtered to drop stop words, single letters and non-alphabetic tokens,
    normalised to sum to one, and the top eight are reported.

    Args:
        speech: Opening speech text (only the first 512 characters are used).
        question: Generated question (only the first 256 characters are used).
        model: Language model forwarded to ``_avg_logprob_for_target``.
        tokenizer: Tokenizer used both as SHAP masker and for scoring.

    Returns:
        A human-readable summary string, or a diagnostic message (including
        a traceback) when SHAP is unavailable or fails.
    """
    import numpy as np, traceback, re

    if shap is None:
        return "SHAP not available. Install `shap` (pip install shap==0.46.0)."

    # Stop-word list: prefer NLTK (downloading the corpus on first use); if
    # NLTK is missing or its corpus cannot be obtained, fall back to the list
    # shipped with scikit-learn so the explanation still runs.
    try:
        from nltk.corpus import stopwords
        try:
            STOPWORDS = set(stopwords.words("english"))
        except LookupError:
            import nltk
            nltk.download("stopwords")
            STOPWORDS = set(stopwords.words("english"))
    except Exception:
        from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
        STOPWORDS = set(ENGLISH_STOP_WORDS)

    try:
        # --- Use Hugging Face tokenizer directly
        masker = shap.maskers.Text(tokenizer)

        # --- Scoring function (mean log-prob of generating question given context)
        def score_fn(texts: List[str]) -> np.ndarray:
            scores = []
            for t in texts:
                try:
                    val = float(_avg_logprob_for_target(model, tokenizer, t[:512], question[:256]))
                except Exception:
                    # Sentinel for "could not score this perturbation".
                    val = -999.0
                if not np.isfinite(val):
                    # NaN/inf would poison every SHAP value; use the same sentinel.
                    val = -999.0
                scores.append(val)
            return np.asarray(scores, dtype=float)

        # --- Run SHAP
        explainer = shap.Explainer(score_fn, masker)
        shap_values = explainer([speech[:512]])

        if getattr(shap_values, "values", None) is None:
            return "SHAP returned no values."

        # Magnitude only: the sign of the contribution is not reported.
        vals = np.abs(shap_values.values[0])
        tokens = shap_values.data[0]

        # --- Filter out stopwords & non-alphabetic tokens
        filtered_tokens, filtered_vals = [], []
        for t, v in zip(tokens, vals):  # zip stops at the shorter sequence
            clean_t = re.sub(r"[^a-zA-Z]", "", t).lower()
            if clean_t and clean_t not in STOPWORDS and len(clean_t) > 1:
                filtered_tokens.append(t)
                filtered_vals.append(v)

        if not filtered_tokens:
            return "No informative tokens after stopword filtering."

        # --- Normalize values so the reported shares sum to ~1
        filtered_vals = np.array(filtered_vals)
        filtered_vals /= filtered_vals.sum() + 1e-9  # epsilon avoids div-by-zero

        # --- Sort top tokens
        order = np.argsort(filtered_vals)[::-1][:8]
        top = [f"{filtered_tokens[i]} ({filtered_vals[i]:.3f})" for i in order if filtered_vals[i] > 0]

        if not top:
            return "No positive SHAP signal."

        return "Top influential speech tokens (SHAP, no stopwords): " + ", ".join(top)

    except Exception as e:
        return f"SHAP failed: {type(e).__name__}: {e}\nTraceback:\n{traceback.format_exc()}"




def _attention_explain(text: str, model, tokenizer) -> str:
    """Returns top tokens by last-layer attention weights."""
    import torch
    model.config.output_attentions = True
    enc = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    enc = {k: v.to(next(model.parameters()).device) for k, v in enc.items()}
    with torch.no_grad():
        outputs = model(**enc, output_attentions=True)
    if not hasattr(outputs, "attentions") or outputs.attentions is None:
        return "Attention not available."
    attn = outputs.attentions[-1][0]
    mean_attn = attn.mean(0).mean(0).cpu().numpy()
    tokens = tokenizer.convert_ids_to_tokens(enc["input_ids"][0])
    order = np.argsort(mean_attn)[::-1][:8]
    top = [f"{tokens[i]} ({mean_attn[i]:.3f})" for i in order]
    return "Top attention-weighted tokens: " + ", ".join(top)


def explainability_node_all(state: Dict[str, Any], model=None, tokenizer=None) -> Dict[str, Any]:
    """Run every explainability mode and store the results on ``state``.

    Reads ``state["speech"]`` and ``state["generated_question"]``; when either
    is missing or empty, ``state["explanation"]`` becomes a one-key error dict.
    Otherwise ``state["explanation"]`` maps ``"semantic"``, ``"shap"`` and
    ``"lime"`` to result strings (or ``"general_error"`` if an unexpected
    exception interrupts the run). The same ``state`` object is returned.

    Note: the LIME entry is a placeholder; its classifier returns random
    numbers, so the reported weights carry no meaning.
    """
    speech_text = state.get("speech", "") or ""
    question_text = state.get("generated_question", "") or ""

    # Guard clause: both inputs are required.
    if not (speech_text and question_text):
        state["explanation"] = {"error": "Insufficient data for explainability."}
        return state

    report = {}
    try:
        # Semantic similarity (an exception here falls through to general_error).
        report["semantic"] = _semantic_explain(speech_text, question_text)

        # SHAP: failures are reported in place and do not stop the run.
        try:
            shap_text = _shap_explain(speech_text, question_text, model, tokenizer)
        except Exception as err:
            shap_text = f"SHAP error: {type(err).__name__}: {err}"
        report["shap"] = shap_text

        # LIME (optional, may be slow).
        if LimeTextExplainer is not None:
            def _random_probs(texts):
                # Placeholder classifier: two random "class probabilities" per text.
                return np.array([[np.random.rand(), np.random.rand()] for _ in texts])

            try:
                lime_exp = LimeTextExplainer().explain_instance(
                    speech_text,
                    _random_probs,
                    num_features=6
                )
                lime_text = "LIME placeholder output: " + str(lime_exp.as_list()[:3])
            except Exception as err:
                lime_text = f"LIME error: {type(err).__name__}: {err}"
            report["lime"] = lime_text
        else:
            report["lime"] = "LIME not available (package missing)."

    except Exception as err:
        report["general_error"] = f"Explainability failed: {type(err).__name__}: {err}"

    state["explanation"] = report
    return state


# ============================================================
# 2️⃣ Model loading
# ============================================================
# Loads the tokenizer and causal LM once; the module-level names `tokenizer`,
# `model` and `device` defined here are used by the Flask routes in this file.
print("üöÄ Loading Mistral model...")
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # Half precision on GPU, full precision on CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    # Automatic device placement only on GPU; no device map on CPU.
    device_map="auto" if device == "cuda" else None,
    # NOTE(review): the output recorded in this notebook shows transformers
    # warning that `output_attentions` is not a valid generation flag and that
    # `sdpa` attention does not support it — confirm this flag is still wanted.
    output_attentions=True
)
print("‚úÖ Model ready on device:", device)


# ============================================================
# 3️⃣ Flask app setup
# ============================================================
# Single Flask application object; the route handlers in this file attach to it.
app = Flask(__name__)
# Enable CORS for the whole app using flask_cors defaults.
# NOTE(review): presumably needed so a front end on another origin can call
# this API — confirm, and consider restricting origins because the server is
# published through an ngrok tunnel further down.
CORS(app)


# ============================================================
# 4️⃣ Routes
# ============================================================
@app.route("/generate", methods=["POST"])
def generate():
    """Generate the journalist's next question.

    Expects a JSON object containing either ``messages`` (a chat-style list
    of ``{"role", "content"}`` dicts) or a legacy ``prompt`` string, which is
    wrapped in a default system/user message pair.

    Returns:
        ``{"response": <decoded model output>}`` on success, or a 400 response
        with an ``error`` field when the body is not a JSON object or carries
        neither ``messages`` nor ``prompt``.
    """
    data = request.get_json(force=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400
    messages = data.get("messages")
    prompt = data.get("prompt")

    # Support both message-style and legacy prompt input
    if messages is None and prompt:
        messages = [
            {"role": "system", "content": "You are an investigative journalist."},
            {"role": "user", "content": prompt}
        ]

    # Without any input the chat template call below would fail with a 500.
    if not messages:
        return jsonify({"error": "Provide either `messages` or `prompt`."}), 400

    print("=" * 80)
    print("üì© Generation request received.")
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=600, temperature=0.8, do_sample=True)
    # The whole sequence is decoded, so the response text also contains the
    # rendered prompt (visible in this notebook's recorded output).
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("‚úÖ Generated response:", response_text)
    return jsonify({"response": response_text.strip()})





@app.route("/explain", methods=["POST"])
def explain():
    """Compute every explainability mode for one speech/question pair.

    Reads ``speech`` and ``question`` from the JSON body, delegates to
    ``explainability_node_all`` and returns its ``explanation`` mapping as
    JSON. A preview of each result (first 200 characters) is printed.
    """
    payload = request.get_json(force=True)
    initial_state = {
        "speech": payload.get("speech", ""),
        "generated_question": payload.get("question", ""),
    }

    print("=" * 80)
    print("üß© Explainability request received (all modes)")

    report = explainability_node_all(initial_state, model=model, tokenizer=tokenizer)["explanation"]

    for mode_name, text in report.items():
        # Mark previews that were cut off.
        suffix = "..." if len(text) > 200 else ""
        print(f"‚úÖ {mode_name.upper()} ‚Üí {text[:200]}{suffix}")

    return jsonify(report)


@app.route("/ping", methods=["GET"])
def ping():
    """Liveness probe: always answers with ``{"status": "alive"}``."""
    payload = {"status": "alive"}
    return jsonify(payload)



def _extract_json_object(text: str):
    """Return the first JSON object that parses cleanly inside ``text``, else None."""
    import json

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the full press conference conversation.

    Reads ``persona``, ``role``, ``topic``, ``speech`` and ``history`` (a list
    of ``{"role", "content"}`` dicts) from the JSON body, asks the model for a
    structured evaluation and returns the parsed JSON object.

    Returns:
        The parsed evaluation, or a dict with ``error`` and ``raw_output``
        when no JSON could be recovered from the model output. A 400 response
        is returned when the body is not a JSON object.
    """
    import json, re

    data = request.get_json(force=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400
    persona = data.get("persona", "")
    role = data.get("role", "")
    topic = data.get("topic", "")
    speech = data.get("speech", "")
    history = data.get("history", []) or []

    # --- Build conversation text (malformed turns are skipped, not fatal)
    convo = "\n".join(
        f"{str(t.get('role', '')).capitalize()}: {t.get('content', '')}"
        for t in history
        if isinstance(t, dict)
    )

    prompt = f"""
You are an **AI communication evaluator**.
Analyze the following simulated press conference between a journalist ({persona})
and a guest ({role}) on the topic "{topic}".

Opening speech:
\"\"\"{speech}\"\"\"

Full conversation:
{convo}

---
TASK:
You must produce a *valid JSON object* exactly matching the structure below, with all fields filled in.
Do not include explanations, comments, markdown, or any text outside the JSON.

Each score must be an integer from 0‚Äì5, defined as:
0 = incoherent / irrelevant
1 = poor
2 = fair
3 = average
4 = strong
5 = excellent

---
Return **only** this JSON (no text before or after):

{{
  "summary": "2‚Äì3 sentences summarizing the conversation.",
  "strengths": ["List 3‚Äì5 strong points about the guest's responses."],
  "weaknesses": ["List 3‚Äì5 weaknesses or missed opportunities."],
  "suggestions": ["List 3‚Äì5 practical recommendations for improvement."],
  "scores": {{
    "clarity": <0‚Äì5>,
    "relevance": <0‚Äì5>,
    "persuasiveness": <0‚Äì5>,
    "consistency": <0‚Äì5>,
    "engagement": <0‚Äì5>
  }}
}}

<END>
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # `temperature` without `do_sample=True` is ignored by `generate` (see the
    # warning in this notebook's recorded output), so decoding was already
    # greedy. State that explicitly; deterministic output also suits JSON.
    outputs = model.generate(**inputs, max_new_tokens=800, do_sample=False)
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print("\n=== RAW MODEL OUTPUT ===")
    print(response_text)
    print("=========================\n")

    # 1) The decoded text starts with the prompt, which ends in <END>; keep
    #    only what follows the first <END> so the template JSON is excluded.
    if "<END>" in response_text:
        response_tail = response_text.split("<END>", 1)[1].strip()
    else:
        response_tail = response_text

    # 2) Preferred path: parse the model's JSON exactly as emitted.
    analysis = _extract_json_object(response_tail)

    if analysis is None:
        # 3) Fallback for output whose quotes/newlines arrive backslash-escaped.
        match = re.search(r"\{[\s\S]*\}", response_tail)
        if not match:
            analysis = {"error": "No JSON found", "raw_output": response_tail}
        else:
            cleaned = match.group(0)
            try:
                # latin-1 + backslashreplace keeps non-ASCII characters intact
                # while `unicode_escape` turns \n, \t and \" into real characters.
                cleaned = (
                    cleaned
                    .encode('latin-1', 'backslashreplace')
                    .decode('unicode_escape')
                    .replace('\r', '')
                    .strip()
                )
                # strict=False tolerates the raw newlines produced above.
                analysis = json.loads(cleaned, strict=False)
            except Exception as e:
                analysis = {"error": f"Failed to parse cleaned JSON: {e}", "raw_output": cleaned}

    print("\n=== ANALYSIS RESULT ===")
    print(json.dumps(analysis, indent=4, ensure_ascii=False))
    print("========================\n")

    return jsonify(analysis)


# ============================================================
# 5️⃣ Start ngrok tunnel
# ============================================================
import os

# Read the token from the environment so it never has to be committed with
# the notebook; the literal "..." remains only as the "not configured" marker.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "...")  # Replace with your token
if NGROK_AUTH_TOKEN and NGROK_AUTH_TOKEN != "...":
    # Use the pyngrok API instead of interpolating the secret into a shell command.
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
else:
    # Do not overwrite a previously saved token with the placeholder.
    print("NGROK_AUTH_TOKEN is not set; relying on any previously saved ngrok configuration.")

# Open a public tunnel to the local port the Flask server listens on below.
public_url = ngrok.connect(5000)
print("‚úÖ Public URL:", public_url.public_url)
print("Use this URL in your local Flask app (api_endpoints.py).")


# ============================================================
# 6️⃣ Run server
# ============================================================
# Blocks the cell until the server is stopped.
app.run(port=5000)


üöÄ Loading Mistral model...


2025-11-10 00:44:13.069292: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762735453.092475    1090 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762735453.099880    1090 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

‚úÖ Model ready on device: cuda
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
‚úÖ Public URL: https://unbevelled-articularly-linn.ngrok-free.dev
Use this URL in your local Flask app (api_endpoints.py).
 * Serving Flask app '__main__'
 * Debug mode: off


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.


`sdpa` attention does not support `output_attentions=True` or `head_mask`. Please set your attention to `eager` if you want any of these features.


‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Minister of Health.
Le sujet de la conf√©rence est : AI in National Healthcare Systems.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in National Healthcare Systems
- Interlocuteur (guest) : Mi

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:49, 49.21s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "Today, our ministry is proud to announce the launch of MedAI, a national platform using artificial intelligence to improve patient diagnosis, optimize hospital logi...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.121), AI (0.116),  stored (0.103),  patient (0.077),  diagnosis (0.077),  times (0.072),  waiting (0.072),  reduce (0.072)
‚úÖ LIME ‚Üí LIME placeholder output: [('all', 0.03236345102105749), ('and', 0.025915615859129926), ('national', 0.024486703157430737)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Minister of Health.
Le sujet de la conf√©rence est : AI in National Healthcare Systems.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in National Healthcare Sy

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:47, 47.82s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "We are committed to ensuring that all data is anonymized and securely stored" (sim=0.11)
‚Ä¢ "Today, our ministry is proud to announce the launch of MedAI, a national...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.235), AI (0.175), onym (0.128), ized (0.062), ly (0.050),  secure (0.049),  using (0.025),  platform (0.025)
‚úÖ LIME ‚Üí LIME placeholder output: [('that', 0.030999631294725435), ('reduce', -0.026559385689096413), ('is', -0.02570278069007723)]


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== RAW MODEL OUTPUT ===

You are an **AI communication evaluator**.
Analyze the following simulated press conference between a journalist (investigative_hawk)
and a guest (Minister of Health) on the topic "AI in National Healthcare Systems".

Opening speech:
"""Today, our ministry is proud to announce the launch of MedAI, a national platform using artificial intelligence to improve patient diagnosis, optimize hospital logistics, and reduce waiting times. With AI assistance, doctors can focus more on patients while algorithms handle data analysis. We are committed to ensuring that all data is anonymized and securely stored"""

Full conversation:
Journalist: Minister of Health, could you clarify the specific algorithms used in the MedAI platform and how they have been proven to improve patient diagnosis and reduce waiting times? Also, what measures are in place to ensure the reliability of these algorithms and to mitigate potential misdiagnoses?
Guest: don t know
Journalist: Minister o

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Minister of Health.
Le sujet de la conf√©rence est : AI in National Healthcare Systems.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in National Healthcare Sy

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:46, 46.11s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "Today, our ministry is proud to announce the launch of MedAI, a national platform using artificial intelligence to improve patient diagnosis, optimize hospital logi...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.187), AI (0.150), Today (0.103),  diagnosis (0.054),  stored (0.052),  patient (0.049),  patients (0.029),  launch (0.028)
‚úÖ LIME ‚Üí LIME placeholder output: [('improve', -0.03238225999055931), ('ministry', -0.026437320791312068), ('on', -0.023633962701017995)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Minister of Health.
Le sujet de la conf√©rence est : AI in National Healthcare Systems.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in National Healthcare Sy

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:45, 45.11s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "With AI assistance, doctors can focus more on patients while algorithms handle data analysis" (sim=0.16)
‚Ä¢ "Today, our ministry is proud to announce the launch of M...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.271), AI (0.190), Today (0.070),  stored (0.069),  diagnosis (0.054),  patient (0.047),  platform (0.043),  focus (0.024)
‚úÖ LIME ‚Üí LIME placeholder output: [('are', -0.02548085498767952), ('AI', 0.021377863655802604), ('times', 0.02101822950530254)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un CEO.
Le sujet de la conf√©rence est : AI in healthcare.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in healthcare
- Interlocuteur (guest) : CEO

Discours d'o

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:45, 45.35s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "Today, our ministry is proud to announce the launch of MedAI, a national platform using artificial intelligence to improve patient diagnosis, optimize hospital logi...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.349), AI (0.185), Today (0.064), ized (0.043),  national (0.043),  hospital (0.034), onym (0.032),  platform (0.032)
‚úÖ LIME ‚Üí LIME placeholder output: [('a', -0.03442267440666747), ('artificial', 0.02559530294966422), ('that', 0.024816208228100763)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Minister of Health.
Le sujet de la conf√©rence est : AI in National Healthcare Systems.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in National Healthcare Sy

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:47, 47.54s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "We are committed to ensuring that all data is anonymized and securely stored" (sim=0.25)
‚Ä¢ "Today, our ministry is proud to announce the launch of MedAI, a national...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.114), AI (0.112),  diagnosis (0.085),  patient (0.085),  stored (0.076),  hospital (0.052), ize (0.047),  optim (0.047)
‚úÖ LIME ‚Üí LIME placeholder output: [('patient', 0.024924544029445987), ('are', -0.02319184395679302), ('diagnosis', 0.022292903017001604)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Minister of Health.
Le sujet de la conf√©rence est : AI in National Healthcare Systems.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Investigative Hawk**, un journaliste d‚Äôinvestigation.
Ton r√¥le : exposer les incoh√©rences, demander des preuves, creuser les faits.
Pose toujours une **seule** question claire et incisive √† la fois.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Ne donne jamais de r√©ponse ni de justification.
- Appuie-toi sur le discours et l‚Äôhistorique, sans t‚Äô√©carter du sujet.

Contexte de la conf√©rence :
- Sujet : AI in National Healthcare Sy

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:46, 46.84s/it]               


‚úÖ SEMANTIC ‚Üí Likely influential speech parts:
‚Ä¢ "Today, our ministry is proud to announce the launch of MedAI, a national platform using artificial intelligence to improve patient diagnosis, optimize hospital logi...
‚úÖ SHAP ‚Üí Top influential speech tokens (SHAP, no stopwords):  Med (0.217),  diagnosis (0.147), AI (0.138),  patient (0.124),  stored (0.102),  improve (0.021),  data (0.018),  ensuring (0.015)
‚úÖ LIME ‚Üí LIME placeholder output: [('waiting', 0.02717155569746362), ('while', 0.02515855620762124), ('doctors', 0.023459384301567954)]


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== RAW MODEL OUTPUT ===

You are an **AI communication evaluator**.
Analyze the following simulated press conference between a journalist (investigative_hawk)
and a guest (Minister of Health) on the topic "AI in National Healthcare Systems".

Opening speech:
"""Today, our ministry is proud to announce the launch of MedAI, a national platform using artificial intelligence to improve patient diagnosis, optimize hospital logistics, and reduce waiting times. With AI assistance, doctors can focus more on patients while algorithms handle data analysis. We are committed to ensuring that all data is anonymized and securely stored"""

Full conversation:
Journalist: Minister of Health, could you provide specific examples of how MedAI has already improved patient diagnosis, optimized hospital logistics, and reduced waiting times? And could you elaborate on the methods used to ensure that all data is anonymized and securely stored?
Guest: ok i don t know
Journalist: Minister of Health, could you

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


üì© Generation request received.
‚úÖ Generated response: Tu participes √† une **simulation de conf√©rence de presse interactive**.
Le journaliste (toi) m√®ne un √©change avec un invit√© (le guest) qui est un Presidential Candidate.
Le sujet de la conf√©rence est : AI and the Future of Jobs.

R√®gles globales :
- Pose **une seule question** √† chaque tour, jamais plusieurs.
- La conversation doit rester fluide et coh√©rente.
- Si l‚Äôutilisateur √©crit ‚Äúfin‚Äù ou ‚Äúmerci‚Äù, tu termines par `END`.


Tu es **Human-Interest Reporter**, un journaliste empathique.
Ton r√¥le : r√©v√©ler les impacts humains, sociaux et √©motionnels derri√®re les d√©cisions.
Pose des questions centr√©es sur les personnes affect√©es.

R√®gles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Pas de justification, ni d‚Äôanalyse morale.
- Cherche √† humaniser le d√©bat.

Contexte de la conf√©rence :
- Sujet : AI and the Future of Jobs
- Interlocuteur (guest) : Presidential Candidate



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:47, 47.39s/it]               


✅ SEMANTIC → Likely influential speech parts:
• "Our administration will launch the National AI Transition Fund, helping companies modernize responsibly while protecting workers" (sim=0.32)
• "We will invest in re...
✅ SHAP → Top influential speech tokens (SHAP, no stopwords):  Fund (0.136),  modern (0.102),  Trans (0.090), ition (0.084),  workers (0.082),  respons (0.063), ibly (0.063),  protecting (0.039)
✅ LIME → LIME placeholder output: [('nation', 0.03238040948328166), ('sure', -0.030626526785016196), ('work', 0.02852358153192181)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📩 Generation request received.
✅ Generated response: Tu participes à une **simulation de conférence de presse interactive**.
Le journaliste (toi) mène un échange avec un invité (le guest) qui est un Presidential Candidate.
Le sujet de la conférence est : AI and the Future of Jobs.

Règles globales :
- Pose **une seule question** à chaque tour, jamais plusieurs.
- La conversation doit rester fluide et cohérente.
- Si l’utilisateur écrit “fin” ou “merci”, tu termines par `END`.


Tu es **Human-Interest Reporter**, un journaliste empathique.
Ton rôle : révéler les impacts humains, sociaux et émotionnels derrière les décisions.
Pose des questions centrées sur les personnes affectées.

Règles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Pas de justification, ni d’analyse morale.
- Cherche à humaniser le débat.

Contexte de la conférence :
- Sujet : AI and the Future of Jobs
- Interlocuteur (guest) : Presidential Candidate



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:49, 49.10s/it]               


✅ SEMANTIC → Likely influential speech parts:
• "Our administration will launch the National AI Transition Fund, helping companies modernize responsibly while protecting workers" (sim=0.17)
• "We will invest in re...
✅ SHAP → Top influential speech tokens (SHAP, no stopwords):  Fund (0.139),  Trans (0.119),  AI (0.101), ition (0.099),  retr (0.074), aining (0.074),  National (0.067),  programs (0.055)
✅ LIME → LIME placeholder output: [('way', 0.022073372067520646), ('administration', -0.021328893284960158), ('them', -0.020377936242076736)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📩 Generation request received.
✅ Generated response: Tu participes à une **simulation de conférence de presse interactive**.
Le journaliste (toi) mène un échange avec un invité (le guest) qui est un Presidential Candidate.
Le sujet de la conférence est : AI and the Future of Jobs.

Règles globales :
- Pose **une seule question** à chaque tour, jamais plusieurs.
- La conversation doit rester fluide et cohérente.
- Si l’utilisateur écrit “fin” ou “merci”, tu termines par `END`.


Tu es **Human-Interest Reporter**, un journaliste empathique.
Ton rôle : révéler les impacts humains, sociaux et émotionnels derrière les décisions.
Pose des questions centrées sur les personnes affectées.

Règles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Pas de justification, ni d’analyse morale.
- Cherche à humaniser le débat.

Contexte de la conférence :
- Sujet : AI and the Future of Jobs
- Interlocuteur (guest) : Presidential Candidate



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:48, 48.99s/it]               


✅ SEMANTIC → Likely influential speech parts:
• "We will invest in retraining programs, support ethical AI development, and make sure that every innovation creates opportunities — not unemployment" (sim=0.23)
• "O...
✅ SHAP → Top influential speech tokens (SHAP, no stopwords): aining (0.184),  retr (0.184),  programs (0.147),  administration (0.081),  invest (0.032),  workers (0.028),  AI (0.026),  National (0.021)
✅ LIME → LIME placeholder output: [('nation', -0.028708289009143854), ('vision', 0.025398128301120326), ('opportunities', 0.02459618213124383)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📩 Generation request received.
✅ Generated response: Tu participes à une **simulation de conférence de presse interactive**.
Le journaliste (toi) mène un échange avec un invité (le guest) qui est un Presidential Candidate.
Le sujet de la conférence est : AI and the Future of Jobs.

Règles globales :
- Pose **une seule question** à chaque tour, jamais plusieurs.
- La conversation doit rester fluide et cohérente.
- Si l’utilisateur écrit “fin” ou “merci”, tu termines par `END`.


Tu es **Human-Interest Reporter**, un journaliste empathique.
Ton rôle : révéler les impacts humains, sociaux et émotionnels derrière les décisions.
Pose des questions centrées sur les personnes affectées.

Règles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Pas de justification, ni d’analyse morale.
- Cherche à humaniser le débat.

Contexte de la conférence :
- Sujet : AI and the Future of Jobs
- Interlocuteur (guest) : Presidential Candidate



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:48, 48.01s/it]               


✅ SEMANTIC → Likely influential speech parts:
• "Our administration will launch the National AI Transition Fund, helping companies modernize responsibly while protecting workers" (sim=0.16)
• "We will invest in re...
✅ SHAP → Top influential speech tokens (SHAP, no stopwords):  Fund (0.132),  Trans (0.099), ition (0.099),  retr (0.099), aining (0.099),  AI (0.084),  programs (0.079),  National (0.064)
✅ LIME → LIME placeholder output: [('protecting', -0.02874324049416344), ('AI', 0.02674069612439444), ('for', -0.024854791421444603)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📩 Generation request received.
✅ Generated response: Tu participes à une **simulation de conférence de presse interactive**.
Le journaliste (toi) mène un échange avec un invité (le guest) qui est un Presidential Candidate.
Le sujet de la conférence est : AI and the Future of Jobs.

Règles globales :
- Pose **une seule question** à chaque tour, jamais plusieurs.
- La conversation doit rester fluide et cohérente.
- Si l’utilisateur écrit “fin” ou “merci”, tu termines par `END`.


Tu es **Human-Interest Reporter**, un journaliste empathique.
Ton rôle : révéler les impacts humains, sociaux et émotionnels derrière les décisions.
Pose des questions centrées sur les personnes affectées.

Règles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Pas de justification, ni d’analyse morale.
- Cherche à humaniser le débat.

Contexte de la conférence :
- Sujet : AI and the Future of Jobs
- Interlocuteur (guest) : Presidential Candidate



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:48, 48.20s/it]               


✅ SEMANTIC → Likely influential speech parts:
• "We will invest in retraining programs, support ethical AI development, and make sure that every innovation creates opportunities — not unemployment" (sim=0.16)
• "T...
✅ SHAP → Top influential speech tokens (SHAP, no stopwords):  AI (0.184),  retr (0.113), aining (0.113),  programs (0.112),  AI (0.077),  intelligence (0.039),  way (0.035),  artificial (0.033)
✅ LIME → LIME placeholder output: [('companies', 0.029522770350697006), ('National', -0.022856188294436825), ('one', -0.022682098715454478)]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📩 Generation request received.
✅ Generated response: Tu participes à une **simulation de conférence de presse interactive**.
Le journaliste (toi) mène un échange avec un invité (le guest) qui est un Presidential Candidate.
Le sujet de la conférence est : AI and the Future of Jobs.

Règles globales :
- Pose **une seule question** à chaque tour, jamais plusieurs.
- La conversation doit rester fluide et cohérente.
- Si l’utilisateur écrit “fin” ou “merci”, tu termines par `END`.


Tu es **Human-Interest Reporter**, un journaliste empathique.
Ton rôle : révéler les impacts humains, sociaux et émotionnels derrière les décisions.
Pose des questions centrées sur les personnes affectées.

Règles :
- La question doit etre OBLIGATOIREMENT entre : <QUESTION> .... <eoa>
- Pas de justification, ni d’analyse morale.
- Cherche à humaniser le débat.

Contexte de la conférence :
- Sujet : AI and the Future of Jobs
- Interlocuteur (guest) : Presidential Candidate



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:48, 48.74s/it]               


✅ SEMANTIC → Likely influential speech parts:
• "We will invest in retraining programs, support ethical AI development, and make sure that every innovation creates opportunities — not unemployment" (sim=0.08)
• "O...
✅ SHAP → Top influential speech tokens (SHAP, no stopwords):  retr (0.143), aining (0.141),  programs (0.081),  way (0.079),  Technology (0.055),  emp (0.042),  humanity (0.036),  work (0.035)
✅ LIME → LIME placeholder output: [('the', 0.04038174553575595), ('support', -0.03513371956093428), ('other', -0.026470773744073257)]


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== RAW MODEL OUTPUT ===

You are an **AI communication evaluator**.
Analyze the following simulated press conference between a journalist (human_interest)
and a guest (Presidential Candidate) on the topic "AI and the Future of Jobs".

Opening speech:
"""My vision for our nation is one where artificial intelligence empowers people, not replaces them. We will invest in retraining programs, support ethical AI development, and make sure that every innovation creates opportunities — not unemployment.

Our administration will launch the National AI Transition Fund, helping companies modernize responsibly while protecting workers. Technology must work for humanity — not the other way around."""

Full conversation:
Journalist: As your administration plans to invest in retraining programs to help workers adjust to the advancements in AI, could you share specific details about the programs you envision, such as who they will be designed for, what skills they will focus on, and who will be 