<div style='text-align: center;'>
    <span style='font-size: 30px; font-weight: bold;'>
        CM3070 Final Project
    </span>
</div>
<div style='text-align: center;'>
    <span style='font-size: 30px; font-weight: bold;'>
        Final Report
    </span>
</div>

# 9. User Testing and Feedback

In [1]:
# --------------------------------------------------------------------------------------------------
# Libraries
# --------------------------------------------------------------------------------------------------
import gradio as gr                                                      # For building the interface
import numpy as np                                                       # For numerical operations
import joblib                                                            # For model serialization
import os                                                                # For interacting with the operating system
import pandas as pd                                                      # For data manipulation
from datetime import datetime                                            # For handling date and time
import tensorflow as tf                                                  # For deep learning
from tensorflow.keras.models import load_model                           # For loading Keras models
from tensorflow.keras.preprocessing.sequence import pad_sequences        # For padding sequences
from transformers import TFBertForSequenceClassification, BertTokenizer  # For BERT-based models and tokenization

# --------------------------------------------------------------------------------------------------
# Load Models
# --------------------------------------------------------------------------------------------------
# All artifacts are read from the relative "saved_models/" directory, so this cell must be run
# with the project root as the current working directory.
# NOTE(review): joblib.load unpickles its input and can execute arbitrary code; only load
# .pkl files that were produced by this project and come from a trusted location.
vectorizer_tfidf = joblib.load("saved_models/vectorizer_tfidf.pkl")  # TF-IDF Vectorizer
model_web_nb = joblib.load("saved_models/model_nb.pkl")              # Naive Bayes
model_web_lr = joblib.load("saved_models/model_lr_best.pkl")         # Logistic Regression

model_web_cnn = load_model("saved_models/model_cnn.keras")           # CNN
tokenizer_cnn = joblib.load("saved_models/tokenizer_cnn.pkl")        # Tokenizer for CNN

# The BERT model and its tokenizer are both loaded from the same saved directory
model_bert_tf = TFBertForSequenceClassification.from_pretrained("saved_models/model_bert_tf")  # BERT
tokenizer_bert_tf = BertTokenizer.from_pretrained("saved_models/model_bert_tf")                # Tokenizer for BERT

# Meta-classifier: classify_news() feeds it the four base models' class-1 probabilities
model_meta = joblib.load("saved_models/model_meta.pkl")  # Stacking Model

# --------------------------------------------------------------------------------------------------
# FUNCTION - Classifies the news article using multiple models and returns a final prediction
# --------------------------------------------------------------------------------------------------
def classify_news(article_text):
    """Classify a news article with every base model and the stacking ensemble.

    Each base model (Naive Bayes, Logistic Regression, CNN, BERT) produces a
    two-class probability vector where index 0 is "Fake" and index 1 is "Real".
    The four class-1 probabilities are then passed, in that order, to the
    stacking model, whose output is the final prediction.

    Parameters:
        article_text: The article text typed or pasted by the user.

    Returns:
        A ``(final_prediction, model_predictions)`` tuple of strings:
        ``final_prediction`` is the ensemble verdict with its confidence, and
        ``model_predictions`` holds one line per base model. When the input is
        empty or whitespace-only, a prompt message and an empty string are
        returned instead and no model is run.
    """

    def label_and_confidence(probs):
        """Map a (fake, real) probability pair to its label and winning probability."""
        if probs[1] > probs[0]:
            return "Real", probs[1]
        return "Fake", probs[0]

    # Nothing to classify: running the models on blank text would only yield a
    # meaningless (but confident-looking) verdict.
    if article_text is None or not str(article_text).strip():
        return "⚠ Please enter a news article to classify.", ""

    # Naive Bayes and Logistic Regression share the same TF-IDF features
    X_tfidf = vectorizer_tfidf.transform([article_text])
    probs_nb = model_web_nb.predict_proba(X_tfidf)[0]  # (2,)
    probs_lr = model_web_lr.predict_proba(X_tfidf)[0]  # (2,)

    # CNN Model
    # NOTE(review): maxlen=500 presumably mirrors the training-time padding - confirm
    seq = tokenizer_cnn.texts_to_sequences([article_text])
    padded_seq = pad_sequences(seq, maxlen=500, padding='post', truncating='post')
    # verbose=0 keeps the Keras progress bar out of the notebook output on every request
    prob_real_cnn = model_web_cnn.predict(padded_seq, verbose=0)[0][0]
    probs_cnn = np.array([1 - prob_real_cnn, prob_real_cnn])  # Convert to (2,)

    # BERT Model
    # NOTE(review): max_length=128 presumably mirrors the fine-tuning setup - confirm
    inputs = tokenizer_bert_tf(article_text, truncation=True, padding=True, max_length=128, return_tensors="tf")
    logits_bert_tf = model_bert_tf(**inputs).logits
    probs_bert_tf = tf.nn.softmax(logits_bert_tf, axis=1).numpy()[0]  # (2,)

    # Display name and probabilities per base model; the order defines the meta-feature order
    base_results = [
        ("Naive Bayes", probs_nb),
        ("Logistic Regression", probs_lr),
        ("CNN", probs_cnn),
        ("BERT", probs_bert_tf),
    ]

    # Stacking Model (Final Prediction): input is the class-1 probability of each base model
    X_meta = np.array([probs[1] for _, probs in base_results]).reshape(1, -1)
    probs_meta = model_meta.predict_proba(X_meta)[0]  # (2,)
    pred_class_meta = np.argmax(probs_meta)  # 0 = Fake, 1 = Real

    label_meta = "Fake" if pred_class_meta == 0 else "Real"
    confidence_meta = probs_meta[pred_class_meta]
    final_prediction = f"{label_meta} (Confidence: {confidence_meta:.2f})"

    # Model Predictions (for display), one line per base model
    prediction_lines = []
    for model_name, probs in base_results:
        label, confidence = label_and_confidence(probs)
        prediction_lines.append(f"{model_name}: {label} (Confidence: {confidence:.2f})")
    model_predictions = "\n".join(prediction_lines)

    return final_prediction, model_predictions

# --------------------------------------------------------------------------------------------------
# FUNCTION - Save user feedback to a CSV file, including model predictions
# --------------------------------------------------------------------------------------------------
def save_feedback(article_text, final_prediction, model_predictions, feedback_accuracy, feedback_confidence, 
                  feedback_performance, feedback_ui, feedback_usefulness, feedback_recommendation, feedback_text):
    """Save one user-feedback submission, including the model predictions, to a CSV file.

    The file is written to ``user_feedback/user_feedback_<YYYYMMDD_HHMMSS>.csv``
    relative to the current working directory. The folder is created when it
    does not exist yet. If a file with the same timestamp already exists, the
    row is appended to it without repeating the header.

    Parameters:
        article_text: The article the user submitted for classification.
        final_prediction: The ensemble prediction text shown to the user.
        model_predictions: Newline-separated per-model predictions (may be empty or None).
        feedback_accuracy: Selected accuracy option text, or a falsy value if none.
        feedback_confidence: Truthy when the "confidence too high/low" box is ticked.
        feedback_performance: Selected speed option text, or a falsy value if none.
        feedback_ui: Selected interface option text, or a falsy value if none.
        feedback_usefulness: Selected usefulness option text, or a falsy value if none.
        feedback_recommendation: Truthy when the "would recommend" box is ticked.
        feedback_text: The user's open-ended comment (stored in its own column).

    Returns:
        A confirmation message for display in the interface.
    """

    # Change '\n' to ', ' so the per-model predictions fit on one line in the CSV
    model_predictions_csv = (model_predictions or "").replace("\n", ", ")

    # Collect every selected option as text. Radios contribute their chosen
    # text; checkboxes contribute a fixed sentence when ticked.
    all_feedback = []
    if feedback_accuracy:
        all_feedback.append(feedback_accuracy)
    if feedback_confidence:
        all_feedback.append("The confidence score seemed too high/low")
    if feedback_performance:
        all_feedback.append(feedback_performance)
    if feedback_ui:
        all_feedback.append(feedback_ui)
    if feedback_usefulness:
        all_feedback.append(feedback_usefulness)
    if feedback_recommendation:
        all_feedback.append("I would recommend this to others")
    all_feedback = [str(item) for item in all_feedback]

    # Prepare data to be saved
    data = {
        "Article Text": article_text,                       # Store article text
        "Final Prediction": final_prediction,               # Store final prediction (ensemble)
        "Model Predictions": model_predictions_csv,         # Store individual model predictions
        "Feedback Selected": ", ".join(all_feedback),       # Store all feedback options
        "User Feedback": feedback_text                      # User's open-ended feedback
    }

    # Make sure the target folder exists; to_csv does not create missing directories
    feedback_dir = "user_feedback"
    os.makedirs(feedback_dir, exist_ok=True)

    # Generate a timestamped file name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    feedback_file = os.path.join(feedback_dir, f"user_feedback_{timestamp}.csv")

    # Append without a header when the file already exists (two submissions
    # within the same second), otherwise create it with a header row.
    file_exists = os.path.exists(feedback_file)
    pd.DataFrame([data]).to_csv(
        feedback_file,
        mode='a' if file_exists else 'w',
        header=not file_exists,
        index=False,
    )

    return "✅ Feedback saved! Thank you for your input."

# --------------------------------------------------------------------------------------------------
# Create Gradio Interface
# --------------------------------------------------------------------------------------------------
# Layout: a two-column row (article input | prediction outputs) followed by a feedback form.
# Components are rendered in the order they are created inside the Blocks context.
with gr.Blocks() as iface:
    gr.Markdown("# 📰 Fake News Detector (AI-Powered)")

    with gr.Row():
        with gr.Column(scale=3):  # Input section (60%)
            article_text = gr.Textbox(
                lines=6,
                placeholder="Paste a news article here...",
                label="📝 Enter News Article"
            )
            submit_button = gr.Button("Classify News", elem_id="submit_button")  # Ensure ID for the button

        with gr.Column(scale=2):  # Output section (40%)
            prediction_output = gr.Textbox(label="📢 Final Prediction (Ensemble)", interactive=False)
            model_predictions_output = gr.Textbox(lines=4, label="📊 Model Predictions", interactive=False)

    # classify_news returns (final_prediction, model_predictions), mapped onto the two output boxes
    submit_button.click(fn=classify_news, inputs=[article_text], outputs=[prediction_output, model_predictions_output])

    # User Feedback Section
    gr.Markdown("## 📝 User Feedback")

    # Section 1 - Prediction Accuracy and Confidence Level
    # NOTE(review): every Radio below is labelled "Select any that apply", but a Radio
    # only allows a single choice - consider rewording the labels.
    gr.Markdown("### 🎯 Prediction Accuracy Issues")
    feedback_accuracy = gr.Radio(
        choices=[
            "The prediction was correct",
            "The prediction was wrong",
        ],
        label="Select any that apply",
        elem_id="feedback_accuracy"
    )

    feedback_confidence = gr.Checkbox(
        label="The confidence score seemed too high/low",
        elem_id="feedback_confidence"
    )
    
    # Section 2 - Model Performance & Speed
    gr.Markdown("### ⚡ Model Performance & Speed")
    feedback_performance = gr.Radio(
        choices=[
            "The response was fast",
            "The response was slow",
        ],
        label="Select any that apply",
        elem_id="feedback_performance"
    )
    
    # Section 3 - User Experience and Interface
    gr.Markdown("### 🎨 User Experience & Interface")
    feedback_ui = gr.Radio(
        choices=[
            "The interface was easy to use",
            "The interface was confusing",
        ],
        label="Select any that apply",
        elem_id="feedback_ui"
    )

    # Section 4 - Overall Feedback
    gr.Markdown("### 🌟 Overall Feedback")
    feedback_usefulness = gr.Radio(
        choices=[
            "The system was useful",
            "The system was not useful"
        ],
        label="Select any that apply",
        elem_id="feedback_usefulness"
    )

    feedback_recommendation = gr.Checkbox(
        label="I would recommend this to others",
        elem_id="feedback_recommendation"
    )
    
    # Open-ended comment; save_feedback stores it in its own "User Feedback" column
    feedback_text = gr.Textbox(lines=2, placeholder="Any additional feedback?", label="Your Feedback", elem_id="feedback_text")

    # Submit feedback button
    save_feedback_button = gr.Button("Submit Feedback", elem_id="save_feedback_button")
    feedback_message = gr.Textbox(label="📌 Feedback Status", interactive=False)

    # The order of `inputs` must match the positional parameters of save_feedback.
    # The article and both prediction boxes are passed along so each feedback row
    # records what the user was looking at.
    save_feedback_button.click(
        fn=save_feedback,
        inputs=[
            article_text, prediction_output, model_predictions_output,
            feedback_accuracy, feedback_confidence, feedback_performance, feedback_ui, 
            feedback_usefulness, feedback_recommendation, feedback_text
        ],
        outputs=[feedback_message]
    )

# NOTE(review): share=True also publishes the app on a public *.gradio.live URL (see the
# cell output), so anyone with the link can submit articles and feedback.
iface.launch(share=True)

  from .autonotebook import tqdm as notebook_tqdm
2025-03-30 15:40:56.104808: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743320456.888523    5821 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743320457.075907    5821 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743320458.752341    5821 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743320458.752377    5821 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743320458.752380    5821

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://a21897d156cc395e29.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




I0000 00:00:1743320627.133737    6229 service.cc:152] XLA service 0x763d28008ef0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743320627.133806    6229 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce GTX 1080, Compute Capability 6.1
2025-03-30 15:43:47.524419: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743320627.921854    6229 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


I0000 00:00:1743320631.908567    6229 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


# 10. Retrieving User Feedback

In [2]:
# --------------------------------------------------------------------------------------------------
# Libraries
# --------------------------------------------------------------------------------------------------
import gradio as gr            # For building the interface
import os                      # For interacting with the operating system
import pandas as pd            # For data manipulation
from datetime import datetime  # For handling date and time

# Folder where feedback CSV files are stored. The path is relative:
# get_sorted_feedback_files() joins it onto the current working directory.
FEEDBACK_FOLDER = "user_feedback"

# --------------------------------------------------------------------------------------------------
# FUNCTION - Get all feedback files sorted by newest first
# --------------------------------------------------------------------------------------------------
def get_sorted_feedback_files():
    """Return the full paths of all feedback CSV files, newest first.

    Looks inside FEEDBACK_FOLDER under the current working directory and keeps
    the entries named ``user_feedback_*.csv``. Files are ordered by the
    timestamp embedded in their name (``user_feedback_YYYYMMDD_HHMMSS.csv``),
    most recent first. Returns an empty list when the folder does not exist.
    """
    folder = os.path.join(os.getcwd(), FEEDBACK_FOLDER)
    if not os.path.exists(folder):
        return []

    def timestamp_key(path):
        """Sort key: the datetime parsed from the file name of ``path``."""
        try:
            pieces = os.path.basename(path).split("_")
            # pieces[2] is YYYYMMDD; pieces[3] is HHMMSS followed by the extension
            stamp = f"{pieces[2]} {pieces[3].split('.')[0]}"
            return datetime.strptime(stamp, "%Y%m%d %H%M%S")
        except Exception:
            # Unparsable names get the smallest possible datetime, so with the
            # descending sort below they end up last in the list.
            return datetime.min

    candidates = []
    for entry in os.listdir(folder):
        if entry.startswith("user_feedback_") and entry.endswith(".csv"):
            candidates.append(os.path.join(folder, entry))

    # Descending by timestamp (newest → oldest)
    return sorted(candidates, key=timestamp_key, reverse=True)

# --------------------------------------------------------------------------------------------------
# FUNCTION - Extract timestamp from filename
# --------------------------------------------------------------------------------------------------
def extract_timestamp(filename):
    """Turn a feedback file name into a readable ``YYYY-MM-DD HH:MM:SS`` string.

    Parameters:
        filename: A name (or path) of the form
            ``user_feedback_YYYYMMDD_HHMMSS.csv``. Only the final path
            component is inspected, so underscores in directory names do not
            shift the fields.

    Returns:
        The formatted date and time, or ``"Unknown Date & Time"`` when the
        name does not contain a valid 8-digit date and 6-digit time.
    """
    try:
        parts = os.path.basename(filename).split("_")  # Split filename by underscores
        date_part = parts[2]                           # Extract YYYYMMDD
        time_part = parts[3].split(".")[0]             # Extract HHMMSS without ".csv"

        # Slicing alone would turn malformed fields into garbage such as
        # "2025-- 12::", so check the field widths and let strptime reject
        # impossible values (month 13, hour 25, ...).
        if len(date_part) != 8 or len(time_part) != 6:
            raise ValueError("unexpected timestamp width")
        if not (date_part.isdigit() and time_part.isdigit()):
            raise ValueError("timestamp is not numeric")
        datetime.strptime(f"{date_part} {time_part}", "%Y%m%d %H%M%S")
    except (IndexError, ValueError):
        return "Unknown Date & Time"  # Handle unexpected filename formats

    formatted_date = f"{date_part[:4]}-{date_part[4:6]}-{date_part[6:]}"  # YYYY-MM-DD
    formatted_time = f"{time_part[:2]}:{time_part[2:4]}:{time_part[4:]}"  # HH:MM:SS
    return f"{formatted_date} {formatted_time}"

# --------------------------------------------------------------------------------------------------
# FUNCTION - Load and format feedback data as a Markdown table
# --------------------------------------------------------------------------------------------------
def _markdown_cell(value):
    """Return ``value`` as text that is safe inside a single Markdown table cell."""
    text = str(value).replace("\r\n", "\n").replace("\r", "\n")
    # A raw "|" would start a new column and a raw newline would end the row
    return text.replace("|", "\\|").replace("\n", "<br>")


def _split_saved_list(raw):
    """Split a ", "-separated CSV cell into its non-empty items, dropping [, ] and ' characters."""
    cleaned = str(raw).replace("[", "").replace("]", "").replace("'", "")
    return [item for item in cleaned.split(", ") if item]


def load_all_feedback():
    """Load every feedback CSV file and render it as a Markdown report.

    The report starts with the total number of entries and a table counting
    how often each predefined feedback option was selected, followed by one
    table per feedback entry in the order returned by
    get_sorted_feedback_files().

    Blank CSV cells (no option selected, no comment) are read as empty strings,
    and free text is escaped so that pipes or line breaks in an article or a
    comment cannot break the table layout.

    Returns:
        The Markdown text, or ``"No feedback data found."`` when there are no
        feedback files. A file that cannot be read or lacks an expected column
        is reported inline with a warning line instead of raising.
    """
    feedback_files = get_sorted_feedback_files()

    if not feedback_files:
        return "No feedback data found."

    feedback_counts = {
        "The prediction was correct": 0,
        "The prediction was wrong": 0,
        "The confidence score seemed too high/low": 0,
        "The response was fast": 0,
        "The response was slow": 0,
        "The interface was easy to use": 0,
        "The interface was confusing": 0,
        "The system was useful": 0,
        "The system was not useful": 0,
        "I would recommend this to others": 0
    }

    sections = []       # One Markdown chunk per entry or per error, in display order
    total_feedback = 0  # Number of entries rendered so far

    for file in feedback_files:
        try:
            # dtype=str + keep_default_na=False: blank cells become "" instead of
            # NaN, and numeric-looking text is not converted to numbers.
            df = pd.read_csv(file, dtype=str, keep_default_na=False)
            filename = os.path.basename(file)        # Get filename only
            timestamp = extract_timestamp(filename)  # Extract date & time from filename

            for row in df.to_dict("records"):
                feedback_items = _split_saved_list(row["Feedback Selected"])
                model_predictions = _split_saved_list(row["Model Predictions"])

                # Build the whole entry first so a bad row never leaves a
                # half-written table or half-updated counters behind.
                entry = "\n".join([
                    f"### Feedback Entry {total_feedback + 1}",
                    "",
                    "| **Field** | **Details** |",
                    "|---|---|",
                    f"| 📂 Filename | {_markdown_cell(filename)} |",
                    f"| 🕒 Date & Time | {timestamp} |",
                    f"| 📰 Article | {_markdown_cell(row['Article Text'])} |",
                    f"| 📢  Final Prediction (Ensemble) | {_markdown_cell(row['Final Prediction'])} |",
                    "| 📊 Model Predictions | " + "<br>".join(_markdown_cell(p) for p in model_predictions) + " |",
                    "| 📝 User Feedback | " + "<br>".join(_markdown_cell(f) for f in feedback_items) + " |",
                    f"| ✏ Additional Comments | {_markdown_cell(row['User Feedback'])} |",
                    "",
                    "---",
                ])

                # Update feedback counts (only predefined options are counted)
                for feedback_item in feedback_items:
                    if feedback_item in feedback_counts:
                        feedback_counts[feedback_item] += 1

                sections.append(entry)
                total_feedback += 1

        except Exception as e:
            # Best effort: report the problem file and continue with the others
            sections.append(f"⚠ Error reading {file}: {e}")

    # Total feedback count and consolidated counts table go at the top
    feedback_summary = f"## 📊 Total Feedback Entries: {total_feedback}\n\n"
    feedback_summary += "### Consolidated Feedback Counts\n\n"
    feedback_summary += "| **Feedback Type** | **Count** |\n"
    feedback_summary += "|---|---|\n"
    for feedback_type, count in feedback_counts.items():
        feedback_summary += f"| {feedback_type} | {count} |\n"

    if not sections:
        return feedback_summary

    # The blank line ends the summary table so the first detail line is not
    # rendered as an extra table row.
    return feedback_summary + "\n" + "\n\n".join(sections)

# --------------------------------------------------------------------------------------------------
# Build the feedback viewer: show all saved feedback, with a button to reload it on demand
# --------------------------------------------------------------------------------------------------
# Viewer interface: a single Markdown report of all saved feedback plus a refresh button.
with gr.Blocks() as iface:
    gr.Markdown("# 📝 All User Feedback Records (Newest First)")

    # Display existing feedback (this initial value is computed once, when the interface is built)
    feedback_display = gr.Markdown(load_all_feedback())

    # Button to manually refresh
    refresh_button = gr.Button("Refresh Feedback")

    # Refresh feedback when clicked
    refresh_button.click(fn=load_all_feedback, inputs=[], outputs=[feedback_display])

    # Also reload the report every time the page is opened, so a visitor never
    # starts from the stale snapshot taken when the interface was built.
    iface.load(fn=load_all_feedback, inputs=[], outputs=[feedback_display])

# NOTE(review): share=True also publishes the viewer on a public *.gradio.live URL (see the
# cell output); the report contains submitted articles and user comments.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://82c9b3f078664888d6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


