In [7]:
!pip install nltk sumy langdetect scikit-learn rouge


Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [10]:
import nltk
from nltk.corpus import stopwords
from langdetect import detect
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sklearn.feature_extraction.text import TfidfVectorizer
import tkinter as tk
from tkinter import messagebox, scrolledtext, Toplevel
import numpy as np
from rouge import Rouge

# Fetch the NLTK data the functions below rely on.
# 'punkt' is the tokenizer model used by older NLTK releases (and is still
# what sumy loads); 'punkt_tab' is the replacement that NLTK >= 3.8.2 loads
# for nltk.word_tokenize / nltk.sent_tokenize. Requesting both keeps the
# script working across versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Preprocessing function
# langdetect reports ISO 639-1 codes, whereas the NLTK stopwords corpus is
# keyed by English language names. Only languages that have both an NLTK
# stop-word list and a punkt sentence model are listed, so the returned name
# can be handed to a sentence tokenizer as well.
_ISO_TO_NLTK_LANGUAGE = {
    'da': 'danish',
    'de': 'german',
    'el': 'greek',
    'en': 'english',
    'es': 'spanish',
    'fi': 'finnish',
    'fr': 'french',
    'it': 'italian',
    'nl': 'dutch',
    'no': 'norwegian',
    'pt': 'portuguese',
    'ru': 'russian',
    'sl': 'slovene',
    'sv': 'swedish',
    'tr': 'turkish',
}


def preprocess_text(text):
    """Lower-case *text*, tokenize it and drop stop words and punctuation.

    The language is guessed with ``langdetect`` and translated to the NLTK
    language name so that the matching stop-word list is used. If detection
    fails, or the language has no entry in ``_ISO_TO_NLTK_LANGUAGE``, English
    is used instead.

    Args:
        text: Raw input text.

    Returns:
        A ``(cleaned_text, language)`` tuple: the kept tokens joined by single
        spaces, and the NLTK language name whose stop words were applied.
    """
    # Imported here so the block does not depend on a new top-level import.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        detected_lang = _ISO_TO_NLTK_LANGUAGE[detect(text)]
        stop_words = set(stopwords.words(detected_lang))
    except (LangDetectException, KeyError, OSError):
        # Detection failed, the language is unsupported, or its stop-word
        # file is missing: fall back to English.
        detected_lang = 'english'
        stop_words = set(stopwords.words('english'))

    words = nltk.word_tokenize(text.lower())
    # isalnum() discards punctuation tokens; the set makes the stop-word
    # test O(1) per token.
    filtered_words = [
        word for word in words
        if word.isalnum() and word not in stop_words
    ]

    return ' '.join(filtered_words), detected_lang

# Summarization Techniques
def lsa_summarization(text, num_sentences=2):
    """Summarize *text* with sumy's LSA summarizer.

    Args:
        text: Raw input text.
        num_sentences: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.
    """
    # preprocess_text() is used only for its language guess. Its cleaned
    # output has all punctuation removed, so parsing it would give sumy one
    # giant "sentence" and the summary would just be the whole word bag.
    _, detected_lang = preprocess_text(text)

    # Parse the original text so sentence boundaries are preserved.
    parser = PlaintextParser.from_string(text, Tokenizer(detected_lang))

    summarizer = LsaSummarizer()
    # Stop words are filtered inside the summarizer instead of being stripped
    # from the text beforehand. preprocess_text() has already loaded this
    # same list successfully, so the lookup is not expected to fail here.
    summarizer.stop_words = stopwords.words(detected_lang)

    summary = summarizer(parser.document, num_sentences)
    return ' '.join(str(sentence) for sentence in summary)

def tfidf_summarization(text, num_sentences=2):
    """Pick the sentences of *text* with the highest summed TF-IDF weight.

    Each sentence is treated as one document; its score is the sum of the
    TF-IDF weights of its terms.

    Args:
        text: Raw input text.
        num_sentences: Maximum number of sentences to return.

    Returns:
        The top-scoring sentences, best first, joined by single spaces.
        An empty string when *num_sentences* is not positive or *text*
        contains no sentences.
    """
    sentences = nltk.sent_tokenize(text)
    # Guard explicitly: a slice of [-0:] would select *every* sentence, and
    # fitting the vectorizer on an empty list raises.
    if num_sentences <= 0 or not sentences:
        return ''

    # NOTE(review): the stop-word list is fixed to English even though the
    # application is labelled multilingual.
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
    sentence_scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel()

    # Descending score order, truncated to the requested count.
    top_indices = sentence_scores.argsort()[::-1][:num_sentences]
    return ' '.join(sentences[i] for i in top_indices)

def frequency_based_summarization(text, num_sentences=2):
    """Rank sentences by the total corpus frequency of their tokens.

    Token counts are taken over the lower-cased text; a sentence's score is
    the sum of the counts of its (lower-cased) tokens. The highest-scoring
    sentences come first in the result.

    Args:
        text: Raw input text.
        num_sentences: Number of leading sentences kept after ranking.

    Returns:
        The kept sentences joined by single spaces.
    """
    token_counts = nltk.FreqDist(nltk.word_tokenize(text.lower()))

    def sentence_weight(sentence):
        # Sum of whole-text frequencies of every token in this sentence.
        return sum(
            token_counts[token.lower()]
            for token in nltk.word_tokenize(sentence)
        )

    candidates = nltk.sent_tokenize(text)
    candidates.sort(key=sentence_weight, reverse=True)
    return ' '.join(candidates[:num_sentences])

def length_based_summarization(text, num_sentences=2):
    """Return the longest sentences of *text*, longest first.

    Args:
        text: Raw input text.
        num_sentences: Number of leading sentences kept after ordering by
            character length.

    Returns:
        The kept sentences joined by single spaces.
    """
    by_length = nltk.sent_tokenize(text)
    by_length.sort(key=len, reverse=True)
    longest = by_length[:num_sentences]
    return ' '.join(longest)

# GUI functions
def summarize_text():
    """Summarize the input box with the selected method and show the result.

    Reads the text, the requested sentence count and the selected method from
    the GUI widgets, writes the summary into the output box and stores it in
    ``summaries`` under the method name. Problems are reported through
    message boxes; nothing is returned.
    """
    input_text = text_input.get("1.0", tk.END).strip()
    if not input_text:
        messagebox.showwarning("Input Error", "Please enter text to summarize.")
        return

    # The entry is free text, so int() can fail; treat anything that is not
    # a positive whole number as invalid instead of letting the callback die.
    try:
        num_sentences = int(sentence_input.get())
    except ValueError:
        num_sentences = 0
    if num_sentences < 1:
        messagebox.showwarning(
            "Input Error",
            "Number of sentences must be a positive whole number.",
        )
        return

    summarizers = {
        "LSA": lsa_summarization,
        "TF-IDF": tfidf_summarization,
        "Frequency-Based": frequency_based_summarization,
        "Length-Based": length_based_summarization,
    }
    selected_method = method_var.get()
    summarizer = summarizers.get(selected_method)
    if summarizer is None:
        messagebox.showerror(
            "Error", f"Unknown summarization method: {selected_method}"
        )
        return

    # GUI boundary: surface any summarizer failure to the user.
    try:
        summary = summarizer(input_text, num_sentences)
    except Exception as e:
        messagebox.showerror("Error", str(e))
        return

    # Show the summary
    text_output.delete('1.0', tk.END)  # Clear the output box
    text_output.insert(tk.INSERT, f"{selected_method} Summary:\n{summary}\n\n")

    # Store the current summary so it can be scored later
    summaries[selected_method] = summary

def check_score():
    """Show ROUGE-1/2/L F1 scores for the selected method's stored summary.

    The stored summary is scored against the text currently in the input
    box, and the three F1 values are shown in a new window. Problems are
    reported through message boxes; nothing is returned.
    """
    selected_method = method_var.get()
    if selected_method not in summaries:
        messagebox.showwarning("Score Error", "Please summarize the text first before checking scores.")
        return

    reference_text = text_input.get("1.0", tk.END).strip()
    generated_summary = summaries[selected_method]

    # Rouge raises on an empty hypothesis or reference; check up front so
    # the user gets a clear message.
    if not reference_text or not generated_summary.strip():
        messagebox.showwarning(
            "Score Error",
            "Both the input text and a non-empty summary are required to compute scores.",
        )
        return

    # Calculate ROUGE score for the selected method
    try:
        scores = Rouge().get_scores(generated_summary, reference_text, avg=True)
    except Exception as e:
        # GUI boundary: report scoring failures instead of losing them in
        # the Tk callback.
        messagebox.showerror("Error", str(e))
        return

    # Show the score in a new window
    score_window = Toplevel(root)
    score_window.title("ROUGE Score")

    tk.Label(score_window, text=f"{selected_method} ROUGE-1 F1 Score: {scores['rouge-1']['f']:.4f}").pack(pady=5)
    tk.Label(score_window, text=f"{selected_method} ROUGE-2 F1 Score: {scores['rouge-2']['f']:.4f}").pack(pady=5)
    tk.Label(score_window, text=f"{selected_method} ROUGE-L F1 Score: {scores['rouge-l']['f']:.4f}").pack(pady=5)

def compare_scores():
    """Report which stored summary has the highest ROUGE-1 F1 score.

    Every method that has a non-empty summary in ``summaries`` is scored
    against the text currently in the input box; the best method and its
    score are shown in a message box. Nothing is returned.
    """
    reference_text = text_input.get("1.0", tk.END).strip()
    if not reference_text:
        messagebox.showwarning("Comparison Error", "Please enter text to compare scores.")
        return

    # Compare every method that has been run, not only the one whose radio
    # button happens to be selected right now.
    available = {name: summaries[name] for name in methods if summaries.get(name)}
    if not available:
        messagebox.showwarning("Comparison Error", "Please summarize the text using at least one method.")
        return

    rouge = Rouge()
    results = {}
    failed = []
    for name, generated_summary in available.items():
        try:
            scores = rouge.get_scores(generated_summary, reference_text, avg=True)
        except Exception:
            # Keep comparing the remaining methods; the failure is reported
            # to the user below.
            failed.append(name)
            continue
        results[name] = scores['rouge-1']['f']  # Using ROUGE-1 F1 score for comparison

    if not results:
        messagebox.showwarning(
            "Comparison Error",
            "None of the stored summaries could be scored.",
        )
        return

    # max() always yields a real method name, even when every score is 0.
    best_method = max(results, key=results.get)
    best_score = results[best_method]

    message = f"The best method is {best_method} with a ROUGE-1 F1 Score of {best_score:.4f}"
    if failed:
        message += "\n\nCould not score: " + ", ".join(failed)

    # Show the best method and score
    messagebox.showinfo("Best Method", message)

# GUI Setup
# Latest summary per method name; written by summarize_text() and read by
# the two scoring callbacks.
summaries = {}

# Main window
root = tk.Tk()
root.title("Multilingual Text Summarizer")

# --- Source text area ---
tk.Label(root, text="Input Text:").pack(pady=5)
text_input = scrolledtext.ScrolledText(root, height=10, width=60)
text_input.pack(pady=5)

# --- Requested summary length (defaults to 2 sentences) ---
tk.Label(root, text="Number of Sentences:").pack(pady=5)
sentence_input = tk.Entry(root)
sentence_input.pack(pady=5)
sentence_input.insert(tk.END, '2')

# --- Method picker: one radio button per summarizer, LSA preselected ---
tk.Label(root, text="Choose Summarization Method:").pack(pady=5)
methods = ["LSA", "TF-IDF", "Frequency-Based", "Length-Based"]
method_var = tk.StringVar(value="LSA")
for method in methods:
    tk.Radiobutton(
        root,
        text=method,
        variable=method_var,
        value=method,
    ).pack(pady=2)

# --- Action buttons ---
summarize_button = tk.Button(
    root, text="Summarize", command=summarize_text
)
summarize_button.pack(pady=10)

check_score_button = tk.Button(
    root, text="Check ROUGE Score", command=check_score
)
check_score_button.pack(pady=10)

compare_scores_button = tk.Button(
    root, text="Compare All Scores", command=compare_scores
)
compare_scores_button.pack(pady=10)

# --- Summary display ---
tk.Label(root, text="Summary:").pack(pady=5)
text_output = scrolledtext.ScrolledText(root, height=10, width=60)
text_output.pack(pady=5)

# Hand control to Tk's event loop.
root.mainloop()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
