In [1]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [2]:
import nltk
from nltk.cluster.util import cosine_distance
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from rouge import Rouge
import numpy as np
import networkx as nx
import gensim.downloader as api

# Fetch the NLTK data packages.
for _nltk_package in ('stopwords', 'punkt', 'wordnet'):
    nltk.download(_nltk_package)

# English stop-word list, passed to the bag-of-words similarity.
stop_words = stopwords.words('english')

# Pre-trained word vectors loaded through gensim's downloader API.
word_embeddings = api.load("glove-wiki-gigaword-50")

def read_article(file_name):
    """Read the first line of a text file and split it into tokenised sentences.

    The article is expected to sit on the first line of the file; any
    further lines are ignored.  Sentences are separated on ". " and each
    sentence is split into words on single spaces.  Words keep whatever
    punctuation is attached to them (commas, quotes, ...); no character
    filtering is applied.

    Args:
        file_name: Path of the UTF-8 encoded text file to read.

    Returns:
        A ``(sentences, original_text)`` tuple.  ``sentences`` is a list of
        word lists, one per non-empty sentence, and ``original_text`` is the
        first line exactly as read (including its trailing newline, if any).
        An empty file yields ``([], "")``.
    """
    with open(file_name, "r", encoding="utf-8") as file:
        # readline() returns "" for an empty file instead of raising.
        original_text = file.readline()

    sentences = []
    for piece in original_text.strip().split(". "):
        piece = piece.strip()
        # After the split only the final sentence still carries its own
        # full stop; remove it so every sentence is stored without one.
        if piece.endswith("."):
            piece = piece[:-1]
        # Skip empty fragments (e.g. from a trailing ". ") rather than
        # blindly discarding the last element, which could be a real sentence.
        if piece:
            sentences.append(piece.split(" "))
    return sentences, original_text

def sentence_similarity(sent1, sent2, stopwords=None):
    """Cosine similarity between the bag-of-words vectors of two sentences.

    Words are compared case-insensitively and any word found in
    ``stopwords`` is left out of the counts.

    Args:
        sent1: First sentence as a sequence of word strings.
        sent2: Second sentence as a sequence of word strings.
        stopwords: Optional collection of words to ignore.  Words are
            lowercased before being checked against it.

    Returns:
        The cosine similarity of the two word-count vectors as a float.
        Returns 0.0 when either sentence has no countable words, so the
        result is never NaN.
    """
    ignored = set(stopwords) if stopwords is not None else set()
    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Map each distinct word to a vector position once, instead of searching
    # a list for every word.
    position = {word: idx for idx, word in enumerate(set(words1 + words2))}

    vector1 = np.zeros(len(position))
    vector2 = np.zeros(len(position))
    for w in words1:
        if w not in ignored:
            vector1[position[w]] += 1
    for w in words2:
        if w not in ignored:
            vector2[position[w]] += 1

    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # At least one sentence is empty or made only of stop words; the
        # cosine is undefined (0/0), so report "no similarity".
        return 0.0
    return float(np.dot(vector1, vector2) / norm_product)

def tfidf_similarity(sentences):
    """Pairwise cosine similarity of sentences based on TF-IDF vectors.

    Args:
        sentences: List of sentences, each a list of word strings.

    Returns:
        A square NumPy array with one row/column per sentence.  The diagonal
        is zero, matching the other similarity methods in this notebook, so
        the graph built from it has no self-loops.  An empty input yields an
        array of shape ``(0, 0)``.
    """
    if not sentences:
        # Same empty result the other similarity methods produce.
        return np.zeros((0, 0))

    # TfidfVectorizer works on strings, so join each word list back together.
    sentences_flat = [" ".join(sentence) for sentence in sentences]
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(sentences_flat)

    similarity_matrix = cosine_similarity(tfidf_matrix)
    # A sentence's similarity to itself carries no ranking information.
    np.fill_diagonal(similarity_matrix, 0.0)
    return similarity_matrix

def embedding_similarity(sentences):
    """Pairwise cosine similarity of sentences based on averaged word vectors.

    Each sentence is represented by the mean of the ``word_embeddings``
    vectors of its words; words without a vector are skipped.

    Args:
        sentences: List of sentences, each a list of word strings.

    Returns:
        A square NumPy array with one row/column per sentence and a zero
        diagonal.  An empty input yields an array of shape ``(0, 0)``.
    """
    def get_sentence_vector(sentence):
        """Average the vectors of the sentence's known words."""
        # The GloVe wiki-gigaword vectors are keyed by lowercase tokens, so
        # normalise case and drop surrounding punctuation before the lookup;
        # otherwise words such as "Technology" or "work," are silently skipped.
        tokens = (w.lower().strip(".,;:!?\"'()[]") for w in sentence)
        vectors = [word_embeddings[t] for t in tokens if t in word_embeddings]
        if not vectors:
            # No known word: fall back to a zero vector of the model's size.
            return np.zeros(word_embeddings.vector_size)
        return np.mean(vectors, axis=0)

    if not sentences:
        return np.zeros((0, 0))

    sentence_vectors = np.vstack([get_sentence_vector(sentence) for sentence in sentences])
    # One matrix call replaces a separate cosine_similarity call per pair.
    similarity_matrix = cosine_similarity(sentence_vectors)
    # Self-similarity is excluded, as in the other similarity methods.
    np.fill_diagonal(similarity_matrix, 0.0)
    return similarity_matrix

def gen_sim_matrix(sentences, stop_words, method="bag_of_words"):
    """Build the sentence-to-sentence similarity matrix for the chosen method.

    Args:
        sentences: List of sentences, each a list of word strings.
        stop_words: Words to ignore; only used by the bag-of-words method.
        method: "tfidf" or "embedding" select the matching helper; any other
            value uses the bag-of-words similarity.

    Returns:
        A square matrix with one row/column per sentence.
    """
    if method == "tfidf":
        return tfidf_similarity(sentences)
    if method == "embedding":
        return embedding_similarity(sentences)

    # Bag-of-words: fill every off-diagonal cell, leaving the diagonal at 0.
    count = len(sentences)
    matrix = np.zeros((count, count))
    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            if row != col:
                matrix[row][col] = sentence_similarity(first, second, stop_words)
    return matrix

def evaluate_summary(original_text, summary_text):
    """Score a summary against its source text with ROUGE.

    Args:
        original_text: The reference text the summary was drawn from.
        summary_text: The generated summary, passed as the hypothesis.

    Returns:
        Whatever ``Rouge.get_scores`` returns for this hypothesis/reference pair.
    """
    return Rouge().get_scores(summary_text, original_text)

def generate_summary(file_name, top_n=5, method="bag_of_words"):
    """Print an extractive summary of an article together with its ROUGE scores.

    The article is read with ``read_article``, a similarity graph is built
    over its sentences, the sentences are ranked with PageRank and the
    highest-ranked ones are printed as the summary.

    Args:
        file_name: Path of the text file holding the article.
        top_n: Maximum number of sentences to keep.  If the article has fewer
            sentences, all of them are used.
        method: Similarity method forwarded to ``gen_sim_matrix``.

    Returns:
        None.  The original text, the summary and the scores are printed.
    """
    sentences, original_text = read_article(file_name)

    print("Original Text: \n")
    print(original_text)
    print("\nSummary: \n")

    summarize_text = []
    if sentences:
        sentence_similarity_matrix = gen_sim_matrix(sentences, stop_words, method)
        sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
        scores = nx.pagerank(sentence_similarity_graph)

        # Rank sentence indices by score only; sorting (score, sentence) tuples
        # would fall back to comparing word lists whenever two scores tie.
        ranked_indices = sorted(range(len(sentences)), key=lambda idx: scores[idx], reverse=True)

        # Slicing caps the selection at the number of available sentences, so a
        # large top_n no longer raises IndexError.
        for idx in ranked_indices[:max(top_n, 0)]:
            summarize_text.append(" ".join(sentences[idx]) + ".")

    summary_text = " ".join(summarize_text)
    print(summary_text)

    print("\nEvaluation:\n")
    if not summary_text:
        # There is no hypothesis text to score.
        print("ROUGE Scores: not computed (empty summary)")
        return
    rouge_scores = evaluate_summary(original_text, summary_text)
    print(f"ROUGE Scores: {rouge_scores}")

# Summarise tech.txt with the "embedding" similarity method, keeping top_n=3 sentences.
generate_summary("tech.txt", top_n=3, method="embedding")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Original Text: 

Technology has revolutionized the way humans interact, work, and live, transforming societies and industries at an unprecedented pace. At its core, technology encompasses the tools, techniques, and systems developed to solve problems, enhance productivity, and improve quality of life. From the invention of the wheel to the age of artificial intelligence, each technological leap has reshaped humanity’s trajectory. In modern times, digital technologies like the internet, smartphones, and cloud computing have interconnected the world, enabling seamless communication and access to information. Advances in fields such as biotechnology, renewable energy, and robotics are addressing global challenges like climate change, healthcare, and resource scarcity. Artificial intelligence and machine learning are automating tasks, driving innovation, and fostering breakthroughs across sectors, from autonomous vehicles to personalized medicine. However, technology also brings challenges