# Final Code for Abstractive Based Summary

## Code for Abstractive Type

In [1]:
#Using bm25 algorithm
import math
from six import iteritems
from six.moves import range
from functools import partial
from multiprocessing import Pool
from jr_abst.utils import effective_n_jobs

# Tuning constants used by BM25.get_score (K1, B) and by the
# negative-idf replacement in BM25._initialize (EPSILON).
PARAM_K1 = 1.5
PARAM_B = 0.75
EPSILON = 0.25


class BM25(object):
    """BM25 scorer built once over a tokenised corpus.

    `corpus` is an iterable of documents, each document being a sequence of
    hashable tokens. After construction the instance exposes:

    * corpus_size -- number of documents seen
    * avgdl       -- average document length (0 for an empty corpus)
    * doc_freqs   -- per-document {token: count} dictionaries
    * doc_len     -- per-document token counts
    * idf         -- {token: inverse document frequency}
    * average_idf -- mean idf over all tokens (0 when there are no tokens)
    """

    def __init__(self, corpus):
        self.corpus_size = 0
        self.avgdl = 0
        self.doc_freqs = []
        self.idf = {}
        self.doc_len = []
        # Defined up front so the attribute exists even for an empty corpus.
        self.average_idf = 0
        self._initialize(corpus)

    def _initialize(self, corpus):
        """Collect term frequencies, document lengths and idf values."""
        nd = {}  # word -> number of documents containing the word
        num_doc = 0  # total number of tokens over the whole corpus
        for document in corpus:
            self.corpus_size += 1
            self.doc_len.append(len(document))
            num_doc += len(document)

            frequencies = {}
            for word in document:
                frequencies[word] = frequencies.get(word, 0) + 1
            self.doc_freqs.append(frequencies)

            for word in frequencies:
                nd[word] = nd.get(word, 0) + 1

        if self.corpus_size == 0:
            # Nothing to average over; keep the zero defaults instead of
            # dividing by zero.
            return

        self.avgdl = float(num_doc) / self.corpus_size

        # Sum the idf values so an average can be derived for the epsilon
        # replacement below.
        idf_sum = 0
        # A word contained in more than half of the documents gets a negative
        # idf; remember those words so they can be given a small epsilon value.
        negative_idfs = []
        for word, freq in nd.items():
            idf = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5)
            self.idf[word] = idf
            idf_sum += idf
            if idf < 0:
                negative_idfs.append(word)

        # All documents may be empty, in which case there is no idf to average.
        if self.idf:
            self.average_idf = float(idf_sum) / len(self.idf)

        eps = EPSILON * self.average_idf
        for word in negative_idfs:
            self.idf[word] = eps

    def get_score(self, document, index):
        """Return the BM25 score of `document` against corpus document `index`."""
        score = 0
        doc_freqs = self.doc_freqs[index]
        for word in document:
            if word not in doc_freqs:
                continue
            score += (self.idf[word] * doc_freqs[word] * (PARAM_K1 + 1)
                      / (doc_freqs[word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.doc_len[index] / self.avgdl)))
        return score

    def get_scores(self, document):
        """Return the score of `document` against every corpus document, in order."""
        return [self.get_score(document, index) for index in range(self.corpus_size)]

    def get_scores_bow(self, document):
        """Return (index, score) pairs for corpus documents with a positive score."""
        scores = []
        for index in range(self.corpus_size):
            score = self.get_score(document, index)
            if score > 0:
                scores.append((index, score))
        return scores

    
def _get_scores_bow(bm25, document):
    return bm25.get_scores_bow(document)


def _get_scores(bm25, document):
    return bm25.get_scores(document)


def iter_bm25_bow(corpus, n_jobs=1):
    """Yield, for each document of `corpus`, its sparse BM25 scores.

    Each yielded item is the list of (index, score) pairs produced by
    BM25.get_scores_bow for that document against the whole corpus.

    `corpus` is iterated twice (once to build the model, once to score), so it
    must be re-iterable. `n_jobs` is resolved through effective_n_jobs; when
    it resolves to 1 the scoring runs in-process, otherwise in a worker pool.
    """
    bm25 = BM25(corpus)
    n_processes = effective_n_jobs(n_jobs)
    if n_processes == 1:
        for doc in corpus:
            yield bm25.get_scores_bow(doc)
        return

    get_score = partial(_get_scores_bow, bm25)
    pool = Pool(n_processes)
    try:
        for bow in pool.imap(get_score, corpus):
            yield bow
    finally:
        # Runs on normal exhaustion, on a worker error and when the consumer
        # abandons the generator early, so worker processes never leak.
        pool.close()
        pool.join()


def get_bm25_weights(corpus, n_jobs=1):
    """Return the dense BM25 score matrix of `corpus` against itself.

    The result is a list with one entry per document; entry i is the list
    returned by BM25.get_scores for document i (one score per corpus document).

    `corpus` is iterated twice, so it must be re-iterable. `n_jobs` is
    resolved through effective_n_jobs; when it resolves to 1 the scoring runs
    in-process, otherwise in a worker pool.
    """
    bm25 = BM25(corpus)
    n_processes = effective_n_jobs(n_jobs)
    if n_processes == 1:
        return [bm25.get_scores(doc) for doc in corpus]

    get_score = partial(_get_scores, bm25)
    pool = Pool(n_processes)
    try:
        weights = pool.map(get_score, corpus)
    finally:
        # Release the worker processes even when a worker raised.
        pool.close()
        pool.join()
    return weights

In [2]:
#graph
from abc import ABCMeta, abstractmethod

class IGraph(metaclass=ABCMeta):
    """Abstract interface every graph implementation must provide.

    The metaclass is declared with the Python 3 keyword syntax and every
    operation is marked abstract, so a subclass that forgets one of them
    cannot be instantiated.
    """

    @abstractmethod
    def __len__(self):
        """Return the number of nodes in the graph."""

    @abstractmethod
    def nodes(self):
        """Return all nodes of the graph."""

    @abstractmethod
    def edges(self):
        """Return all edges of the graph."""

    @abstractmethod
    def neighbors(self, node):
        """Return all nodes that are directly accessible from `node`."""

    @abstractmethod
    def has_node(self, node):
        """Return whether `node` exists in the graph."""

    @abstractmethod
    def add_node(self, node):
        """Add `node` to the graph."""

    @abstractmethod
    def add_edge(self, edge, wt=1):
        """Add an edge (a tuple of two nodes) with weight `wt` to the graph."""

    @abstractmethod
    def has_edge(self, edge):
        """Return whether `edge` exists in the graph."""

    @abstractmethod
    def edge_weight(self, edge):
        """Return the weight of `edge`."""

    @abstractmethod
    def del_node(self, node):
        """Remove `node` and its edges from the graph."""


class Graph(IGraph):
    """Undirected weighted graph stored as a dict of adjacency dicts.

    `node_neighbors[u][v]` holds the weight of the edge between u and v; every
    edge between distinct nodes is mirrored under both endpoints.
    """

    # Weight reported for an edge that is not present.
    DEFAULT_WEIGHT = 0

    def __init__(self):
        self.node_neighbors = {}

    def __len__(self):
        """Return the number of nodes in the graph."""
        return len(self.node_neighbors)

    def has_edge(self, edge):
        """Return whether both endpoints exist and reference each other."""
        first, second = edge
        adjacency = self.node_neighbors
        if first not in adjacency or second not in adjacency:
            return False
        return second in adjacency[first] and first in adjacency[second]

    def edge_weight(self, edge):
        """Return the weight of `edge`, or DEFAULT_WEIGHT if it is absent."""
        first, second = edge
        adjacent = self.node_neighbors.get(first, {})
        return adjacent.get(second, self.DEFAULT_WEIGHT)

    def neighbors(self, node):
        """Return a list of the nodes directly connected to `node`."""
        adjacent = self.node_neighbors[node]
        return list(adjacent)

    def has_node(self, node):
        """Return whether `node` exists in the graph."""
        return node in self.node_neighbors

    def add_edge(self, edge, wt=1):
        """Connect the two nodes of `edge` with weight `wt`.

        A zero weight is treated as "no edge": an existing edge is removed and
        nothing is added. Raises ValueError if the edge already exists.
        """
        if wt == 0.0:
            if self.has_edge(edge):
                self.del_edge(edge)
            return
        first, second = edge
        adjacency = self.node_neighbors
        if second in adjacency[first] or first in adjacency[second]:
            raise ValueError("Edge (%s, %s) already in graph" % (first, second))
        adjacency[first][second] = wt
        if first != second:
            # Mirror the edge so lookups work from either endpoint.
            adjacency[second][first] = wt

    def add_node(self, node):
        """Add `node` to the graph; raise ValueError if it is already there."""
        if node in self.node_neighbors:
            raise ValueError("Node %s already in graph" % node)
        self.node_neighbors[node] = {}

    def nodes(self):
        """Return a list of all nodes of the graph."""
        return list(self.node_neighbors)

    def edges(self):
        """Return a list of all (u, v) pairs produced by iter_edges."""
        return [pair for pair in self.iter_edges()]

    def iter_edges(self):
        """Yield every (u, v) adjacency entry of the graph."""
        for source, adjacent in self.node_neighbors.items():
            for target in adjacent:
                yield (source, target)

    def del_node(self, node):
        """Remove `node` together with every edge attached to it."""
        others = (peer for peer in self.neighbors(node) if peer != node)
        for peer in others:
            self.del_edge((peer, node))
        del self.node_neighbors[node]

    def del_edge(self, edge):
        """Remove `edge` from the adjacency of both endpoints."""
        first, second = edge
        del self.node_neighbors[first][second]
        if first != second:
            del self.node_neighbors[second][first]

In [3]:
#Commons
from jr_abst.summarization.graph import Graph

def build_graph(sequence):
    """Return a Graph with one node per distinct item of `sequence`."""
    result = Graph()
    for element in sequence:
        if result.has_node(element):
            # Duplicates are skipped because add_node rejects existing nodes.
            continue
        result.add_node(element)
    return result


def remove_unreachable_nodes(graph):
    """Delete, in place, every node that has no edge with a non-zero weight.

    A node without any neighbours is removed as well.
    """
    for candidate in graph.nodes():
        has_weighted_edge = any(
            graph.edge_weight((candidate, peer)) != 0
            for peer in graph.neighbors(candidate)
        )
        if not has_weighted_edge:
            graph.del_node(candidate)

In [4]:
#pagerank_weighted
import numpy
from numpy import empty as empty_matrix
from scipy.linalg import eig
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import eigs
from six.moves import range
from jr_abst.utils import deprecated


def pagerank_weighted(graph, damping=0.85):
    """Return a {node: rank} dictionary for the nodes of `graph`.

    The damped adjacency matrix is densified, a uniform term of
    (1 - damping) / len(graph) is added to every cell, and the principal
    eigenvector of the transposed result provides the ranks.
    """
    damped_adjacency = build_adjacency_matrix(graph, coeff=damping)
    uniform_term = (1 - damping) / float(len(graph))
    dense = damped_adjacency.toarray()
    # Added in place to avoid allocating a second dense matrix.
    dense += uniform_term
    dominant = principal_eigenvector(dense.T)
    # Only the real part of the eigenvector is handed on.
    return process_results(graph, dominant.real)


def build_adjacency_matrix(graph, coeff=1):
    """Return the row-normalised adjacency of `graph` as a CSR matrix.

    Cell (i, j) holds coeff * weight(i, j) / (sum of the weights of node i's
    edges); node order follows `graph.nodes()`. Zero-weight edges are left out.
    """
    ordered_nodes = graph.nodes()
    position = {node: idx for idx, node in enumerate(ordered_nodes)}
    size = len(ordered_nodes)

    rows, cols, values = [], [], []
    for row_idx, source in enumerate(ordered_nodes):
        adjacent = graph.neighbors(source)
        total = sum(graph.edge_weight((source, target)) for target in adjacent)
        for target in adjacent:
            weight = float(graph.edge_weight((source, target)))
            if weight == 0.0:
                continue
            rows.append(row_idx)
            cols.append(position[target])
            values.append(coeff * weight / total)
    return csr_matrix((values, (rows, cols)), shape=(size, size))


def build_probability_matrix(graph, coeff=1.0):
    """Return an n x n matrix filled with coeff / n, where n = len(graph)."""
    size = len(graph)
    uniform = empty_matrix((size, size))
    uniform.fill(coeff / float(size))
    return uniform


def principal_eigenvector(a):
    """Return an eigenvector of the square matrix `a`.

    From dimension 3 upwards the sparse solver is asked for a single
    eigenpair, since only one eigenvector is needed. Smaller matrices go
    through the dense solver, and the eigenvector belonging to the eigenvalue
    of largest absolute value is returned.
    """
    if len(a) >= 3:
        _, vectors = eigs(a, k=1)
        return vectors[:, 0]
    values, vectors = eig(a)
    dominant = numpy.abs(values).argmax()
    return vectors[:, dominant]


def process_results(graph, vec):
    """Map each node of `graph` to the absolute value of its entry in `vec`.

    Entries are matched to nodes by position in `graph.nodes()`.
    """
    return {node: abs(vec[position]) for position, node in enumerate(graph.nodes())}

In [5]:
#syntactic_unit.py
class SyntacticUnit(object):
    """A unit of text paired with its processed form.

    Attributes set by the constructor:

    * text  -- the original text of the unit
    * token -- the processed form of the unit (may be None)
    * tag   -- the first two characters of the given tag, or None
    * index -- position passed in by the caller (-1 by default)
    * score -- initialised to -1
    """

    def __init__(self, text, token=None, tag=None, index=-1):
        self.text = text
        self.token = token
        self.tag = tag[:2] if tag else None  # Just first two letters of tag
        self.index = index
        self.score = -1

    def __str__(self):
        # %-formatting instead of '+' so a unit whose token is still None
        # (the constructor default) can be printed without a TypeError.
        return "Original unit: '%s' *-*-*-* Processed unit: '%s'" % (self.text, self.token)

    def __repr__(self):
        return str(self)

In [6]:
#textcleaner.py
from gensim.parsing.preprocessing import preprocess_documents
from jr_abst.summarization.syntactic_unit import SyntacticUnit
from jr_abst.utils import tokenize, has_pattern
from six.moves import range
import re
import logging

logger = logging.getLogger(__name__)
HAS_PATTERN = has_pattern()
if HAS_PATTERN:
    from pattern.en import tag

# Placeholder swapped in for the whitespace that follows an abbreviation while
# sentences are being split; undo_replacement turns it back into a space.
SEPARATOR = r'@'
# A sentence: either text ending in '.', '!' or '?' that is followed by
# whitespace or end of input, or text running up to a newline / end of input.
RE_SENTENCE = re.compile(r'(\S.+?[.!?])(?=\s+|$)|(\S.+?)(?=[\n]|$)', re.UNICODE)
# Capital letter + one or two lowercase letters + '.', then one whitespace
# character and a word character (e.g. "Mr. S").
AB_SENIOR = re.compile(r'([A-Z][a-z]{1,2}\.)\s(\w)', re.UNICODE)
# '.' + single letter + '.', then one whitespace character and a word character.
AB_ACRONYM = re.compile(r'(\.[a-zA-Z]\.)\s(\w)', re.UNICODE)
# Two single letters each followed by '.', e.g. "U.S."; the letters are captured.
AB_ACRONYM_LETTERS = re.compile(r'([a-zA-Z])\.([a-zA-Z])\.', re.UNICODE)
# Same shapes as AB_SENIOR / AB_ACRONYM but with SEPARATOR where the whitespace was.
UNDO_AB_SENIOR = re.compile(r'([A-Z][a-z]{1,2}\.)' + SEPARATOR + r'(\w)', re.UNICODE)
UNDO_AB_ACRONYM = re.compile(r'(\.[a-zA-Z]\.)' + SEPARATOR + r'(\w)', re.UNICODE)

def split_sentences(text):
    """Return the list of sentences found in `text`.

    Abbreviations are protected with a separator before splitting and
    restored in every resulting sentence.
    """
    protected = replace_abbreviations(text)
    restored = []
    for sentence in get_sentences(protected):
        restored.append(undo_replacement(sentence))
    return restored


def replace_abbreviations(text):
    """Replace the whitespace after abbreviations in `text` with SEPARATOR."""
    abbreviation_patterns = [AB_SENIOR, AB_ACRONYM]
    return replace_with_separator(text, SEPARATOR, abbreviation_patterns)


def undo_replacement(sentence):
    """Turn the SEPARATOR placed after abbreviations back into a space."""
    undo_patterns = [UNDO_AB_SENIOR, UNDO_AB_ACRONYM]
    return replace_with_separator(sentence, r" ", undo_patterns)


def replace_with_separator(text, separator, regexs):
    """Apply each two-group pattern of `regexs` to `text` in turn.

    Every match is rewritten as group 1 + `separator` + group 2; the
    patterns are applied in the order given, each on the previous result.
    """
    template = "".join([r"\1", separator, r"\2"])
    for pattern in regexs:
        text = pattern.sub(template, text)
    return text


def get_sentences(text):
    """Yield every RE_SENTENCE match found in `text`, in order."""
    yield from (found.group() for found in RE_SENTENCE.finditer(text))


def merge_syntactic_units(original_units, filtered_units, tags=None):
    """Pair original units with their filtered forms as SyntacticUnit objects.

    Positions whose filtered form is the empty string are skipped. When
    `tags` is given, element [1] of the entry at the same position is passed
    on as the tag. The position is stored as the unit's index.
    """
    merged = []
    for position, raw_text in enumerate(original_units):
        processed = filtered_units[position]
        if processed == '':
            continue
        unit_tag = tags[position][1] if tags else None
        merged.append(SyntacticUnit(raw_text, processed, unit_tag, position))
    return merged


def join_words(words, separator=" "):
    """Concatenate `words`, placing `separator` between consecutive items."""
    joined = separator.join(words)
    return joined


def clean_text_by_sentences(text):
    """Split `text` into sentences and return them as SyntacticUnit objects.

    Each unit keeps the original sentence as its text and the space-joined
    output of preprocess_documents as its token.
    """
    original_sentences = split_sentences(text)
    filtered_sentences = []
    for processed_words in preprocess_documents(original_sentences):
        filtered_sentences.append(join_words(processed_words))
    return merge_syntactic_units(original_sentences, filtered_sentences)


def clean_text_by_word(text, deacc=True):
    """Tokenise `text` into words and return a {word: SyntacticUnit} mapping.

    Dotted two-letter acronyms are collapsed first. Tags are only computed
    when the optional `pattern` tagger is available (HAS_PATTERN).
    """
    collapsed = replace_with_separator(text, "", [AB_ACRONYM_LETTERS])
    original_words = list(tokenize(collapsed, to_lower=True, deacc=deacc))
    filtered_words = [join_words(parts, "") for parts in preprocess_documents(original_words)]
    # The tagger is given the joined words so it sees them in context.
    tags = tag(join_words(original_words)) if HAS_PATTERN else None
    by_text = {}
    for unit in merge_syntactic_units(original_words, filtered_words, tags):
        by_text[unit.text] = unit
    return by_text


def tokenize_by_word(text):
    """Return the result of `tokenize` on `text`, lower-cased and de-accented.

    Dotted two-letter acronyms are collapsed before tokenising.
    """
    collapsed = replace_with_separator(text, "", [AB_ACRONYM_LETTERS])
    words = tokenize(collapsed, to_lower=True, deacc=True)
    return words

In [7]:
#Summarizer
import logging
from utils import deprecated
from jr_abst.summarization.summarizer import summarize
from jr_abst.summarization.pagerank_weighted import pagerank_weighted as _pagerank
from jr_abst.summarization.textcleaner import clean_text_by_sentences as _clean_text_by_sentences
from jr_abst.summarization.commons import build_graph as _build_graph
from jr_abst.summarization.commons import remove_unreachable_nodes as _remove_unreachable_nodes
from jr_abst.summarization.bm25 import iter_bm25_bow as _bm25_weights
from gensim.corpora import Dictionary
from math import log10 as _log10
from six.moves import range


# Below this many sentences / documents the summarizer only logs a warning;
# processing still continues.
INPUT_MIN_LENGTH = 10
# BM25 weights smaller than this are not turned into graph edges.
WEIGHT_THRESHOLD = 1.e-3
logger = logging.getLogger(__name__)


def _set_graph_edge_weights(graph):
    """Connect the nodes of `graph` using their pairwise BM25 weights.

    Self-pairs and weights below WEIGHT_THRESHOLD are ignored, and an edge
    that already exists is left untouched. If no edge ends up with a non-zero
    weight, the graph is replaced by a fully connected one.
    """
    documents = graph.nodes()
    for i, doc_bow in enumerate(_bm25_weights(documents)):
        if i > 0 and i % 1000 == 0:
            logger.info('PROGRESS: processing %s/%s doc (%s non zero elements)', i, len(documents), len(doc_bow))
        for j, weight in doc_bow:
            if i == j:
                continue
            if weight < WEIGHT_THRESHOLD:
                continue
            edge = (documents[i], documents[j])
            if graph.has_edge(edge):
                continue
            graph.add_edge(edge, weight)

    # Handles the case in which all similarities are zero.
    # The resultant summary will consist of random sentences.
    any_weighted = any(graph.edge_weight(edge) != 0 for edge in graph.iter_edges())
    if not any_weighted:
        _create_valid_graph(graph)


def _create_valid_graph(graph):
    nodes = graph.nodes()
    for i in range(len(nodes)):
        for j in range(len(nodes)):
            if i == j:
                continue
            edge = (nodes[i], nodes[j])
            if graph.has_edge(edge):
                graph.del_edge(edge)
            graph.add_edge(edge, 1)


def _get_doc_length(doc):
    return sum(item[1] for item in doc)


def _get_similarity(doc1, doc2, vec1, vec2):
    """Return dot(vec1, vec2) divided by log10(len1) + log10(len2).

    The lengths come from _get_doc_length on `doc1` / `doc2`. The result is 0
    when either length is not positive or when the denominator is 0.
    """
    numerator = vec1.dot(vec2.transpose()).toarray()[0][0]
    length_1 = _get_doc_length(doc1)
    length_2 = _get_doc_length(doc2)
    if length_1 > 0 and length_2 > 0:
        denominator = _log10(length_1) + _log10(length_2)
    else:
        denominator = 0
    if denominator == 0:
        return 0
    return numerator / denominator


def _build_corpus(sentences):
    """Return one bag-of-words (via Dictionary.doc2bow) per sentence.

    The dictionary is built from the whitespace-split `token` of every
    sentence.
    """
    tokenised = []
    for sentence in sentences:
        tokenised.append(sentence.token.split())
    vocabulary = Dictionary(tokenised)
    return [vocabulary.doc2bow(words) for words in tokenised]


def _get_important_sentences(sentences, corpus, important_docs):
    """Return the sentences whose corpus entries appear in `important_docs`.

    Corpus documents are matched to sentences by position; when two documents
    are identical, the later sentence is the one that gets returned.
    """
    sentence_for_doc = {}
    for key, sentence in zip(_build_hasheable_corpus(corpus), sentences):
        sentence_for_doc[key] = sentence
    chosen = []
    for important_doc in important_docs:
        chosen.append(sentence_for_doc[tuple(important_doc)])
    return chosen


def _get_sentences_with_word_count(sentences, word_count):
    length = 0
    selected_sentences = []

    # Loops until the word count is reached.
    for sentence in sentences:
        words_in_sentence = len(sentence.text.split())

        # Checks if the inclusion of the sentence gives a better approximation to the word parameter.
        if abs(word_count - length - words_in_sentence) > abs(word_count - length):
            return selected_sentences
        selected_sentences.append(sentence)
        length += words_in_sentence
    return selected_sentences


def _extract_important_sentences(sentences, corpus, important_docs, word_count):
    """Return the important sentences, trimmed to `word_count` when it is given."""
    important_sentences = _get_important_sentences(sentences, corpus, important_docs)
    if word_count is None:
        return important_sentences
    return _get_sentences_with_word_count(important_sentences, word_count)


def _format_results(extracted_sentences, split):
    if split:
        return [sentence.text for sentence in extracted_sentences]
    return "\n".join(sentence.text for sentence in extracted_sentences)


def _build_hasheable_corpus(corpus):
    return [tuple(doc) for doc in corpus]


def summarize_corpus(corpus, ratio=0.2):
    """Return the most important documents of `corpus`, best first.

    The documents are turned into graph nodes, connected by BM25 weights and
    ranked with pagerank; the top int(len(corpus) * ratio) documents are
    returned as lists. An empty list is returned for an empty corpus or when
    fewer than 3 nodes remain reachable.
    """
    hashable_corpus = _build_hasheable_corpus(corpus)

    # The function ends, if the corpus is empty.
    if len(corpus) == 0:
        logger.warning("Input corpus is empty.")
        return []

    if len(corpus) < INPUT_MIN_LENGTH:
        logger.warning("Input corpus is expected to have at least %d documents.", INPUT_MIN_LENGTH)

    logger.info('Building graph')
    graph = _build_graph(hashable_corpus)

    logger.info('Filling graph')
    _set_graph_edge_weights(graph)

    logger.info('Removing unreachable nodes of graph')
    _remove_unreachable_nodes(graph)

    # Warns user to add more text.
    reachable = len(graph.nodes())
    if reachable < 3:
        logger.warning("Please add more sentences to the text. The number of reachable nodes is below 3")
        return []

    logger.info('Pagerank graph')
    pagerank_scores = _pagerank(graph)

    logger.info('Sorting pagerank scores')

    def _rank(doc):
        # Documents without a pagerank score rank as 0.
        return pagerank_scores.get(doc, 0)

    hashable_corpus.sort(key=_rank, reverse=True)

    keep = int(len(corpus) * ratio)
    return [list(doc) for doc in hashable_corpus[:keep]]


def summarize(text, ratio=0.2, word_count=None, split=False):
    """Return a summary of `text` made of its most important sentences.

    `ratio` selects the fraction of sentences to keep; when `word_count` is
    given it takes precedence and the summary is trimmed to roughly that many
    words. The result is a list of sentences when `split` is true, otherwise
    one newline-joined string. Raises ValueError for single-sentence input.
    """
    nothing = [] if split else u""

    # Gets a list of processed sentences.
    sentences = _clean_text_by_sentences(text)
    sentence_total = len(sentences)

    if sentence_total == 0:
        logger.warning("Input text is empty.")
        return nothing

    if sentence_total == 1:
        raise ValueError("Input must have more than one sentence")

    if sentence_total < INPUT_MIN_LENGTH:
        logger.warning("Input text is expected to have at least %d sentences.", INPUT_MIN_LENGTH)

    corpus = _build_corpus(sentences)

    # With a word budget every ranked document is requested and trimmed later.
    effective_ratio = ratio if word_count is None else 1
    most_important_docs = summarize_corpus(corpus, ratio=effective_ratio)

    # If couldn't get important docs, the algorithm ends.
    if not most_important_docs:
        logger.warning("Couldn't get relevant sentences.")
        return nothing

    # Extracts the most important sentences with the selected criterion.
    extracted_sentences = _extract_important_sentences(sentences, corpus, most_important_docs, word_count)

    # Restores the order in which the sentences appear in the original text.
    extracted_sentences.sort(key=lambda unit: unit.index)

    return _format_results(extracted_sentences, split)

In [8]:
##Keeping all the text files in a list for process
import glob

# Every .txt file found at or below the current working directory.
data = list(glob.iglob('**/*.txt', recursive=True))
for list_o_file in data:
    print(list_o_file)
print(data)
data_len = len(data)
print("\nTotal count of text files available are:", data_len)

input.txt
trail.txt
News Articles\tech\001.txt
News Articles\tech\002.txt
News Articles\tech\003.txt
News Articles\tech\004.txt
News Articles\tech\005.txt
News Articles\tech\006.txt
News Articles\tech\007.txt
News Articles\tech\008.txt
News Articles\tech\009.txt
News Articles\tech\010.txt
News Articles\tech\011.txt
News Articles\tech\012.txt
News Articles\tech\013.txt
News Articles\tech\014.txt
News Articles\tech\015.txt
News Articles\tech\016.txt
News Articles\tech\017.txt
News Articles\tech\018.txt
News Articles\tech\019.txt
News Articles\tech\020.txt
News Articles\tech\021.txt
News Articles\tech\022.txt
News Articles\tech\023.txt
News Articles\tech\024.txt
News Articles\tech\025.txt
News Articles\tech\026.txt
News Articles\tech\027.txt
News Articles\tech\028.txt
News Articles\tech\029.txt
News Articles\tech\030.txt
News Articles\tech\031.txt
News Articles\tech\032.txt
News Articles\tech\033.txt
News Articles\tech\034.txt
News Articles\tech\035.txt
News Articles\tech\036.txt
News Art

In [9]:
##Main class
import os
import time
from os.path import abspath,join,dirname
from inspect import getsourcefile
start1 = time.time()

if __name__ == "__main__":
    # Summaries are written here, one file per input, under the input's base name.
    output_dir = 'Output for Abstractive'
    # Create the folder up front so the first write cannot fail on a missing directory.
    os.makedirs(output_dir, exist_ok=True)
    banner = "\n*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*"

    ##I/P file
    for everyfile in data:
        start = time.time()
        # utf-8-sig reads plain UTF-8 and also drops a leading byte-order mark,
        # so the mark does not end up inside the first sentence.
        with open(everyfile, "r", encoding="utf-8-sig") as source:
            Input_text = source.read()
        print(banner)
        print("The input File is:\n")
        print(Input_text)
        print(banner)
        Summarized_data = summarize(Input_text)
        print("\nThe Summarized data for the above txt is:\n", Summarized_data)
        print(banner)
        # Write with an explicit encoding so the result does not depend on the
        # platform default, and let the context manager close the file.
        output_path = os.path.join(output_dir, os.path.basename(everyfile))
        with open(output_path, "w", encoding="utf-8") as sink:
            sink.write(Summarized_data)
        end = time.time()
        print(f"\n Runtime of the above Summary is : {end - start} seconds")

# Total Time Taken...
end1 = time.time()
print(f"Total Runtime of the program is : {end1 - start1} seconds")
        


*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

﻿We observe today not a victory of party but a celebration of freedom--symbolizing an end as well as a beginning--signifying renewal as well as change. For I have sworn before you and Almighty God the same solemn oath our for bears prescribed nearly a century and three-quarters ago. The world is very different now. For man holds in his mortal hands the power to abolish all forms of human poverty and all forms of human life. And yet the same revolutionary beliefs for which our forebears fought are still at issue around the globe--the belief that the rights of man come not from the generosity of the state but from the hand of God. We dare not forget today that we are the heirs of that first revolution. Let the word go forth from this time and place, to friend and foe alike, that the torch has been passed to a new generation of Americans--born 

 Runtime of the above Summary is : 0.029675722122192383 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Ad sales boost Time Warner profit

Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier.

The firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL.

Time Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the company said AOL's underlying profit before


The Summarized data for the above txt is:
 Search engine firm Google has released a trial tool which is concerning some net users because it directs people to pre-selected commercial websites.
The AutoLink feature comes with Google's latest toolbar and provides links in a webpage to Amazon.com if it finds a book's ISBN number on the site.
If a user clicks the AutoLink feature in the Google toolbar then a webpage with a book's unique ISBN number would link directly to Amazon's website.
Some users said AutoLink would only be fair if websites had to sign up to allow the feature to work on their pages or if they received revenue for any "click through" to a commercial site.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.022940397262573242 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
T

 Runtime of the above Summary is : 0.019946575164794922 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Security scares spark browser fix

Microsoft is working on a new version of its Internet Explorer web browser.

The revamp has been prompted by Microsoft's growing concern with security as well as increased competition from rival browsers. Microsoft said the new version will be far less vulnerable to the bugs that make its current browser a favourite of tech-savvy criminals. Test versions of the new program, called IE 7, are due to be released by the summer.

The announcement about Internet Explorer was made by Bill Gates, Microsoft chairman and chief software architect, during a keynote speech at the RSA Security conference currently being held in San Francisco. Although details were scant, Mr Gates, said IE7 would include new protections against viruses, spyware and phishing s


 Runtime of the above Summary is : 0.013963460922241211 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Lasers help bridge network gaps

An Indian telecommunications firm has turned to lasers to help it overcome the problems of setting up voice and data networks in the country.

Tata Teleservices is using the lasers to make the link between customers' offices and its own core network. The laser bridges work across distances up to 4km and can be set up much faster than cable connections. In 12 months the lasers have helped the firm set up networks in more than 700 locations.

"In this particular geography getting permission to dig the ground and lay the pipes is a bit of a task," said Mr R. Sridharan, vice president of networks at Tata. "Heavy traffic and the layout under the ground mean that digging is uniquely difficult," he said. In some locations, he said, permission to dig up


The Summarized data for the above txt is:
 "By handing down this harsh sentence against a weblogger, their aim is to dissuade journalists and internet-users from expressing themselves online or contacting foreign media." In the days before his arrest Mr Sigarchi gave interviews to the BBC Persian Service and the US-funded Radio Farda.
Iranian authorities have arrested about 20 online journalists during the current crackdown.
Mr Sigarchi was sentenced one day after an online campaign highlighted his case in a day of action in defence of bloggers around the world.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.015651226043701172 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Microsoft seeking spyware trojan

Microsoft is investigating a trojan program that attempts


The Summarized data for the above txt is:
 One in 10 adult Americans - equivalent to 22 million people - owns an MP3 player, according to a survey.
The American love affair with digital music players has been made possible as more and more homes get broadband.
Of the 22 million Americans who own MP3 players, 59% are men compared to 41% of women.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.015636920928955078 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

'No re-draft' for EU patent law

A proposed European law on software patents will not be re-drafted by the European Commission (EC) despite requests by MEPs.

The law is proving controversial and has been in limbo for a year. Some major tech firms say it is needed to protect inventions, while others fear it wil

 Runtime of the above Summary is : 0.015619754791259766 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Ask Jeeves joins web log market

Ask Jeeves has bought the Bloglines website to improve the way it handles content from web journals or blogs.

The Bloglines site has become hugely popular as it gives users one place in which to read, search and share all the blogs they are interested in. Ask Jeeves said it was not planning to change Bloglines but would use the 300 million articles it has archived to round out its index of the web. How much Ask Jeeves paid for Bloglines was not revealed.

Bloglines has become popular because it lets users build a list of the blogs they want to follow without having to visit each journal site individually. To do this it makes use of a technology known as Really Simple Syndication (RSS) that many blogs have adopted to let other sites know when new

 Runtime of the above Summary is : 0.0196840763092041 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Gadget growth fuels eco concerns

Technology firms and gadget lovers are being urged to think more about the environment when buying and disposing of the latest hi-tech products.

At the Consumer Electronics Show in Las Vegas earlier this month, several hi-tech firms were recognised for their strategies to help the environment. Ebay also announced the Rethink project bringing together Intel, Apple, and IBM among others to promote recycling. The US consumer electronics market is set to grow by over 11% in 2005. But more awareness is needed about how and where old gadgets can be recycled as well as how to be more energy efficient, said the US Environmental Protection Agency (EPA). Of particular growing concern is how much energy it takes to recharge portable devices, one of the fast

 The shift is happening as tech savvy criminals turn to technology to help them con people out of cash, steal valuable data or take over home PCs. Viruses written to make headlines by infecting millions are getting rarer.
The growing criminal use of malware has meant the end of the neat categorisation of different sorts of viruses and malicious programs.
He said many of the criminal programs came from Eastern Europe where cash-rich organised gangs can find a ready supply of technical experts that will crank out code to order.
Few of the malicious programs written by hi-tech thieves are cleverly written, many are much more pragmatic and use tried and tested techniques to infect machines or to trick users into installing a program or handing over important data.
In some cases spyware was being written that searched for rival malicious programs on PCs it infects and then trying to erase them so it has sole ownership of that machine.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 If you don't know art but know what you like, new search technology could prove a useful gateway to painting.
ArtGarden, developed by BT's research unit, is being tested by the Tate as a new way of browsing its online collection of paintings.
Rather than search by the name of an artist or painting, users are shown a selection of pictures.
The Tate is committed to making its art more accessible and technology such as ArtGarden can help with that, said Ms Rellie.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.031240224838256836 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:


Writing a Microsoft Word document can be a dangerous business, according to document security firm Workshare.

Up to 75% of all business documents contained sensitive information most firms wou

 A Norwegian student who ran a website which linked to downloadable MP3 files has been ordered to pay compensation by the country's Supreme Court.
Frank Allan Bruvik was ordered to pay 100,000 kroner (£8,000) to the music industry in Norway.
A Norwegian court ruled in 2003 that Bruvik would have to pay 100,000 kroner to the music industry, but the country's Court of Appeal cleared him, saying that the copyright violation occurred when others posted the music.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.03124213218688965 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

The future in your pocket

If you are a geek or gadget fan, the next 12 months look like they are going to be a lot of fun.

The relentless pace of development in the hi-tech world and rampant compe


 Runtime of the above Summary is : 0.015635967254638672 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Blogger grounded by her airline

A US airline attendant is fighting for her job after she was suspended over postings on her blog, or online diary.

Queen of the Sky, otherwise known as Ellen Simonetti, evolved into an anonymous semi-fictional account of life in the sky. But after she posted pictures of herself in uniform, Delta Airlines suspended her indefinitely without pay. Ms Simonetti was told her suspension was a result of "inappropriate" images. Delta Airlines declined to comment.


Ms Simonetti started her personal blog in January to help her get over her mother's death. She had ensured she made no mention of which airline she worked for, and created fictional names for cities and companies. The airline's name was changed to Anonymous Airline and the city in which she w


The Summarized data for the above txt is:
 New Media for a New Millennium (NM2) will have as its endgame the development of a completely new media genre, which will allow audiences to create their own media worlds based on their specific interests or tastes.
Viewers will be able to participate in storylines, manipulate plots and even the sets and props of TV shows.
The three-year project will work on seven productions as it develops a set of software tools that will allow viewers to edit content to their needs.
"Media users will no longer be passive viewers but become active engagers." It will also be important that the tools are sophisticated enough to obey the complex rules of cinematography and editing said John Wyver, from TV producer Illuminations Television Limited, which is also involved in the project.
"It's not just a matter of stringing together the romantic or action portions of a production," said Mr Wyver.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*


*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

The Force is strong in Battlefront

The warm reception that has greeted Star Wars: Battlefront is a reflection not of any ingenious innovation in its gameplay, but of its back-to-basics approach and immense nostalgia quotient.

Geared towards online gamers, it is based around little more than a series of all-out gunfights, set in an array of locations all featured in, or hinted at during, the two blockbusting film trilogies. Previous Star Wars titles like the acclaimed Knights Of The Old Republic and Jedi Knight have regularly impressed with their imaginative forays into the far corners of the franchise's extensive universe, and their use of weird and wonderful new characters. Battlefront on the other hand wholeheartedly revisits the most recognisable elements of the hit movies themselves.

The sights, sounds and protagonists on show here wi


The Summarized data for the above txt is:
 Halo is considered by many video game pundits to be one of the finest examples of interactive entertainment ever produced and more than 1.5 million people worldwide have pre-ordered the sequel.
But what marked Halo as a classic were the thousands of details which brought a feeling of polish and the enormously-high production values not usually associated with video gaming.
"The first time I played it I just stood there watching the spent shells fall out of my gun," said Errera, remarking on the level of detail in the game.
The game also inspired thousands of people to write their own fiction based on the storyline and produce downloadable video clips of the many weird and wonderful things that can be done in the game.
"It blew me away the first time someone managed to climb to the top of Halo," said Errera, referring to a fan who had created a video of Master Chief scaling the landscape of the graphical world.
Errera said expectations of the 


The Summarized data for the above txt is:
 It is launching a test service to allow people to publish blogs, or online journals, called MSN Spaces.
Microsoft is trailing behind competitors like Google and AOL, which already offer services which make it easy for people to set up web journals.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.01558995246887207 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Broadband fuels online change

Fast web access is encouraging more people to express themselves online, research suggests.

A quarter of broadband users in Britain regularly upload content and have personal sites, according to a report by UK think-tank Demos. It said that having an always-on, fast connection is changing the way people use the internet. More than five

 In late December movie studios launched a legal campaign against websites that helped people swap pirated movies using the BitTorrent network.
One of the sites shut down by the legal campaign was suprnova.org which helped boost the popularity of the BitTorrent system by checking that trackers led to the movies or TV programmes they claimed to.
Now the man behind suprnova.org, who goes by the nickname Sloncek, is preparing to release software for a new file-swapping network dubbed Exeem.
In an interview with Novastream web radio, Sloncek said Exeem would combine ideas from the BitTorrent and Kazaa file-sharing systems.
Like BitTorrent, Exeem will have trackers that help point people toward the file they want.
But, he said, it would struggle to be as popular as BitTorrent and Suprnova because early versions were not taking enough care to make sure good copies of files were being shared.
Mr Pouwelse said that future versions of file-sharing systems are likely to incorporate some kind of 


The Summarized data for the above txt is:
 In Nice last week, the European Commission announced its Networked & Electronic Media (NEM) initiative.
The Commission wants people to be able to locate the content they desire and have it delivered seamlessly, when on the move, at home or at work, no matter who supplies the devices, network, content, or content protection scheme.
They have identified that many groups have defined the forms of digital media in the areas that NEM encompasses.
Many feel that the most difficult and challenging area for the Commission will be to identify a solution for different Digital Rights Management (DRM) schemes.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.011963605880737305 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

A decade of


The Summarized data for the above txt is:
 A US airline attendant suspended over "inappropriate images" on her blog - web diary - says she has been fired.
A Delta spokesperson confirmed on Wednesday that Ms Simonetti was no longer an employee.
Queen of the Sky has received a lot of support and advice from the global blogging community since news of her suspension was brought to light on the BBC News website and others.
A legal expert in the US speculated that Delta might be concerned that the fictional content on the blog may be linked back to the airline after the images were posted.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.12865352630615234 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

Mobiles double up as bus tickets

Mobiles could soon double up as tra

 And I can spend the day working with my girlfriend Anne, a children's writer, at her house in Cambridge, sharing her wireless network.
With no modem installed in her computer, she had to borrow internet access from friends or use the dial-up connection on her daughter's laptop, so she had to choose between copying her files onto her USB memory card or accepting a slower and flakier net connection.
But she did not, because having fast, always on, and easy access to the net has become part of the routine of her daily life, and when it was taken away it was too much effort to go back to the old ways of doing things.
According to Ofcom there were almost four million broadband users in the UK in April 2004, and numbers are climbing fast.
My dad finally made the change earlier this month and new net users are selecting broadband from the start.
They encourage other people to get broadband so that they can share digital photos and do all of the other things that need fast and reliable connec

 Internet portal Lycos has made a screensaver that endlessly requests data from sites that sell the goods and services mentioned in spam e-mail.
"We've found a way to make it much higher cost for spammers by putting a load on their servers." By getting thousands of people to download and use the screensaver, Lycos hopes to get spamming websites constantly running at almost full capacity.
Mr Pollmann said there was no intention to stop the spam websites working by subjecting them with too much data to cope with.
Lycos did not want to use e-mail to fight back, said Mr Pollmann.
To limit the chance of mistakes being made, Lycos is using people to ensure that the sites are selling spam goods.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.031242847442626953 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Experts said the success of the Half-Life 2 anti-piracy system might tempt other game makers into creating their own version.
Half-Life 2 was officially released on 16 November but before gamers could get to grips with the long-awaited title they were forced to authenticate their copy of the game online.
Some of those who have been banned by the move protested their innocence in the online forums on the main Steam site and said they were being punished for what other people did with their account.
If Steam proves effective at cutting the piracy of games to a minimum, said Mr Fahey, other game makers may be tempted to set up copycat systems.

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

 Runtime of the above Summary is : 0.03191685676574707 seconds

*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
The input File is:

New browser 

In [10]:
## Evaluations of Text Summaries...
import os
import time
from os.path import abspath,join,dirname
from inspect import getsourcefile
# Load the source document and its generated summary and report their sizes.
# Both paths are resolved against the directory that `getsourcefile` reports
# for code defined in this cell/file.
base_dir = dirname(abspath(getsourcefile(lambda: 0)))

# Alternative input used previously: News Articles\business\001.txt
ip = join(base_dir, "input.txt")
# "utf-8-sig" reads plain UTF-8 as well, but also strips a leading byte-order
# mark so it does not leak into the printed text or the ROUGE tokens.
with open(ip, "r", encoding="utf-8-sig") as op:
    Input_text = op.read()  # the `with` block closes the file on exit
print("The Input text file:\n", Input_text)
# Count whitespace-separated tokens; len() on the string would count characters.
print("\nThe count of words:\n", len(Input_text.split()))
print("________________")

# Pass the folder and file name as separate components so the separator is
# chosen by the platform instead of being a hard-coded backslash.
opt = join(base_dir, "Output for Abstractive", "input.txt")
with open(opt, "r", encoding="utf-8-sig") as opx:
    Output_text = opx.read()
print("The Output text file:\n", Output_text)
print("\nThe count of words:\n", len(Output_text.split()))

The Input text file:
 ﻿We observe today not a victory of party but a celebration of freedom--symbolizing an end as well as a beginning--signifying renewal as well as change. For I have sworn before you and Almighty God the same solemn oath our for bears prescribed nearly a century and three-quarters ago. The world is very different now. For man holds in his mortal hands the power to abolish all forms of human poverty and all forms of human life. And yet the same revolutionary beliefs for which our forebears fought are still at issue around the globe--the belief that the rights of man come not from the generosity of the state but from the hand of God. We dare not forget today that we are the heirs of that first revolution. Let the word go forth from this time and place, to friend and foe alike, that the torch has been passed to a new generation of Americans--born in this century, tempered by war, disciplined by a hard and bitter peace, proud of our ancient heritage--and unwilling to wit

In [11]:
#Applying Rouge
import rouge
def prepare_results(p, r, f, metric_name=None):
    """Format one precision/recall/F1 triple as a tab-separated report line.

    Args:
        p: Precision; multiplied by 100 for display.
        r: Recall; multiplied by 100 for display.
        f: F1 score; multiplied by 100 for display.
        metric_name: Label printed at the start of the line. When omitted,
            the module-level ``metric`` variable is used instead, so existing
            three-argument calls made from inside the evaluation loop keep
            producing the same output.

    Returns:
        A string with a leading tab, the label followed by a colon, and the
        three values tagged ``P``, ``R`` and ``F1``, each rendered with two
        decimals in a field at least five characters wide.

    Raises:
        NameError: If ``metric_name`` is omitted and no global ``metric``
            has been defined.
    """
    # Prefer the explicit label; fall back to the caller's loop variable only
    # for backward compatibility with the original implicit-global contract.
    label = metric if metric_name is None else metric_name
    return '\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:5.2f}'.format(
        label, 'P', 100.0 * p, 'R', 100.0 * r, 'F1', 100.0 * f)


# Score the generated summary (Output_text) against the source document
# (Input_text) once per aggregation mode and print every metric's P/R/F1.
for aggregator in ('Avg', 'Best', 'Individual'):
    print('Evaluation with {}'.format(aggregator))
    apply_avg = (aggregator == 'Avg')
    apply_best = (aggregator == 'Best')

    # Collect the scorer settings first so the constructor call stays short.
    rouge_options = dict(
        metrics=['rouge-n', 'rouge-l', 'rouge-w'],
        max_n=3,
        limit_length=True,
        length_limit=500,
        length_limit_type='words',
        apply_avg=apply_avg,
        apply_best=apply_best,
        alpha=0.5,  # Default F1_score
        weight_factor=1.2,
        stemming=True,
    )
    evaluator = rouge.Rouge(**rouge_options)
    hypothesis_1 = Output_text
    references_1 = [Input_text]
    scores = evaluator.get_scores(hypothesis_1, references_1)

    # The loop variable has to be called `metric`: prepare_results picks the
    # label up from the global of that name.
    for metric in sorted(scores):
        results = scores[metric]
        if apply_avg or apply_best:
            # Aggregated modes: one p/r/f triple per metric.
            print(prepare_results(results['p'], results['r'], results['f']))
        else:
            # Individual mode: the value is a list, as each summary is
            # evaluated against each reference.
            for hypothesis_id, results_per_ref in enumerate(results):
                for reference_id, precision in enumerate(results_per_ref['p']):
                    print('\tHypothesis #{} & Reference #{}: '.format(hypothesis_id, reference_id))
                    print(prepare_results(precision,
                                          results_per_ref['r'][reference_id],
                                          results_per_ref['f'][reference_id]))
            print()
    print()

Evaluation with Avg
	rouge-1:	P: 65.49	R: 58.09	F1: 61.57
	rouge-2:	P: 38.55	R: 34.18	F1: 36.23
	rouge-3:	P: 34.22	R: 30.33	F1: 32.16
	rouge-l:	P: 53.75	R: 48.63	F1: 51.06
	rouge-w:	P: 35.67	R:  9.08	F1: 14.48

Evaluation with Best
	rouge-1:	P: 65.49	R: 58.09	F1: 61.57
	rouge-2:	P: 38.55	R: 34.18	F1: 36.23
	rouge-3:	P: 34.22	R: 30.33	F1: 32.16
	rouge-l:	P: 53.75	R: 48.63	F1: 51.06
	rouge-w:	P: 35.67	R:  9.08	F1: 14.48

Evaluation with Individual
	Hypothesis #0 & Reference #0: 
	rouge-1:	P: 65.49	R: 58.09	F1: 61.57

	Hypothesis #0 & Reference #0: 
	rouge-2:	P: 38.55	R: 34.18	F1: 36.23

	Hypothesis #0 & Reference #0: 
	rouge-3:	P: 34.22	R: 30.33	F1: 32.16

	Hypothesis #0 & Reference #0: 
	rouge-l:	P: 53.75	R: 48.63	F1: 51.06

	Hypothesis #0 & Reference #0: 
	rouge-w:	P: 35.67	R:  9.08	F1: 14.48




In [12]:
##Backup

In [13]:
# Backup of an earlier driver script, kept as a bare triple-quoted string.
# Nothing inside it executes; evaluating the cell only yields the string.
# The text reads input.txt, calls summarize() on it, prints the result and
# writes it to 'Output for Abstractive\summaryoutput.txt'.
# NOTE(review): the string is not raw, so the "\n" sequences are real newlines
# and "\s" is an unrecognised escape -- keep that in mind before reviving it.
"""##Main class
import os
from os.path import abspath,join,dirname
from inspect import getsourcefile

if __name__=="__main__":
    
    ##I/P file
    ip=join(dirname(abspath(getsourcefile(lambda:0))),"input.txt")
    with open(ip,"r",encoding="utf-8") as op:
        Input_text=op.read()
        op.close()
    print("The input data is:\n",Input_text)
    print("\n*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*")
    Summarized_data=summarize(Input_text)
    print("\nThe Summarized data for the above txt is:\n",Summarized_data)
    #print(" ",summarize(Input_text))
    file = open('Output for Abstractive\summaryoutput.txt', 'w')
    for s in Summarized_data:
        file.write(s) 
    file.close()"""

'##Main class\nimport os\nfrom os.path import abspath,join,dirname\nfrom inspect import getsourcefile\n\nif __name__=="__main__":\n    \n    ##I/P file\n    ip=join(dirname(abspath(getsourcefile(lambda:0))),"input.txt")\n    with open(ip,"r",encoding="utf-8") as op:\n        Input_text=op.read()\n        op.close()\n    print("The input data is:\n",Input_text)\n    print("\n*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*")\n    Summarized_data=summarize(Input_text)\n    print("\nThe Summarized data for the above txt is:\n",Summarized_data)\n    #print(" ",summarize(Input_text))\n    file = open(\'Output for Abstractive\\summaryoutput.txt\', \'w\')\n    for s in Summarized_data:\n        file.write(s) \n    file.close()'

In [14]:
# Backup of a ROUGE demo with hard-coded hypotheses and references, kept as a
# bare triple-quoted string. Nothing inside it executes; evaluating the cell
# only yields the string. Compared with the live evaluation cell it uses
# max_n=4 and length_limit=100, and scores two hypotheses in one call.
# NOTE(review): inside the text, references_2 is a plain string while
# references_1 is a list -- confirm that is intended before reviving it.
"""import rouge


def prepare_results(p, r, f):
    return '\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:5.2f}'.format(metric, 'P', 100.0 * p, 'R', 100.0 * r, 'F1', 100.0 * f)


for aggregator in ['Avg', 'Best', 'Individual']:
    print('Evaluation with {}'.format(aggregator))
    apply_avg = aggregator == 'Avg'
    apply_best = aggregator == 'Best'

    evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l', 'rouge-w'],
                           max_n=4,
                           limit_length=True,
                           length_limit=100,
                           length_limit_type='words',
                           apply_avg=apply_avg,
                           apply_best=apply_best,
                           alpha=0.5, # Default F1_score
                           weight_factor=1.2,
                           stemming=True)


    hypothesis_1 = "King Norodom Sihanouk has declined requests to chair a summit of Cambodia 's top political leaders , saying the meeting would not bring any progress in deadlocked negotiations to form a government .\nGovernment and opposition parties have asked King Norodom Sihanouk to host a summit meeting after a series of post-election negotiations between the two opposition groups and Hun Sen 's party to form a new government failed .\nHun Sen 's ruling party narrowly won a majority in elections in July , but the opposition _ claiming widespread intimidation and fraud _ has denied Hun Sen the two-thirds vote in parliament required to approve the next government .\n"
    references_1 = ["Prospects were dim for resolution of the political crisis in Cambodia in October 1998.\nPrime Minister Hun Sen insisted that talks take place in Cambodia while opposition leaders Ranariddh and Sam Rainsy, fearing arrest at home, wanted them abroad.\nKing Sihanouk declined to chair talks in either place.\nA U.S. House resolution criticized Hun Sen's regime while the opposition tried to cut off his access to loans.\nBut in November the King announced a coalition government with Hun Sen heading the executive and Ranariddh leading the parliament.\nLeft out, Sam Rainsy sought the King's assurance of Hun Sen's promise of safety and freedom for all politicians.",
                    "Cambodian prime minister Hun Sen rejects demands of 2 opposition parties for talks in Beijing after failing to win a 2/3 majority in recent elections.\nSihanouk refuses to host talks in Beijing.\nOpposition parties ask the Asian Development Bank to stop loans to Hun Sen's government.\nCCP defends Hun Sen to the US Senate.\nFUNCINPEC refuses to share the presidency.\nHun Sen and Ranariddh eventually form a coalition at summit convened by Sihanouk.\nHun Sen remains prime minister, Ranariddh is president of the national assembly, and a new senate will be formed.\nOpposition leader Rainsy left out.\nHe seeks strong assurance of safety should he return to Cambodia.\n",
                    ]

    hypothesis_2 = "China 's government said Thursday that two prominent dissidents arrested this week are suspected of endangering national security _ the clearest sign yet Chinese leaders plan to quash a would-be opposition party .\nOne leader of a suppressed new political party will be tried on Dec. 17 on a charge of colluding with foreign enemies of China '' to incite the subversion of state power , '' according to court documents given to his wife on Monday .\nWith attorneys locked up , harassed or plain scared , two prominent dissidents will defend themselves against charges of subversion Thursday in China 's highest-profile dissident trials in two years .\n"
    references_2 = "Hurricane Mitch, category 5 hurricane, brought widespread death and destruction to Central American.\nEspecially hard hit was Honduras where an estimated 6,076 people lost their lives.\nThe hurricane, which lingered off the coast of Honduras for 3 days before moving off, flooded large areas, destroying crops and property.\nThe U.S. and European Union were joined by Pope John Paul II in a call for money and workers to help the stricken area.\nPresident Clinton sent Tipper Gore, wife of Vice President Gore to the area to deliver much needed supplies to the area, demonstrating U.S. commitment to the recovery of the region.\n"

    all_hypothesis = [hypothesis_1, hypothesis_2]
    all_references = [references_1, references_2]

    scores = evaluator.get_scores(all_hypothesis, all_references)

    for metric, results in sorted(scores.items(), key=lambda x: x[0]):
        if not apply_avg and not apply_best: # value is a type of list as we evaluate each summary vs each reference
            for hypothesis_id, results_per_ref in enumerate(results):
                nb_references = len(results_per_ref['p'])
                for reference_id in range(nb_references):
                    print('\tHypothesis #{} & Reference #{}: '.format(hypothesis_id, reference_id))
                    print('\t' + prepare_results(results_per_ref['p'][reference_id], results_per_ref['r'][reference_id], results_per_ref['f'][reference_id]))
            print()
        else:
            print(prepare_results(results['p'], results['r'], results['f']))
    print()"""

'import rouge\n\n\ndef prepare_results(p, r, f):\n    return \'\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:5.2f}\'.format(metric, \'P\', 100.0 * p, \'R\', 100.0 * r, \'F1\', 100.0 * f)\n\n\nfor aggregator in [\'Avg\', \'Best\', \'Individual\']:\n    print(\'Evaluation with {}\'.format(aggregator))\n    apply_avg = aggregator == \'Avg\'\n    apply_best = aggregator == \'Best\'\n\n    evaluator = rouge.Rouge(metrics=[\'rouge-n\', \'rouge-l\', \'rouge-w\'],\n                           max_n=4,\n                           limit_length=True,\n                           length_limit=100,\n                           length_limit_type=\'words\',\n                           apply_avg=apply_avg,\n                           apply_best=apply_best,\n                           alpha=0.5, # Default F1_score\n                           weight_factor=1.2,\n                           stemming=True)\n\n\n    hypothesis_1 = "King Norodom Sihanouk has declined requests to chair a summit of Cambodia \'s top poli