#### 1- Import Libs

In [9]:
import nltk 
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from nltk.tokenize import sent_tokenize
from heapq import nlargest

In [10]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\KMR\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

#### 2- TFIDF algorithm

In [11]:
vectorizer = TfidfVectorizer()
def tfidf_summarizer(text , number_sentences = 3):
    sentences = sent_tokenize(text)
    vectorized_sentences = vectorizer.fit_transform(sentences)

    sentences_score = vectorized_sentences.sum(axis = 1)
    top_sentences_rank = nlargest(number_sentences,range(len(sentences_score)),key = lambda i: sentences_score[i])
    summary = [sentences[i] for i in sorted(top_sentences_rank)]
    return ' '.join(summary)

In [None]:
text = """
Artificial Intelligence is the simulation of human intelligence in machines that are programmed to think and learn. 
AI is being used across different industries including finance, healthcare, and transportation. 
AI technologies include machine learning, natural language processing, and robotics. 
As AI continues to advance, it is changing the way we live and work. 
There are also concerns about job displacement and ethical implications.
"""

In [13]:
tfidf_summarizer(text)

'\nArtificial Intelligence is the simulation of human intelligence in machines that are programmed to think and learn. AI is being used across different industries including finance, healthcare, and transportation. As AI continues to advance, it is changing the way we live and work.'

In [14]:
import pickle

with open('tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer,f)

#### 3- Graph summarizer

In [1]:
pip install networkx

Collecting networkx
  Using cached networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Using cached networkx-3.5-py3-none-any.whl (2.0 MB)
Installing collected packages: networkx
Successfully installed networkx-3.5
Note: you may need to restart the kernel to use updated packages.


In [6]:
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize

In [7]:
vectorizer = TfidfVectorizer()
def textrang_summarizer(text,number_of_sentences):
    sentences = sent_tokenize(text)
    vectorized_sentences = vectorizer.fit_transform(sentences)
    sim_matrix = cosine_similarity(vectorized_sentences)


    nx_graph = nx.from_numpy_array(sim_matrix)
    scores = nx.pagerank(nx_graph)

    ranked_sentences = sorted(((scores[i],sent) for i,sent in enumerate(sentences)),reverse=True)

    summary_sentences = [sent for _, sent in ranked_sentences[:number_of_sentences]]
    return ' '.join(summary_sentences)



In [8]:
textrang_summarizer(text,3)

'As AI continues to advance, it is changing the way we live and work. \nArtificial Intelligence is the simulation of human intelligence in machines that are programmed to think and learn. AI is being used across different industries including finance, healthcare, and transportation.'