## **USING NLTK**

In [44]:
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

In [45]:
def read_article(file_name):
    file = open("textSummarizationAI.txt", "r")
    filedata = file.readlines()
    article = filedata[0].split(". ")
    sentences = []

    for sentence in article:
        #print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop()

    return sentences

In [46]:

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []

    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]

    all_words = list(set(sent1 + sent2))

    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)

    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1

    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1

    return 1 - cosine_distance(vector1, vector2)

In [47]:
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))

    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix


In [50]:

def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    #  Read text and split it
    sentences =  read_article(file_name)
   # print(sentences)

    # Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
    print("Indexes of top ranked_sentence order are ", ranked_sentence)

    for i in range(top_n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

    # output the summarize text
    print("Summarize Text: \n", ". ".join(summarize_text))
    with open('summarizedTEXT.txt', 'w') as f:
      for line in summarize_text:
          f.write(line)
          f.write('\n')


In [51]:
# let's begin
generate_summary( "msft.txt", 2)


Indexes of top ranked_sentence order are  [(0.28357619672073536, ['AI-powered', 'chatbots', 'and', 'virtual', 'assistants', 'are', 'being', 'used', 'to', 'provide', 'personalized', 'healthcare', 'information', 'and', 'support', 'to', 'patients,', 'improving', 'access', 'to', 'medical', 'advice', 'and', 'reducing', 'the', 'burden', 'on', 'healthcare', 'providers.In', 'the', 'realm', 'of', 'finance,', 'AI', 'algorithms', 'analyze', 'vast', 'amounts', 'of', 'financial', 'data', 'to', 'detect', 'patterns,', 'assess', 'risks,', 'and', 'make', 'predictions', 'in', 'areas', 'like', 'stock', 'market', 'trends', 'and', 'investment', 'strategies']), (0.18980264646305176, ['It', 'has', 'the', 'potential', 'to', 'revolutionize', 'various', 'aspects', 'of', 'our', 'lives', 'and', 'reshape', 'industries', 'across', 'the', 'globe.Machine', 'Learning,', 'a', 'subfield', 'of', 'AI,', 'enables', 'computers', 'to', 'learn', 'from', 'data', 'and', 'make', 'predictions', 'or', 'decisions', 'without', 'bein

# **USING BERT**

In [56]:
pip install --upgrade transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m

In [52]:
!pip install transformers==2.2.0
!pip install bert-extractive-summarizer

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==2.2.0
  Downloading transformers-2.2.0-py3-none-any.whl (360 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m360.6/360.6 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting boto3 (from transformers==2.2.0)
  Downloading boto3-1.26.153-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece (from transformers==2.2.0)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sacremoses (from transformers==2.2.0)
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m880.6/880.6 kB[0m [31m55.6 MB

# spaCy is an open-source software library for advanced natural language processing

In [53]:
!pip install spacy==2.0.12

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting spacy==2.0.12
  Downloading spacy-2.0.12.tar.gz (22.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.0/22.0 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting murmurhash<0.29,>=0.28 (from spacy==2.0.12)
  Downloading murmurhash-0.28.0.tar.gz (23 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting cymem<1.32,>=1.30 (from spacy==2.0.12)
  Downloading cymem-1.31.2.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting preshed<2.0.0,>=1.0.0 (from spacy==2.0.12)
  Downloading preshed-1.0.1.tar.gz (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.7/112.7 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting thinc<6.11.0,>=6.10.3 (from spacy==2.0.12)
  Downloading thinc-6

In [1]:
from summarizer import Summarizer,TransformerSummarizer


In [3]:
text = '''
       Artificial Intelligence (AI) is a rapidly evolving field that encompasses the development and application of
       intelligent systems capable of performing tasks that typically require human intelligence. It has the potential
       to revolutionize various aspects of our lives and reshape industries across the globe.Machine Learning, a subfield
        of AI, enables computers to learn from data and make predictions or decisions without being explicitly programmed.
        Through the analysis of large datasets, machine learning algorithms can uncover patterns, identify trends, and extract
        valuable insights that can inform decision-making processes in areas such as healthcare, finance, and marketing.
        Deep Learning, a subset of machine learning, is inspired by the structure and function of the human brain. Neural
         networks, composed of interconnected layers of artificial neurons, can learn complex representations and hierarchical
         patterns from data. This has led to breakthroughs in image recognition, natural language processing, and speech
         synthesis, powering applications like facial recognition, voice assistants, and autonomous vehicles.AI has found
          numerous applications in healthcare. Medical imaging techniques, combined with AI algorithms, can aid in the
          detection and diagnosis of diseases, such as cancer, with high accuracy. AI-powered chatbots and virtual
           assistants are being used to provide personalized healthcare information and support to patients, improving access to medical advice and reducing the burden on healthcare providers.In the realm of finance, AI algorithms analyze vast amounts of financial data to detect patterns, assess risks, and make predictions in areas like stock market trends and investment strategies. AI-powered fraud detection systems can identify suspicious activities and
        prevent fraudulent transactions, safeguarding the financial interests of individuals and organizations.

        '''

In [4]:
bert_model = Summarizer()
bert_summary = ''.join(bert_model(text, min_length=60))
print(bert_summary)


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Artificial Intelligence (AI) is a rapidly evolving field that encompasses the development and application of 
       intelligent systems capable of performing tasks that typically require human intelligence. Through the analysis of large datasets, machine learning algorithms can uncover patterns, identify trends, and extract 
        valuable insights that can inform decision-making processes in areas such as healthcare, finance, and marketing. Medical imaging techniques, combined with AI algorithms, can aid in the 
          detection and diagnosis of diseases, such as cancer, with high accuracy.




In [6]:
 with open('bert_summary2.txt', 'w') as f:
      for line in bert_summary:
          f.write(line)

