In [8]:
import numpy as np
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers import util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel



In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
def load_bert(bert_path):
    if bert_path:
        embed = SentenceTransformer(bert_path, device=device)

    else:
        embed = None
    return embed
#model = load_bert("/Users/mac/Desktop/TeamSolve/models/stsb-roberta-base-v2")
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/stsb-roberta-base-v2')
embeddings = model.encode(sentences)
print(embeddings)

def get_embedding(txt, model, model_type = "BERT"):
    embedding = []
    try_counter = 0
    if model:
            try:
                if type(txt) is str or type(txt) is not list:
                    txt = [txt]
                if(model_type == "USE"):
                    embedding = (np.asarray((model(txt))[0]))
    #                embedding = (np.asarray(embedding))
                
                elif(model_type == "BERT"):
                    embedding = (model.encode(txt))[0]

                elif(model_type == "BERT_MULTILINGUAL"):
                    embedding = (model.encode(txt))[0]                  
                embedding = (embedding/np.linalg.norm(embedding)).tolist()
                
            except Exception as error:
                print("error in getting embedding, retry count = " + str(try_counter))
#                raise Exception('error in embedding classifier')
    return embedding


def get_cosine_similarity(matrix, vector, model_type="BERT"):
    neighbors = 0
    try:
        if model_type=="TFIDF":
            neighbors = linear_kernel(matrix, vector).flatten()
        else:
            vector = np.asarray(vector)
            matrix = np.asarray(matrix)
            if matrix.shape == (0,): return [0]
            neighbors = np.asarray(util.pytorch_cos_sim(matrix, vector))
    except Exception as error:
        raise Exception('error in embedding classifier-similarity function')
    return neighbors

def get_embeddings(queries, model=None, model_type = "BERT"):
    #find embedding for a list of queries. Queries is expected to list of list
    try:
        if model_type=="TFIDF":
            embeddings = TfidfVectorizer().fit_transform(queries)
        else:
            embeddings = []
            for query in queries:
                embeddings.append(get_embedding(query, model, model_type))

    except Exception as error: 
        raise Exception('error in embedding classifier')
        
    return embeddings
def get_similar_sentences(text):
    sentences = text.split('.')
    index_occupied = []
    sentence_groups = []
    embeddings = get_embeddings(sentences, model, model_type="BERT")
    all_scores = list()
    for sent_id in range(len(sentences)):
        similar_g = []
        scores = get_cosine_similarity(embeddings, embeddings[sent_id], model_type="BERT")
        if sent_id in index_occupied:continue
        similar_g.append(sentences[sent_id])
        for n_sent in range(sent_id+1,len(sentences)):
            score = scores[n_sent][0]
            if score>=0.3:
                similar_g.append(sentences[n_sent])
                index_occupied.append(n_sent)
            else:
                break
        sentence_groups.append(similar_g)

        all_scores.append(scores)
    return sentence_groups

[[-0.7046393   0.21647064  0.23562156 ... -0.81505114 -0.3474665
   0.6278896 ]
 [ 0.05422518  0.892096    0.14136267 ...  0.6420069   0.23304999
   0.75140697]]


In [11]:
sentences = (get_similar_sentences("DMA 3 is part of the Kitchener network. The average DMA water demand is 15 CMH with max demand of 18 CMH at 8AM and min demand of 8 CMH at 3AM. Pumping station S1 supplies the water to the DMA. Flow meter FM3 is at the outlet of the pumping station and inlet of the DMA and measures the pump station outflow to the DMA. The DMA is isolated from the rest of the network by 2 closed valves: V11 and V12. In the DMA we have two pressure sensors P31 and P32. The average pressure in the DMA is 67 PSI. In the DMA we have 760 customers. Customer C31 typically experiences low pressure if there is a pipe burst in the DMA. You have access to data from pressure sensors P31, P32 and flow meter FM3 so whenever someone needs to know what the data is or set up an alarm, they can ask you to do that for them."))

In [12]:
sentences

[['DMA 3 is part of the Kitchener network',
  ' The average DMA water demand is 15 CMH with max demand of 18 CMH at 8AM and min demand of 8 CMH at 3AM',
  ' Pumping station S1 supplies the water to the DMA',
  ' Flow meter FM3 is at the outlet of the pumping station and inlet of the DMA and measures the pump station outflow to the DMA'],
 [' The DMA is isolated from the rest of the network by 2 closed valves: V11 and V12',
  ' In the DMA we have two pressure sensors P31 and P32'],
 [' The average pressure in the DMA is 67 PSI',
  ' In the DMA we have 760 customers',
  ' Customer C31 typically experiences low pressure if there is a pipe burst in the DMA',
  ' You have access to data from pressure sensors P31, P32 and flow meter FM3 so whenever someone needs to know what the data is or set up an alarm, they can ask you to do that for them'],
 ['']]

In [13]:
 from transformers import T5Tokenizer, T5Config, T5ForConditionalGeneration

In [14]:
#Instantiating the model and tokenizer 
my_model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

In [21]:
for i in range(0, len(sentences)-1):
    Result = ''.join(sentences[i])
    #summarize = ["summarize"]
    #for i in range(s_len-1):
    text = "summarize:" + Result
    input_ids=tokenizer.encode(text, return_tensors='pt', max_length=512)
    summary_ids = my_model.generate(input_ids)
    summary_ids
    t5_summary = tokenizer.decode(summary_ids[0])
    print(t5_summary)

<pad> average water demand is 15 CMH with max demand of 18 CMH at 8AM and min
<pad> the pressure sensor is a sonic system that is able to operate a single
<pad> average pressure in the DMA is 67 PSI In the DMA we have 760


End of Code

In [None]:
Result =0
for j in (0, len(sentences)-1):
    for i in (0, len(sentences)-1):
        Result = ''.join(sentences[i][j])
        i=0
        #summarize = ["summarize"]
        for i in range(s_len-1):
            text = "summarize:" + Result
            input_ids=tokenizer.encode(text, return_tensors='pt', max_length=512)
            summary_ids = my_model.generate(input_ids)
            summary_ids
            t5_summary = tokenizer.decode(summary_ids[0])
            print(t5_summary)

In [None]:
Result

In [None]:
result = ' '.join(sum(sentences, []))

In [None]:
result

In [None]:
' '.join(sentences)

In [None]:
i=0
summarize = ["summarize"]
for i in range(s_len-1):
    text =  summarize + sentences[0]
    input_ids=tokenizer.encode(text, return_tensors='pt', max_length=512)
    summary_ids = my_model.generate(input_ids)
    summary_ids
    t5_summary = tokenizer.decode(summary_ids[0])
    print(t5_summary)

In [None]:
#summarizer = pipeline("summarization",model=summarizer("faceboo/bart-large-xsum"))

In [None]:
#!pip install -U nltk # To Upgrade NLTK to >3.5 for METEOR score


In [None]:
# import nltk

# print('The nltk version is {}.'.format(nltk.__version__)) # Verify version >3.5

In [None]:
# import numpy as np 
# import pandas as pd

In [None]:
!pip install gensim.summarization

In [None]:
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig

In [None]:
tokenizer=BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model=BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

In [None]:
original_text = 'Junk foods taste good that’s why it is mostly liked by everyone of any age group especially kids and school going children. They generally ask for the junk food daily because they have been trend so by their parents from the childhood. They never have been discussed by their parents about the harmful effects of junk foods over health. According to the research by scientists, it has been found that junk foods have negative effects on the health in many ways. They are generally fried food found in the market in the packets. They become high in calories, high in cholesterol, low in healthy nutrients, high in sodium mineral, high in sugar, starch, unhealthy fat, lack of protein and lack of dietary fibers. Processed and junk foods are the means of rapid and unhealthy weight gain and negatively impact the whole body throughout the life. It makes able a person to gain excessive weight which is called as obesity. Junk foods tastes good and looks good however do not fulfil the healthy calorie requirement of the body. Some of the foods like french fries, fried foods, pizza, burgers, candy, soft drinks, baked goods, ice cream, cookies, etc are the example of high-sugar and high-fat containing foods. It is found according to the Centres for Disease Control and Prevention that Kids and children eating junk food are more prone to the type-2 diabetes. In type-2 diabetes our body become unable to regulate blood sugar level. Risk of getting this disease is increasing as one become more obese or overweight. It increases the risk of kidney failure. Eating junk food daily lead us to the nutritional deficiencies in the body because it is lack of essential nutrients, vitamins, iron, minerals and dietary fibers. It increases risk of cardiovascular diseases because it is rich in saturated fat, sodium and bad cholesterol. High sodium and bad cholesterol diet increases blood pressure and overloads the heart functioning. One who like junk food develop more risk to put on extra weight and become fatter and unhealthier. Junk foods contain high level carbohydrate which spike blood sugar level and make person more lethargic, sleepy and less active and alert. Reflexes and senses of the people eating this food become dull day by day thus they live more sedentary life. Junk foods are the source of constipation and other disease like diabetes, heart ailments, clogged arteries, heart attack, strokes, etc because of being poor in nutrition. Junk food is the easiest way to gain unhealthy weight. The amount of fats and sugar in the food makes you gain weight rapidly. However, this is not a healthy weight. It is more of fats and cholesterol which will have a harmful impact on your health. Junk food is also one of the main reasons for the increase in obesity nowadays.This food only looks and tastes good, other than that, it has no positive points. The amount of calorie your body requires to stay fit is not fulfilled by this food. For instance, foods like French fries, burgers, candy, and cookies, all have high amounts of sugar and fats. Therefore, this can result in long-term illnesses like diabetes and high blood pressure. This may also result in kidney failure. Above all, you can get various nutritional deficiencies when you don’t consume the essential nutrients, vitamins, minerals and more. You become prone to cardiovascular diseases due to the consumption of bad cholesterol and fat plus sodium. In other words, all this interferes with the functioning of your heart. Furthermore, junk food contains a higher level of carbohydrates. It will instantly spike your blood sugar levels. This will result in lethargy, inactiveness, and sleepiness. A person reflex becomes dull overtime and they lead an inactive life. To make things worse, junk food also clogs your arteries and increases the risk of a heart attack. Therefore, it must be avoided at the first instance to save your life from becoming ruined.The main problem with junk food is that people don’t realize its ill effects now. When the time comes, it is too late. Most importantly, the issue is that it does not impact you instantly. It works on your overtime; you will face the consequences sooner or later. Thus, it is better to stop now.You can avoid junk food by encouraging your children from an early age to eat green vegetables. Their taste buds must be developed as such that they find healthy food tasty. Moreover, try to mix things up. Do not serve the same green vegetable daily in the same style. Incorporate different types of healthy food in their diet following different recipes. This will help them to try foods at home rather than being attracted to junk food.In short, do not deprive them completely of it as that will not help. Children will find one way or the other to have it. Make sure you give them junk food in limited quantities and at healthy periods of time. '

In [None]:
def summary(original_text):
    text = "summarize:" + original_text
    input_ids=tokenizer.encode(text, return_tensors='pt', max_length=512)
    summary_ids = my_model.generate(input_ids)
    summary_ids
    t5_summary = tokenizer.decode(summary_ids[0])
    print(t5_summary)

In [None]:
inputs = tokenizer.batch_encode_plus([original_text],return_tensors='pt')
summary_ids = model.generate(inputs['input_ids'], early_stopping=True)

In [None]:
bart_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(bart_summary)

In [None]:
from transformers import T5Tokenizer, T5Config, T5ForConditionalGeneration

# Instantiating the model and tokenizer 
my_model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

In [None]:
sentence len

In [None]:
for i in len

In [None]:
# Concatenating the word "summarize:" to raw text
text = "summarize:" + original_text
text

In [None]:
# encoding the input text
input_ids=tokenizer.encode(text, return_tensors='pt', max_length=512)

In [None]:
# Generating summary ids
summary_ids = my_model.generate(input_ids)
summary_ids

In [None]:
# Decoding the tensor and printing the summary.
t5_summary = tokenizer.decode(summary_ids[0])
print(t5_summary)