In [1]:
import torch
import requests
import json
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config



In [2]:
'''Get Data from DB '''
def get_data_from_db():
    host = 'https://search-solytics-tzxvnvrvkscgklaedz5lr3iqxu.ap-south-1.es.amazonaws.com/news_test_list/_search'
    json_body = '''{
        "query": {
                "bool": {
                    "must_not": {
                        "exists":{
                            "field":"sentiment_ML"
                            }
                        }
                    }
                }
    }'''
    headers = {
        'Content-Type': 'application/json',
    }
    params = {
        'size':1000
    }
    resp = requests.get(host,params=params, headers=headers, data=json_body)
    resp_text = json.loads(resp.text)
    document_list = []

    # print(resp)
    for data in resp_text['hits']['hits']:
        content_list = {}
        content_list["id"] = data["_id"]
        content_list["content"] = data["_source"]["Content"]
        document_list.append(content_list)
    return document_list


In [3]:
device = torch.device('cuda') 

In [4]:
data = get_data_from_db()
df = pd.DataFrame(data)

In [5]:
from tqdm import tqdm

In [6]:
import time
import gc
start = time.time()
result=[]
'''GPU model'''
model = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)
tokenizer = T5Tokenizer.from_pretrained('t5-small')
text_len = []
cnt=0
for text in tqdm(df['content']):
    try:
        '''If artice could not be possible to evaluate through GPU'''
        if(len(text.split())>2000):
            text = text.split()
            print(len(text))
            text = text[:2000]
            text = ' '.join(text)

        preprocess_text = text.strip().replace("\n","")
        t5_prepared_Text = "summarize: "+preprocess_text
        tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt").to(device)
        summary_ids = model.generate(tokenized_text,
                                        num_beams=4,
                                        no_repeat_ngram_size=2,
                                        min_length=30,
                                        max_length=100,
                                        early_stopping=True)
        output = tokenizer.decode(summary_ids[0])
        print(output)
        result.append(output)
        gc.collect()
        torch.cuda.empty_cache()
    except Exception as e:
        print(e)
        print(len(text.split()))
        gc.collect()
        torch.cuda.empty_cache()

gc.collect()
torch.cuda.empty_cache()

del model
torch.cuda.empty_cache()

del tokenizer
torch.cuda.empty_cache()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1197.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=242065649.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…

  0%|          | 0/574 [00:00<?, ?it/s]




  0%|          | 1/574 [00:03<30:49,  3.23s/it]

the ROG Strix Hero II isn’t the best gaming notebook that money can buy. however, there aren't many that can outperform it on a strictly price-for-performance basis. if you’re looking for something like Call of Duty, Counter-Strike or Overwatch, the Hero is the option you want.


  0%|          | 2/574 [00:04<25:48,  2.71s/it]

archbishop Ezekiel Kondo Kumir Kuku has been installed as the first Archbisbisp of the newest 39th province of anglican Communion in the Sudan. by the Archhop of Canterbury, by... at the All... the all....... he is the only archi... to be installed in a new state - the new archb... is 'the first archa... by anarchia! 


  1%|          | 3/574 [00:06<24:04,  2.53s/it]

the first company designated today is International Investments Holding S.A. de C.V., an asset holding company registered in Guadalajara, Mexico. the company is owned or controlled by Los Cuinis leader Abigael Gonzalez Valencia and his wife, Jeniffer Beaney Camacho Cazares, who assists her husband’s drug trafficking activities.


  1%|          | 4/574 [00:09<25:32,  2.69s/it]

the user is forbidden to use any automatic systems or equipment (robots) in order to access the system without a written approval from Lursoft. the information in the databases is of informative nature and it has no legal power, it does not bear any responsibility for actions or decisions that are based on the service provided.


  1%|          | 5/574 [00:11<22:38,  2.39s/it]

Nikolay Vasilyevich Ustryalov was a leading pioneer of Russian national Bolshevism. he was born in Saint Petersburg and graduated in law from Moscow University in 1913.


  1%|          | 6/574 [00:13<20:19,  2.15s/it]

Iran has sought "permanent authorisation" from India for its two local insurance companies. the proposal was put forth by a Managing Director at the India office of Iran's state-run national Iranian oil company last month. previously, India struggled to get tankers and insurance for transporting oil from Iran after the US and the European Union imposed sanctions between July 2012 and January 2014 on the Islamic Republic.


  1%|          | 7/574 [00:14<19:20,  2.05s/it]

Julio César  ⁇ lvarez Montelongo, con su nombre real, fue miembro de la Banda Ms para después perseguir una carrera en solitario.


  1%|▏         | 8/574 [00:16<18:03,  1.91s/it]

mounir el-Motassadeq is jailed in Germany for helping three of the sept. 11 hijackers. he paid tuition and rent for all three al-Qaeda terrorists to keep the appearance of being students, prosecutors said spokesman said.


  2%|▏         | 9/574 [00:18<18:55,  2.01s/it]

OCEAN MARITIME MANAGEMENT COMPANY LIMITED has identified 17 vessels as blocked property of the entity whose property and interests in property are blocked pursuant to E.O. 13551: Vessels. the entities and vessels named above have been added to OFAC's list of specially designated nationals and Blocked Persons with the identifying tag  ⁇ DPRK''
is responsible for developing the surface-to-surface Sejjil missile, a two-stage solid-propellant ballistic missile. owned and controlled by the IRGC IranAerospace Industries Organization (AIO)


  2%|▏         | 11/574 [00:21<15:39,  1.67s/it]

our award winning luxury resort is located on the tropical jungle island of Boca Brava, perched high up, with 360 degree views of some of the most prolific big game fishing grounds in the world. re-named the “Jurassic park” in 2001 by our first clients, shocked by the sheer size of “Monsterfish” they encountered fishing the waters surrounding Isla Montuosa and Hannibal Bank.
3552


  2%|▏         | 12/574 [00:24<19:43,  2.11s/it]

a neo-ba'athist party has ruled Syria continuously since the 1963 coup d'état. the party was founded on 7 April 1947 by Michel Aflaq, Salah al-Din al -Bitar and followers of Zaki Al-Arsuzi (an Alawite) in 1954, the parties merged with the Arab Socialist Party (ASP) to weaken the power of Syria's


  2%|▏         | 13/574 [00:26<18:52,  2.02s/it]

the first or paternal family name isDelcy Elo ⁇ na Rodr ⁇ guez Gómez. she has been vice president of Venezuela since 14 June 2018.[1][2] she was also a minister of Popular Power for Communication and Information of the Venezuela from 2013[3] to 2014[4]


  2%|▏         | 14/574 [00:28<17:58,  1.93s/it]

mexicanas fueron se ⁇ aladas por la Embajada de los Estados Unidos en Tijuana.


  3%|▎         | 15/574 [00:30<18:09,  1.95s/it]

derrière ces deux identités virtuelles se cachait un Gardois de 22 ans, Brahim El Khayari, qui avait très tôt prêté allégeance à Daech, pour rejoindre les rangs du califat de l’État islamique dont il faisait partie depuis de longs mois.


  3%|▎         | 16/574 [00:31<15:20,  1.65s/it]

the interactive transcript could not be loaded.Rating is available when the video has been rented. this feature is not available right now. please try again later later.


  3%|▎         | 17/574 [00:32<14:30,  1.56s/it]

the band was founded in 2006 in Kidal, mali. they mix traditional African music with western rock and pop influences - sing in Tamashek 'tamikrest' the main songwriter and leader is Ousmane Ag Mossa.Their music is characterized by electric guitars and vocals.


  3%|▎         | 18/574 [00:33<12:08,  1.31s/it]

we use cookies to understand how you use our site and improve your experience. to learn more, review our Cookie Policy. we accept cookies and privacy policies.


  3%|▎         | 19/574 [00:34<13:15,  1.43s/it]

a dos miembros de ETA en el pa ⁇ s Vasco francésEl ministro del Interior ha anunciado este miércoles. los titulares of la casa de Ossès también fueron arrestados los due ⁇ os du vivienda.


  3%|▎         | 20/574 [00:36<15:08,  1.64s/it]

the independent movement of Absolute Renovation (MIRA) is a social and political party in Colombia. it was founded on March 21, 2000 by 51,095 Colombians led by lawyer and former senator Carlos Alberto Baena and ex-senator Alexandra Moreno Piraquive. the party also has representation in the Chamber of Representatives and the Senate of the Republic of Colombia.


  4%|▎         | 21/574 [00:38<14:42,  1.60s/it]

the NCC provides cybersecurity leadership, services, training and a cybersecurity community for public officials, business executives and the workforce. the nCC is the national national resource resource for cybersecurity leaders, businesses and businesses..


  4%|▍         | 22/574 [00:41<17:36,  1.91s/it]

a 15-member "assassination squad" allegedly sent to target the journalist. it said the team checked into two hotels in Istanbul on Oct. 2 and left later that day, according to the report. the reports named the following Saudis as the squad members:1- Meshal Saad M Albostani (Born 1987) the footage appears to come from surveillance cameras, which would have been posted throughout the district housing the consulate and


  4%|▍         | 23/574 [00:41<14:35,  1.59s/it]

sign up for our newsletter and discover travel experiences you’ll really want to try.By signing up, you agree to receive promotional emails. you can unsubscribe at any time.


  4%|▍         | 24/574 [00:43<16:43,  1.82s/it]

la Polic ⁇ a Nacional ha detenido en Madrid al narco colombiano Germán Gonzalo Sánchez Rey. la operación se ha coordinado de forma conjunta con la oficina central cianional de INTERPOL in Madrid. l'argumento está relacionado con el tráfico de coca ⁇ 





KeyboardInterrupt: 

In [10]:
result[5]

'Iran has sought "permanent authorisation" from India for its two local insurance companies. the proposal was put forth by a Managing Director at the India office of Iran\'s state-run national Iranian oil company last month. previously, India struggled to get tankers and insurance for transporting oil from Iran after the US and the European Union imposed sanctions between July 2012 and January 2014 on the Islamic Republic.'

In [11]:
df['content'][5]

'Iran has sought "permanent authorisation" from India for its two local insurance companies to enable Iranian ships with Iranian insurance cover to call at Indian ports to help maintain oil supplies from the Persian Gulf nation when new sanctions imposed by the US take effect in November.\nThe proposal was put forth by M Sarmadi, Managing Director at the India office of Iran’s state-run National Iranian Oil Company, during a meeting with the top officials of the Shipping Ministry last month, reports the Hindu Business Line citing a document related to this issue.\nPreviously, India struggled to get tankers and insurance for transporting oil from Iran after the US and the European Union imposed sanctions between July 2012 and January 2014 on the Islamic Republic, forcing western insurers to halt insuring ships hauling crude from Iran. Following the sanctions, London-based International Group of Protection and Indemnity Clubs stopped providing third party liability cover to ships hauling

In [None]:
text = '''Prime Minister Narendra Modi’s first stop in Uttar Pradesh’s Ayodhya on August 5 will be a shrine dedicated to Lord Hanuman, where he will offer prayers ahead of the grand foundation laying ceremony for the construction of the Ram temple.

Mahant Raju Das, the head priest of Hanumangarhi, said they have been allocated seven minutes for Prime Minister’s rituals at the temple, according to news agency ANI.

Vedic priests will escort the Prime Minister while reciting mantras to the Hanumangarhi temple.'''

In [None]:
preprocess_text = text.strip().replace("\n","")
t5_prepared_Text = "summarize: "+preprocess_text
tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt").to(device)
summary_ids = model.generate(tokenized_text,
                                num_beams=4,
                                no_repeat_ngram_size=2,
                                min_length=30,
                                max_length=100,
                                early_stopping=True)
output = tokenizer.decode(summary_ids[0])
print(output)