In [1]:
import json
import pandas as pd
import numpy as np
import torch
import pickle
torch.cuda.empty_cache()
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import train_test_split
from termcolor import colored
import pickle
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5TokenizerFast as T5Tokenizer
)
from tqdm.auto import tqdm

In [2]:
# Load the pickled T5 model and its tokenizer from the working directory.
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so these files must come from a trusted source.
# The files are opened with `with` so the handles are closed right after loading.
with open('modeltest4.pkl', 'rb') as t5_model_file:
    model_t5_eng = pickle.load(t5_model_file)
with open('tokenizer.pkl', 'rb') as t5_tokenizer_file:
    tokenizer_t5_eng = pickle.load(t5_tokenizer_file)

In [3]:
# Second summarization backend: a Hugging Face "summarization" pipeline built on
# the sshleifer/distilbart-xsum-6-6 checkpoint. summarize() calls it when the
# is_on toggle is True.
from transformers import pipeline
summarizer = pipeline("summarization", model="sshleifer/distilbart-xsum-6-6")

In [4]:
#with open('tokenizerabout2.pickle', 'rb') as handle:
#    tokenizerabout = pickle.load(handle)
#modelabout = keras.models.load_model('modelabout2.h5')

#with open('tokenizerposneg2.pickle', 'rb') as handle:
#   tokenizerposs = pickle.load(handle)
#modelposs = keras.models.load_model('modelposneg2.h5')

In [13]:
# Topic classifier used by whatabout(): tokenizer unpickled from
# 'tokenizer.pickle' (a different file from the 'tokenizer.pkl' loaded for T5)
# and a Keras model loaded from 'modelabout2.h5'.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizerabout = pickle.load(handle)
modelabout = keras.models.load_model('modelabout2.h5')

# Polarity classifier used by posneg(): tokenizer and Keras model pair.
with open('tokenizerposneg.pickle', 'rb') as handle:
    tokenizerposs = pickle.load(handle)
modelposs = keras.models.load_model('modelposneg.h5')

In [14]:
class my_summary_model__t5_eng(pl.LightningModule):
    """Lightning module wrapping the pickled T5 model together with its tokenizer.

    `forward` returns the model's loss and logits; `generate` runs beam-search
    generation and decodes the result to text with the instance's own tokenizer.
    """

    def __init__(self, model=None, tokenizer=None):
        """Create the wrapper.

        Args:
            model: Optional already-loaded model. When omitted it is unpickled
                from 'modeltest4.pkl'.
            tokenizer: Optional already-loaded tokenizer. When omitted it is
                unpickled from 'tokenizer.pkl'.
        """
        super().__init__()
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserialising; only load files from a trusted source.
        if model is None:
            with open('modeltest4.pkl', 'rb') as model_file:
                model = pickle.load(model_file)
        if tokenizer is None:
            with open('tokenizer.pkl', 'rb') as tokenizer_file:
                tokenizer = pickle.load(tokenizer_file)
        self.model_t5_eng = model
        self.tokenizer_t5_eng = tokenizer

    def forward(self, input_ids, attention_mask, decoder_attention_mask, labels=None):
        """Run the wrapped model and return ``(loss, logits)`` from its output."""
        output = self.model_t5_eng(
            input_ids,
            attention_mask=attention_mask,
            labels=labels,
            decoder_attention_mask=decoder_attention_mask
        )

        return output.loss, output.logits

    def generate(
        self,
        input_ids,
        attention_mask,
        max_length,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        decoder_start_token_id=None
    ):
        """Generate text for the given encoded input and return it as one string.

        All keyword arguments are forwarded unchanged to the wrapped model's
        ``generate``. Every generated sequence is decoded (special tokens
        skipped, tokenization spaces cleaned up) and the decoded pieces are
        concatenated without a separator.

        Returns:
            str: the concatenated decoded sequences.
        """
        generated_ids = self.model_t5_eng.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_beams=num_beams,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            early_stopping=early_stopping,
            decoder_start_token_id=decoder_start_token_id
        )
        # Decode with the tokenizer owned by this instance rather than a
        # notebook-level global, so the class does not depend on cell state.
        preds = [
            self.tokenizer_t5_eng.decode(
                gen_id,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True
            )
            for gen_id in generated_ids
        ]

        return "".join(preds)

In [15]:
def summ_text_main(text,len_penalty,max_len):
    """Summarize `text` with the T5 wrapper and return the summary string.

    Args:
        text: Input text; it is tokenized with `tokenizer_t5_eng`, padded and
            truncated to 1024 tokens.
        len_penalty: Value forwarded as `length_penalty` to generation.
        max_len: Value forwarded as `max_length` to generation.

    Returns:
        Whatever `my_summary_model__t5_eng.generate` returns (the decoded summary).
    """
    # Building the wrapper unpickles the model from disk, so build it once and
    # keep it on the function object for later calls.
    wrapper = getattr(summ_text_main, "_cached_wrapper", None)
    if wrapper is None:
        wrapper = my_summary_model__t5_eng()
        summ_text_main._cached_wrapper = wrapper

    # Tokenize once and reuse both tensors (the text used to be encoded twice).
    encoding = tokenizer_t5_eng(
        text,
        max_length=1024,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors='pt'
    )
    summary = wrapper.generate(
        input_ids=encoding['input_ids'],
        attention_mask=encoding['attention_mask'],
        max_length=max_len,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=len_penalty,
        early_stopping=True,
        decoder_start_token_id=0
    )
    return summary


In [16]:
def whatabout(news_article):
    """Print the predicted topic of `news_article` and its probability.

    The text is converted to a sequence with `tokenizerabout`, padded to 170
    tokens and scored by `modelabout`; the highest-scoring of the four topic
    labels is printed together with its score as a percentage.
    """
    topics = ('World', 'Sports', 'Business', 'Science/Technology')
    padded = pad_sequences(tokenizerabout.texts_to_sequences([news_article]), maxlen=170)
    scores = modelabout.predict(padded, verbose=0)[0]
    best = np.argmax(scores)
    percent_text = "{:.2f}".format(scores[best] * 100)
    print("Relevance:")
    print("Probability: " + percent_text + "% " + " Predicted Class:", topics[best])


In [17]:
# Sample article for a quick check of the topic classifier. The text is written
# as two adjacent literals with an explicit "\n" so the statement stays valid
# Python even though the article spans two physical lines.
news_article = (
    "The tea stall in Guajarat's Vadnagar where Prime Minister Narendra Modi once sold tea during his childhood is all set to become a tourist spot with the Centre deciding to give it a face-lift. The stall is located on one of the platforms of the Vadnagar Railway station. Converting the stall into a tourist spot is part of a larger project of putting Modi's birthplace Vadnagar in Mehsana district of Gujarat on the world tourism map. Officials of the Ministry of Culture and Tourism and Archaeological Survey of India (ASI) visited the town yesterday. The team of officials was led by Union Culture Mahesh Sharma, who later announced that the original charm of the tea stall will be preserved while giving it a modern touch. Apart from being the birthplace of our PM, Vadnagar is an important historical centre having famous Sharmishta Lake and a step-well. The ASI had recently found remains of a Buddhist Monastery during excavation, which is still going on, Sharma told reporters at Gandhinagar yesterday. PRESERVING THE ORIGINAL CHARM Ahead of the 2014 Lok Sabha elections, Modi had often mentioned that he used to sell tea during his childhood at the Vadnagar railway station along with his father.Inside the Vadnagar railway station, there is a small tea stall, from where our PM had probably started his life's journey. We also want to develop that tea stall as a tourism spot. We will try to preserve the original charm of the tea stall while giving it a modern touch. Our aim is to put Vadnagar on world tourism map, Sharma said. Earlier, Divisional Railway Manager (DRM) of Ahmedabad division Dinesh Kumar had said the entire project of developing Vadnagar and adjoining places in Mehsana district would cost over Rs 100 crore.The development of Vadnagar railway station is one of the components of the Rs 100 crore project to develop Vadnagar, Modhera and Patan as tourist destinations. \n"
    "As of now, the Ministry of Tourism has given Rs eight crore to the state Tourism Department to develop the railway station,Kumar had said."
)
# whatabout() prints its own result; printing the function object itself only
# showed its repr (<function whatabout at 0x...>), so that call is dropped.
whatabout(news_article)

Relevance:
Probability: 100.00%  Predicted Class: Science/Technology
<function whatabout at 0x000001BED80DFD30>


In [18]:
def posneg(news_article):
    """Print the predicted polarity class of `news_article` and its probability.

    The text is converted to a sequence with `tokenizerposs`, padded to 100
    tokens and scored by `modelposs`; the highest-scoring of the four polarity
    labels is printed together with its score as a percentage.
    """
    sentiments = ('Irrelevant', 'Negative', 'Positive', 'Neutral')
    padded = pad_sequences(tokenizerposs.texts_to_sequences([news_article]), maxlen=100)
    scores = modelposs.predict(padded, verbose=0)
    winner = np.argmax(scores)
    percent_text = "{:.2f}".format(scores[0][winner] * 100)
    print("Polarity:")
    print("Predicted Probability:", percent_text, "%" + " Predicted Class:", sentiments[winner])


In [19]:
# Sample headline for a quick check of the polarity classifier.
news_article ="A 38-year-old woman in the US, who was apprehended twice for allegedly trying to jump the White House fence last week, has been arrested for scaling a fence at the Treasury Building next to the White House."

# posneg() prints its own result; printing the function object itself only
# showed its repr (<function posneg at 0x...>), so that call is dropped.
posneg(news_article)


Polarity:
Predicted Probability: 66.09 % Predicted Class: Irrelevant
<function posneg at 0x000001BED8394040>


In [22]:
import tkinter as tk
import time
from tkinter import *
from tkinter import filedialog
from tkinter import messagebox
def on_scale_change(event):
    """Translate the length slider position into the summary-length globals.

    Sets `lenofsum` and `maxlen` (passed by summarize() to summ_text_main as
    length penalty and maximum length) and `minlensum` / `maxlensum` (passed to
    the `summarizer` pipeline as min_length / max_length).

    Args:
        event: The Tk event that triggered the callback; it is not used.
    """
    global lenofsum, maxlen, minlensum, maxlensum

    position = scale.get()
    # The untouched default position and anything up to 50 share the short preset.
    if position == default_value or position <= 50:
        preset = (0.5, 50, 10, 50)
    elif 51 <= position <= 75:
        preset = (2.00, 100, 25, 90)
    else:
        preset = (3.00, 150, 45, 150)

    lenofsum, maxlen, minlensum, maxlensum = preset
# Summarizer toggle state, flipped by onoff(). summarize() uses summ_text_main
# (the T5 wrapper) while this is False and the `summarizer` pipeline while True.
is_on = False
def whatabout(news_article):
    """Classify the topic of `news_article` and report it in the summary box.

    GUI variant that replaces the earlier console `whatabout`: instead of
    printing, it inserts a "Relevance" line (probability, class and the time
    the classification took) at the top of the `summary` text widget.
    """
    started = time.time()
    topics = ('World', 'Sports', 'Business', 'Science/Technology')
    padded = pad_sequences(tokenizerabout.texts_to_sequences([news_article]), maxlen=170)
    scores = modelabout.predict(padded, verbose=0)[0]
    best = np.argmax(scores)
    percent_text = "{:.2f}".format(scores[best] * 100)
    topic = topics[best]
    elapsed = time.time() - started

    report = "".join([
        "Relevance: \n",
        "Predicted Probability: ", percent_text, " %",
        " Predicted Class: ", topic,
        " Execution time: {:.5f}".format(elapsed), " seconds\n",
    ])
    summary.insert('1.0', report)
    
def posneg(news_article):
    """Classify the polarity of `news_article` and report it in the summary box.

    GUI variant that replaces the earlier console `posneg`: instead of
    printing, it inserts a "Polarity" line (probability, class and the time the
    classification took) at the top of the `summary` text widget.
    """
    start_time = time.time()
    class_labels = ['Irrelevant', 'Negative', 'Positive', 'Neutral']
    news_article_sequence = tokenizerposs.texts_to_sequences([news_article])
    # Pad to 100 tokens, the length the console posneg() uses with this same
    # model; 170 is the padding length of the topic model in whatabout().
    # NOTE(review): confirm 100 is the length modelposs was trained with.
    news_article_sequence = pad_sequences(news_article_sequence, maxlen=100)
    predictions = modelposs.predict(news_article_sequence, verbose=0)
    # Exactly one article is scored, so read row 0 explicitly instead of taking
    # argmax over the flattened prediction array.
    scores = predictions[0]
    predicted_class_index = np.argmax(scores)
    predicted_class_label = class_labels[predicted_class_index]
    predicted_percentage = "{:.2f}".format(scores[predicted_class_index] * 100)
    execution_time = time.time() - start_time
    summary.insert('1.0', "Polarity: \n" +
               "Predicted Probability: " + predicted_percentage + " %" +
               " Predicted Class: " + predicted_class_label +
               " Execution time: {:.5f}".format(execution_time) + " seconds\n")
def onoff():
    """Flip the `is_on` toggle and swap the toggle button image to match."""
    global is_on
    next_state = not is_on
    # Update the button first, then commit the new state.
    on_button.config(image=on if next_state else off)
    is_on = next_state

def summarize():
    """Summarize the input box text and write a report into the summary box.

    Does nothing when the input box is blank. Otherwise the text is summarized
    with summ_text_main (when `is_on` is False) or with the `summarizer`
    pipeline (when `is_on` is True). The summary, the elapsed time and the word
    counts are written to the `summary` widget, after which posneg() and
    whatabout() add their own lines.
    """
    input_text = intext.get('1.0', tk.END)
    if input_text.strip() == "":
        return

    # The length settings are module globals that only on_scale_change()
    # assigns. Refresh them from the slider here so they exist even if the
    # slider was never touched, and always match its current position.
    on_scale_change(None)

    use_pipeline = bool(is_on)
    start_time = time.time()
    if use_pipeline:
        pipeline_result = summarizer(input_text, max_length=maxlensum, min_length=minlensum, do_sample=False)
        summary_text = pipeline_result[0]['summary_text']
    else:
        summary_text = summ_text_main(input_text, lenofsum, maxlen)
    execution_time = time.time() - start_time

    input_word_count = len(input_text.split())
    summary_word_count = len(summary_text.split())

    # Each backend keeps its own report layout: the pipeline report shows the
    # time with one decimal and has no trailing newline.
    if use_pipeline:
        report = (
            f"Summarize: {summary_text}"
            f"\nExecution time: {execution_time:.1f} seconds"
            f"\nOriginal Word Count: {input_word_count}"
            f"\nSummarized Word Count: {summary_word_count}"
        )
    else:
        report = (
            f"Summarize: {summary_text}\n"
            f"Execution time: {execution_time:.5f} seconds\n"
            f"Original Word Count: {input_word_count}\n"
            f"Summarized Word Count: {summary_word_count}\n"
        )

    summary.config(state=tk.NORMAL, bg='white', fg='black')
    summary.delete('1.0', tk.END)
    summary.insert(tk.END, report)
    # Both classifiers insert at the top of the box, above the report.
    posneg(input_text)
    whatabout(input_text)
    summary.config(bg='#dddddd')


def clearup():
    """Button callback: empty both the input box and the summary box."""
    for text_box in (intext, summary):
        text_box.delete("1.0", tk.END)
def copysum():
    """Button callback: replace the clipboard contents with the summary box text."""
    summary.clipboard_clear()
    box_text = summary.get("1.0", tk.END)
    summary.clipboard_append(box_text)
def dowloadsum():
    """Button callback: save the summary box text to a file chosen by the user.

    Returns without writing when the summary box is blank or when the save
    dialog is dismissed without choosing a file.
    """
    output_str = summary.get('1.0', tk.END)
    if output_str.strip() == "":
        return
    file_path = filedialog.asksaveasfilename(defaultextension=".txt")
    # A cancelled dialog yields an empty value; opening it would raise.
    if not file_path:
        return
    with open(file_path, 'w') as f:
        f.write(output_str)

def exitfrom():
    """Button callback: close the application by destroying the root window."""
    root.destroy()
def show_about():
    """Menu callback: show the "About" information box with the tool credit."""
    about_title = "About"
    about_body = "Deep learning summarization tool \nMade by Danylo Fedorov \n"
    messagebox.showinfo(about_title, about_body)
def select_file():
    """Menu callback: load a user-chosen text file into the input box.

    Does nothing when the open dialog returns no path; otherwise the input box
    content is replaced with the file's text.
    """
    chosen_path = filedialog.askopenfilename(defaultextension=".txt", filetypes=[("Text Files", "*.txt")])
    if not chosen_path:
        return
    with open(chosen_path, "r") as source_file:
        file_text = source_file.read()
    intext.delete(1.0, tk.END)
    intext.insert(tk.END, file_text)

# ---- Main window -----------------------------------------------------------
root = tk.Tk()

# Not referenced anywhere else in the visible source.
original_text = " "
root.title("AI")
root.geometry('1600x900')

# Input box and summary box, packed top to bottom in this order.
inputlabel=tk.Label(root,text="Input:")
inputlabel.pack()
intext = tk.Text(root,height=25,width=140)
intext.pack()
slabel = tk.Label(root,text="Summary:")
slabel.pack()
summary=tk.Text(root,height=15,width=140)
summary.config(bg='#dddddd')
summary.pack()

# Menu bar: File (Open / Exit) and Help (About).
menu_bar = tk.Menu(root)
file_menu = tk.Menu(menu_bar, tearoff=0)
file_menu.add_command(label="Open", command=select_file)
file_menu.add_separator()
file_menu.add_command(label="Exit", command=root.destroy)
menu_bar.add_cascade(label="File", menu=file_menu)
help_menu = tk.Menu(menu_bar, tearoff=0)
help_menu.add_command(label="About", command=show_about)
menu_bar.add_cascade(label="Help", menu=help_menu)
root.config(menu=menu_bar)


# Summary-length slider (0-100). on_scale_change runs when the left mouse
# button is released over it.
# NOTE(review): lenofsum / maxlen / minlensum / maxlensum are first assigned
# inside on_scale_change; nothing in this section initialises them.
default_value = 0
scale = tk.Scale(root, from_=0, to=100, orient=tk.HORIZONTAL, length=100, showvalue=0)
scale.set(default_value)
scale.place(x=1415, y=20)
scale.bind("<ButtonRelease-1>", on_scale_change)
scale2 = tk.Label(root,text="Summary Length:")
scale2.place(x=1415, y=2)
scale3 = tk.Label(root,text="∘Short")
scale3.place(x=1375, y=18)
scale4 = tk.Label(root,text="∘Long")
scale4.place(x=1520, y=18)
# Action buttons. The name `ext` is rebound for each of the last three buttons,
# so afterwards it refers only to the Exit button.
btn=tk.Button(root,text="Summarize",command=summarize)
btn.place(x=580, y=690)
clr=tk.Button(root,text=" Clear ",command=clearup)
clr.place(x=655, y=690)
ext=tk.Button(root,text=" Dowload ",command=dowloadsum)
ext.place(x=705, y=690)
ext=tk.Button(root,text=" Copy ",command=copysum)
ext.place(x=775, y=690)
ext=tk.Button(root,text=" Exit ",command=exitfrom)
ext.place(x=825, y=690)

# Toggle button images, loaded from A.png / B.png in the working directory.
# The button starts with the `off` image, matching is_on = False; onoff() swaps it.
on = PhotoImage(file="A.png")
off = PhotoImage(file="B.png")
on_button = Button(root, image=off, bd=0, command=onoff)
on_button.place(x=1455, y=55)

# Labels on either side of the toggle; `scale3` / `scale4` are rebound here, so
# the names no longer refer to the Short / Long labels created above.
scale3 = tk.Label(root,text="∘Abstractive")
scale3.place(x=1375, y=55)
scale4 = tk.Label(root,text="∘Extractive")
scale4.place(x=1520, y=55)

# Blocks until the window is closed.
root.mainloop()