In [None]:
# Shell escape: print the NVIDIA driver/GPU status of the machine running this kernel.
!nvidia-smi

In [1]:
import asyncio

In [2]:
import torch
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, TextStreamer, pipeline , GenerationConfig
from deep_translator import GoogleTranslator
from transformers import T5Tokenizer, T5ForConditionalGeneration
from auto_gptq import AutoGPTQForCausalLM
from langchain.chains.summarize import load_summarize_chain
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

In [3]:
# Confirm which device string was selected.
print(DEVICE)

cuda:0


In [4]:
# Summarization checkpoint used in this notebook: MBZUAI/LaMini-Flan-T5-248M

In [5]:
# Load the tokenizer and the seq2seq model from the same checkpoint so that
# their vocabularies match.
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"
tokenizer = T5Tokenizer.from_pretrained(checkpoint)

# Weights are loaded as float32; device placement is delegated to the
# library through device_map='auto'.
base_model = T5ForConditionalGeneration.from_pretrained(
    checkpoint,
    device_map='auto',
    torch_dtype=torch.float32,
)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Data

In [11]:
%%time
# Read the PDFs found under the relative "pdfs" directory into Document objects.
# The cell magic above must stay on the first line; it reports how long the load took.
loader = PyPDFDirectoryLoader("pdfs")
docs = loader.load()
# Last expression of the cell: shows how many Documents were loaded.
len(docs)

CPU times: total: 828 ms
Wall time: 817 ms


20

In [12]:
# Break the loaded documents into overlapping chunks: at most 1024 units per
# chunk with 64 units shared between neighbours (the splitter presumably
# measures in characters by default; confirm against the LangChain docs).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=64,
)
texts = text_splitter.split_documents(docs)

# Last expression of the cell: number of chunks produced.
len(texts)

54

In [13]:
# Summarization pipeline built on the already-loaded model and tokenizer.
# Generated summaries are constrained to between 50 and 500 tokens.
pipe_sum = pipeline(
    task='summarization',
    model=base_model,
    tokenizer=tokenizer,
    max_length=500,
    min_length=50,
)

In [14]:
# Echo every chunk (content + metadata) so the PDF extraction can be eyeballed.
for text in texts:
    print(text)

# Build one string from all chunk bodies. Chunks are joined with a single
# space because a chunk is not guaranteed to end in whitespace (the first
# chunk ends in "Guide" and the next starts with "Anxiety"); concatenating
# them directly would fuse the words on either side of every chunk boundary.
final_texts = " ".join(text.page_content for text in texts)

page_content='Anxiety Disorders:\nParents’ \nMedication Guide' metadata={'source': 'pdfs\\anxiety-parents-medication-guide.pdf', 'page': 0}
page_content="Anxiety Disorders:  \nParents' Medication Guide Work Group\nCO-CHAIRS:\nJohn T. Walkup, MD  \nJeffrey R. Strawn, MD\nMEMBERS:\nKareem Ghalib, MD  \nKimberly A. Gordon, MD  \nTanya Murphy, MD, MS  \nDaniel S. Pine, MD  \nAdelaide S. Robb, MD  \nMoira A. Rynn, MD  \nTimothy E. Wilens, MD\nSTAFF:\nCarmen J. Thornton, MPH, CHES, Director, Research, Development, & Workforce  \nSarah Hellwege, MEd, Assistant Director, Research, Training, & Education\nCONSULTANT:\nEsha Gupta, Medical Science Writer\nThe American Academy of Child and Adolescent Psychiatry promotes the healthy \ndevelopment of children, adolescents, and families through advocacy, education,  \nand research. Child and adolescent psychiatrists are the leading physician authority \non children’s mental health.\n©2020 American Academy of Child and Adolescent Psychiatry, all rights

In [15]:
# Trim leading and trailing whitespace from the concatenated text.
final_texts = final_texts.strip()

In [16]:
# Delete every '.' character from the text.
# NOTE(review): this also removes sentence boundaries and decimal points
# (e.g. in dosages) — confirm this is intended rather than only stripping
# dotted leader lines from the PDF extraction.
final_texts = final_texts.replace('.' , '')

In [18]:
# Flatten the text onto a single line by turning each newline into a space.
final_texts = final_texts.replace('\n' , ' ')

In [19]:
# Display the cleaned, concatenated text for inspection (this shows the whole string).
final_texts



In [8]:
# Stand-alone sample passage: a short story of 18 paragraphs separated by blank
# lines. It is not referenced by the other cells shown in this notebook.
sample_text = '''Once upon a time, in a land far, far away, there lived a young girl named Amelia. Amelia lived in a small cottage at the edge of a dense forest with her mother, father, and younger brother, Max. The forest was a magical place, filled with all sorts of creatures, both friendly and fearsome. Amelia and Max loved to explore the forest, and they had many adventures there.

One day, while Amelia and Max were exploring a particularly dense part of the forest, they stumbled upon a hidden cave. The cave was tucked away behind a thicket of bushes and was almost invisible unless you knew where to look. Amelia and Max were curious and decided to venture inside.

The inside of the cave was dark and damp, but Amelia and Max were not afraid. They had brought a lantern with them, and Amelia lit it as they ventured further into the cave. As they walked deeper into the cave, they noticed strange markings on the walls. They were unlike anything they had ever seen before.

Amelia and Max continued to explore the cave, and as they ventured deeper, they came across a large chamber. In the center of the chamber was a pedestal with a small, glowing object resting on it. Amelia approached the pedestal and picked up the object. It was a small, round stone that emitted a soft, warm light.

As soon as Amelia picked up the stone, the ground began to tremble, and the walls of the cave started to shake. Amelia and Max were frightened and ran out of the cave as fast as they could. When they reached the entrance of the cave, they turned around and saw that the cave had collapsed behind them.

Amelia and Max were relieved to have escaped the cave, but they were also curious about the stone Amelia had found. They decided to take it home with them and show it to their parents.

When they arrived home, their parents were surprised to see them with the stone. Amelia and Max explained how they had found it in the cave and how the cave had collapsed after they took the stone. Their parents were worried but also intrigued by the stone.

Amelia's father, a scholar and historian, examined the stone carefully. He recognized the markings on the stone as an ancient language that had long been forgotten. He believed that the stone was a key to unlocking a hidden treasure that was said to be buried deep within the forest.

Excited by this discovery, Amelia and Max decided to go on a quest to find the hidden treasure. Their parents were worried about the dangers they might encounter along the way, but they knew that Amelia and Max were resourceful and capable.

The next morning, Amelia and Max set out on their adventure. They packed a bag with food, water, and other supplies, and headed into the forest. They followed a map that their father had given them, which he believed would lead them to the hidden treasure.

As they ventured deeper into the forest, Amelia and Max encountered all sorts of creatures, some friendly and some not so friendly. They came across a river that was too wide to cross, so they built a raft and sailed across. They climbed steep hills, crossed narrow bridges, and navigated through dense thickets.

After several days of traveling, Amelia and Max arrived at the location marked on the map. It was a small clearing in the middle of the forest, surrounded by tall trees. In the center of the clearing was a large rock with a hole in the center.

Amelia and Max approached the rock and examined the hole. It was just large enough for the stone that Amelia had found in the cave. Amelia placed the stone into the hole, and as she did so, the ground began to tremble again.

This time, however, the trembling did not stop. The ground cracked open, and a hidden staircase appeared. Amelia and Max were excited and a little bit nervous as they descended the staircase.

At the bottom of the staircase was a large chamber, similar to the one they had found in the cave. In the center of the chamber was a pedestal with a chest resting on it. Amelia and Max approached the chest and opened it.

Inside the chest was a collection of gold coins, jewels, and other valuable items. Amelia and Max were thrilled to have found the hidden treasure. They carefully packed the treasure into their bags and began the journey home.

When they arrived home, their parents were relieved to see them safe and sound. Amelia and Max showed them the treasure they had found, and their parents were amazed. They were proud of their children for their bravery and resourcefulness.

Amelia and Max were heroes in their village, and the story of their adventure became legendary. They had found the hidden treasure and returned home safely, proving that with courage and determination, anything is possible.'''

In [20]:
# `final_texts` holds the text of every PDF chunk in a single string, which is
# far longer than the input window of the T5 checkpoint behind `pipe_sum`.
# Ask the tokenizer to truncate to the model maximum so the call stays within
# what the model can process instead of feeding it an over-length sequence.
# NOTE(review): with truncation only the leading window of the text is
# summarized; summarizing chunk-by-chunk would be needed to cover all of it.
result = pipe_sum(final_texts, truncation=True)

In [21]:
# The pipeline result is indexed as a list of dicts; the generated text of the
# first entry is stored under the 'summary_text' key.
print(result[0]['summary_text'])

