In [40]:
import os 
from dotenv import load_dotenv
load_dotenv()


# load env variable
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
os.environ['HUGGINGFACE_TOKEN'] = os.getenv('HUGGINGFACE_TOKEN')

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.llm import LLMChain
from langchain.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.schema import (HumanMessage, AIMessage, SystemMessage)

In [2]:
llm = ChatGroq(model="llama3-70b-8192")
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000197B9DE6710>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000197B9E9C850>, model_name='llama3-70b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [3]:
speech = """
Artificial Intelligence (AI) is the simulation of human intelligence in machines that are programmed to think, learn, and make decisions. It is a broad field of computer science that includes subfields such as machine learning, deep learning, natural language processing (NLP), robotics, and computer vision. AI is increasingly becoming an integral part of modern life, powering technologies like voice assistants (e.g., Siri, Alexa), recommendation systems (e.g., Netflix, Amazon), self-driving cars, chatbots, and healthcare diagnostics.

Machine learning, a key branch of AI, involves algorithms that allow computers to learn from data without being explicitly programmed. Deep learning, a subset of machine learning, uses neural networks to solve complex problems such as image and speech recognition. NLP enables machines to understand and generate human language, which is crucial for translation services, sentiment analysis, and intelligent chatbots.

AI has significant applications across various industries. In healthcare, AI assists in early disease detection and personalized treatment. In finance, it’s used for fraud detection and algorithmic trading. In manufacturing, AI improves efficiency through predictive maintenance and automation. The education sector benefits from personalized learning experiences, while agriculture uses AI for crop monitoring and yield prediction.

Despite its benefits, AI also presents challenges. Ethical concerns such as data privacy, algorithmic bias, and job displacement must be addressed. The development of responsible and transparent AI systems is essential to ensure fairness and trust.

As AI continues to evolve, it holds the potential to solve some of the world’s most pressing problems. However, the focus must remain on using AI for the betterment of humanity, with regulations and guidelines to ensure its safe and ethical deployment. Education and public awareness will play a key role in shaping an AI-driven future that is inclusive, equitable, and beneficial for all.
"""


In [4]:
llm.get_num_tokens(speech)

  from .autonotebook import tqdm as notebook_tqdm


381

### Way One for summarization

In [5]:
chat_message = [
    SystemMessage(content="You are an expert at summarizing speeches."),
    HumanMessage(content=f"Provide a short and concise summary of the following speech:\n\n{speech}")
]

In [6]:
llm(chat_message)

  llm(chat_message)


AIMessage(content='Here is a short and concise summary of the speech:\n\nArtificial Intelligence (AI) is a broad field that simulates human intelligence in machines, with applications in healthcare, manufacturing, finance, education, and more. While it presents challenges like data privacy and job displacement, AI also has the potential to solve pressing global problems. To ensure its safe and ethical deployment, regulations, education, and public awareness are crucial in shaping an AI-driven future that benefits all.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 93, 'prompt_tokens': 397, 'total_tokens': 490, 'completion_time': 0.303817878, 'prompt_time': 0.012674057, 'queue_time': 0.20686162800000002, 'total_time': 0.316491935}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None}, id='run--c5569ad2-d10d-4adb-b552-f7a51e51bbf2-0', usage_metadata={'input_tokens': 397, 'output_tokens': 93, 't

In [7]:
# getting the summary content
llm(chat_message).content

"Here is a concise summary of the speech:\n\nArtificial Intelligence (AI) is a rapidly growing field that simulates human intelligence in machines, with applications in healthcare, manufacturing, finance, education. While AI has the potential to solve pressing global problems, it also raises ethical concerns such as data privacy, bias, and job loss. To harness AI's benefits, it's essential to develop responsible and transparent systems, ensure regulations, and promote public awareness to create a safe, equitable, and inclusive AI-driven future."

### Way two for summarization

In [8]:
prompt_two = ChatPromptTemplate.from_template(
    """
    Your task has two steps:
    1. Write a concise summary of the following speech: {speech}.
    2. Translate that summary into {language}.
    """
)

In [9]:
llm_chain = LLMChain(llm=llm, prompt=prompt_two)
bangla_summary = llm_chain.run({"speech": speech, "language": "bangla"})

  llm_chain = LLMChain(llm=llm, prompt=prompt_two)
  bangla_summary = llm_chain.run({"speech": speech, "language": "bangla"})


In [32]:
bangla_summary

"**Summary:**\n\nArtificial Intelligence (AI) is the simulation of human intelligence in machines that can think, learn, and make decisions. AI is a broad field that includes machine learning, deep learning, natural language processing, robotics, and computer vision. It has various applications across industries, including disease detection, personalized treatment, fraud detection, and personalized learning experiences. However, AI also raises ethical concerns such as data bias, job displacement, and the need for responsible and transparent AI systems. As AI continues to evolve, it's essential to focus on using it for the betterment of humanity with regulations and public awareness.\n\n**Bangla Translation:**\n\nকৃত্রিম বুদ্ধিমত্তা (AI) হল মানব বুদ্ধিমত্তার সিমুলেশন যা মেশিন চিন্তা, শিখা এবং সিদ্ধান্ত নেওয়ার ক্ষমতা রাখে। AI একটি ব্যাপক কম্পিউটার বিজ্ঞান ক্ষেত্র যা অন্তর্ভুকরে মেশিন লার্নিং, ডিপ লার্নিং, ন্যাচারাল ল্যাঙ্গুয়েজ প্রসেসিং, রোবোটিক্স এবং কম্পিউটার ভিশন। এটি বিভিন্ন শিল্পের

In [31]:
french_summary = llm_chain.run({"speech": speech, "language": "french"})
french_summary

"Here are the two steps:\n\n**Step 1: Concise Summary**\n\nArtificial Intelligence (AI) simulates human intelligence in machines, enabling them to think, learn, and decide. AI is a broad field encompassing machine learning, deep learning, natural language processing, robotics, and computer vision. It powers technologies like voice assistants, self-driving cars, chatbots, and healthcare diagnostics. While AI has significant applications across industries, it also raises ethical concerns like data privacy, algorithmic bias, and job displacement. Responsible AI development is crucial for ensuring fairness and trust.\n\n**Step 2: French Translation**\n\nL'Intelligence Artificielle (IA) simule l'intelligence humaine dans les machines, leur permettant de penser, d'apprendre et de décider. L'IA est un vaste domaine qui englobe l'apprentissage automatique, l'apprentissage profond, le traitement du langage naturel, la robotique et la vision par ordinateur. Elle alimente les technologies telles 

#### StuffDocumentsChain 
If you have a small pdf then you can use StuffDocumentsChain. because it All the information can be given to the LLM at once.
Problem: token limit problem

In [36]:
loader = PyPDFLoader("../pdf/apjspeech.pdf")
docs = loader.load_and_split()
docs

[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PScript5.dll Version 5.2', 'creationdate': 'D:20070730160943', 'moddate': 'D:20070730160943', 'title': 'Microsoft Word - Document1', 'author': 'Shri', 'source': '../pdf/apjspeech.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='A P J Abdul Kalam Departing speech \n \n \nFriends, I am delighted to address you all, in the country and those livi ng abroad, after \nworking with you and completing five beautiful and eventful years in Rashtrapati \nBhavan. Today, it is indeed a thanks giving occasion. I would like to narr ate, how I \nenjoyed every minute of my tenure enriched by the wonderful assoc iation from each one \nof you, hailing from different walks of life, be it politics, sci ence and technology, \nacademics, arts, literature, business, judiciary, administration, local bodies, farming, \nhome makers, special children, media and above all from the youth and st udent \ncommunity who are the futur

In [48]:
prompt = ChatPromptTemplate.from_template(
    """
        write a short summary of the following speech
        Speech : {text} 
        Translate that summary into {language}
    """
)

In [49]:
prompt

ChatPromptTemplate(input_variables=['language', 'text'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['language', 'text'], input_types={}, partial_variables={}, template='\n        write a short summary of the following speech\n        Speech : {text} \n        Translate that summary into {language}\n    '), additional_kwargs={})])

In [52]:
chain = load_summarize_chain(llm=llm, chain_type="stuff", prompt=prompt)
summary = chain.run({"input_documents": docs, "language": "bangla"})
summary

"Here is a summary of Dr. A.P.J. Abdul Kalam's departing speech as the President of India:\n\n**Summary in English**\n\nIn his farewell address, Dr. A.P.J. Abdul Kalam reflected on his five-year tenure as President, highlighting the country's progress and the need for further development. He emphasized the importance of empowering villages, mobilizing rural resources, and accelerating development to achieve a developed India by 2020. He also stressed the need for connectivity, defeating problems, and overcoming calamities through partnership. He shared his experiences of meeting people from various walks of life, including farmers, scientists, and youth, who inspired him with their courage and determination. He expressed his confidence in India's ability to become a developed nation before 2020, with the power of its 540 million youth.\n\n**Summary in Bangla**\n\nড. এ.পি.জে. আব্দুল কালামের বিদায়ী ভাষণে, তিনি তাঁর পাঁচ বছরের রাষ্ট্রপতি হিসেবে কর্মজীবন সম্পর্কে আলোচনা করেন। তিনি দেশের প

#### Map Reduce

In [53]:
load_pdf = PyPDFLoader('../pdf/sensors-21-03261-v2.pdf')
documents = load_pdf.load()
documents

[Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2021-05-10T11:33:04+08:00', 'author': 'Salman Md Sultan; Muhammad Waleed; Jae-Young Pyun; Tai-Won Um', 'keywords': 'deep reinforcement learning; internet of things; target tracking; best sensor selection; energy consumption', 'moddate': '2021-05-10T11:40:37+08:00', 'subject': 'The Internet of Things (IoT)-based target tracking system is required for applications such as smart farm, smart factory, and smart city where many sensor devices are jointly connected to collect the moving target positions. Each sensor device continuously runs on battery-operated power, consuming energy while perceiving target information in a particular environment. To reduce sensor device energy consumption in real-time IoT tracking applications, many traditional methods such as clustering, information-driven, and other approaches have previously been utilized to select the best sensor. However, applying machin

In [54]:
len(documents)

22

In [55]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [59]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_document = text_splitter.split_documents(documents)
split_document

[Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2021-05-10T11:33:04+08:00', 'author': 'Salman Md Sultan; Muhammad Waleed; Jae-Young Pyun; Tai-Won Um', 'keywords': 'deep reinforcement learning; internet of things; target tracking; best sensor selection; energy consumption', 'moddate': '2021-05-10T11:40:37+08:00', 'subject': 'The Internet of Things (IoT)-based target tracking system is required for applications such as smart farm, smart factory, and smart city where many sensor devices are jointly connected to collect the moving target positions. Each sensor device continuously runs on battery-operated power, consuming energy while perceiving target information in a particular environment. To reduce sensor device energy consumption in real-time IoT tracking applications, many traditional methods such as clustering, information-driven, and other approaches have previously been utilized to select the best sensor. However, applying machin

In [60]:
len(split_document)

92

In [61]:
map_prompt = ChatPromptTemplate.from_template(
    """ 
        write a short summary of the following speech
        speech: {text}
    """
)

In [62]:
combine_prompt = ChatPromptTemplate.from_template(
    """
    provide the full summary of the entier speech with these important points.
    Section headings as paragraph titles (e.g., Introduction, Abstract, Preliminaries, Conclusion). Keep the summary concise, clear, and preserve the technical meaning.    
    Here is the content to summarize:
    {text}
    Summarize it below with section headings:
"""
)

In [None]:
# Shorter prompts
# map_prompt = ChatPromptTemplate.from_template("Summarize this text:\n\n{text}")
# combine_prompt = ChatPromptTemplate.from_template("Combine the summaries below:\n\n{text}")

In [63]:
map_chain = load_summarize_chain(
        llm=llm,
        chain_type="map_reduce",
        map_prompt=map_prompt,
        combine_prompt=combine_prompt 
    )

In [64]:
summary = map_chain.run(split_document)
summary

Token indices sequence length is longer than the specified maximum sequence length for this model (8156 > 1024). Running this sequence through the model will result in indexing errors


'Here is a summary of the speech with section headings:\n\n**Introduction**\nThe speech discusses the use of deep reinforcement learning to conserve energy in Internet of Things (IoT) tracking applications.\n\n**Importance of Energy Efficiency in IoT Tracking Applications**\nThe speech highlights the importance of using multiple sensors in tracking applications and reducing energy consumption in sensors to increase battery lifespan.\n\n**Challenges in IoT Tracking Applications**\nThe speech discusses the challenge of energy-efficient sensor selection in IoT tracking applications and proposes a Deep Reinforcement Learning (Deep RL) model using a Long Short-Term Memory network.\n\n**Applications of Deep Reinforcement Learning in IoT**\nThe speech discusses the benefits of using deep reinforcement learning in 5G sensor networks, including the best sensor selection and reduced energy consumption.\n\n**Proposed Method: LSTM-DQN-Epsilon-Greedy**\nThe speech proposes a novel Deep Reinforcemen

#### Refine

In [65]:
# Define the initial summary prompt
refine_summary = load_summarize_chain(
    llm=llm,
    chain_type="refine", 
)
output_summary = refine_summary.run(split_document)
output_summary

'After reviewing the new context, I found that it does not provide any new information that would refine the original summary. The new context appears to provide references to other articles and research papers related to TensorFlow and Keras, which are not relevant to refining the original summary.\n\nTherefore, I will return the original summary, which remains a comprehensive and accurate summary of the article:\n\nThe article proposes a novel Deep RL framework that predicts the suboptimal energy-efficient sensor to track the target in IoT tracking applications. The proposed system utilizes an LSTM-DQN-epsilon-greedy method, which combines an LSTM deep Q-network (LSTM-DQN) as Q-approximator with an epsilon-greedy action-selection strategy. The system also employs a data pre-processing approach, including mini-max normalization, to improve the performance of the LSTM Q-approximator. Additionally, the system uses a Kalman filter to localize the target position, which is particularly us