In [9]:
import langchain 
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

import os 

In [21]:
load_dotenv()

GROQ_API_KEY = os.getenv('GROQ_API_KEY')

## Stuff Type 
- metode summarization yang dimana langsung mengirimkan documents ke prompt dan melakukan summary
- cocok untuk file document yang kecil (<4000 tokens atau file dengan ukuran <200kb(plain text) dan <500kb(pdf))
- tidak untuk document dengan ukuran yang besar (>4000 tokens)

### Load Document

In [14]:
#load document 
docs = PyPDFLoader('data\A Study on Backpropagation in Artificial Neural Networks.pdf').load()

In [15]:
docs

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2020-09-13T21:23:55+08:00', 'title': 'Journal Paper', 'author': 'GV', 'moddate': '2020-09-13T21:23:55+08:00', 'source': 'data\\A Study on Backpropagation in Artificial Neural Networks.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='Asia-Pacific Journal of Neural Networks and Its Applications \nVol.4, No.1 (2020), pp.21-28 \nhttp://dx.doi.org/10.21742/ajnnia.2020.4.1.03 \n \n \nPrint ISSN: 2207-8738, eISSN: 2207-8746 AJNNIA \nCopyright ⓒ 2020 Global Vision Press (GV Press) \nA Study on Backpropagation in Artificial Neural Networks \n \n \nCh Sekhar1 and P Sai Meghana2 \nDepartment of CSE, VIIT(A), AP, India University, India \n1sekhar1203@gmail.com, 2palavalasasaimeghana@gmail.com \nAbstract \nInnovation assumes essential job nowadays in human life to limit the manual work. \nExecution and exactness with innovation will be high. The Backpropagation neural \nfr

In [16]:
docs = PyPDFLoader('data\A Study on Backpropagation in Artificial Neural Networks.pdf').load_and_split()

In [17]:
docs

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2020-09-13T21:23:55+08:00', 'title': 'Journal Paper', 'author': 'GV', 'moddate': '2020-09-13T21:23:55+08:00', 'source': 'data\\A Study on Backpropagation in Artificial Neural Networks.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='Asia-Pacific Journal of Neural Networks and Its Applications \nVol.4, No.1 (2020), pp.21-28 \nhttp://dx.doi.org/10.21742/ajnnia.2020.4.1.03 \n \n \nPrint ISSN: 2207-8738, eISSN: 2207-8746 AJNNIA \nCopyright ⓒ 2020 Global Vision Press (GV Press) \nA Study on Backpropagation in Artificial Neural Networks \n \n \nCh Sekhar1 and P Sai Meghana2 \nDepartment of CSE, VIIT(A), AP, India University, India \n1sekhar1203@gmail.com, 2palavalasasaimeghana@gmail.com \nAbstract \nInnovation assumes essential job nowadays in human life to limit the manual work. \nExecution and exactness with innovation will be high. The Backpropagation neural \nfr

In [19]:
len(docs)

7

### Prompt

In [6]:
tempalate = "write a concise and short summary of the following {text}"

prompt = PromptTemplate(input_variables=['text'], template=tempalate)

In [7]:
prompt

PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='write a concise and short summary of the following {text}')

### Chain LLM

In [8]:
from langchain_groq import ChatGroq

In [11]:
llm = ChatGroq(api_key=GROQ_API_KEY, model='gemma2-9b-it', temperature=0)

In [12]:
chain = load_summarize_chain(llm,
                             chain_type= 'stuff', 
                             prompt=prompt,
                             verbose=True)

### Output

In [18]:
summary = chain.run(docs)
summary

  summary = chain.run(docs)




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mwrite a concise and short summary of the following Asia-Pacific Journal of Neural Networks and Its Applications 
Vol.4, No.1 (2020), pp.21-28 
http://dx.doi.org/10.21742/ajnnia.2020.4.1.03 
 
 
Print ISSN: 2207-8738, eISSN: 2207-8746 AJNNIA 
Copyright ⓒ 2020 Global Vision Press (GV Press) 
A Study on Backpropagation in Artificial Neural Networks 
 
 
Ch Sekhar1 and P Sai Meghana2 
Department of CSE, VIIT(A), AP, India University, India 
1sekhar1203@gmail.com, 2palavalasasaimeghana@gmail.com 
Abstract 
Innovation assumes essential job nowadays in human life to limit the manual work. 
Execution and exactness with innovation will be high. The Backpropagation neural 
framework is multilayered, feedforward neural framewo rk and is by a full edge the most 
extensively utilized. It is moreover seen as one of the least demanding and most wide systems 
used for 

"This article provides a concise overview of the backpropagation algorithm used in artificial neural networks (ANNs). \n\n**Here's a breakdown of the key points:**\n\n* **Introduction:** Backpropagation is a fundamental algorithm for training multi-layered neural networks. It works by adjusting the weights of connections between neurons based on the error between the network's output and the desired output.\n* **History:** The article briefly traces the history of backpropagation, highlighting key milestones and contributors.\n* **Algorithm:** The article explains the step-by-step process of backpropagation, including:\n    * Calculating the error between the network's output and the target output.\n    * Propagating the error back through the network, layer by layer.\n    * Adjusting the weights of connections based on the calculated error gradients.\n* **Applications:** The article mentions several applications of backpropagation, such as:\n    * **Classification:** Categorizing data

# Map Reduce Type
- metode summarization yang membutuhkan splitting/ chunking pada documents lalu tiap chunk/potongan document akan dikirimkan ke prompt lalu di summarize (map), lalu tiap tiap summary akan digabungkan dengan prompt untuk menjadi satu kesatuan summary (reduce)
- cocok untuk data yang besar (>4000 token, ukuran file >200kb(plain text) dan >500kb(pdf))
- lebih lambat dan mahal karena membutuhkan beberapa query ke LLM

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

### Load Documents

In [23]:
docs = PyPDFLoader('data\A Study on Backpropagation in Artificial Neural Networks.pdf').load()

In [28]:
docs

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2020-09-13T21:23:55+08:00', 'title': 'Journal Paper', 'author': 'GV', 'moddate': '2020-09-13T21:23:55+08:00', 'source': 'data\\A Study on Backpropagation in Artificial Neural Networks.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='Asia-Pacific Journal of Neural Networks and Its Applications \nVol.4, No.1 (2020), pp.21-28 \nhttp://dx.doi.org/10.21742/ajnnia.2020.4.1.03 \n \n \nPrint ISSN: 2207-8738, eISSN: 2207-8746 AJNNIA \nCopyright ⓒ 2020 Global Vision Press (GV Press) \nA Study on Backpropagation in Artificial Neural Networks \n \n \nCh Sekhar1 and P Sai Meghana2 \nDepartment of CSE, VIIT(A), AP, India University, India \n1sekhar1203@gmail.com, 2palavalasasaimeghana@gmail.com \nAbstract \nInnovation assumes essential job nowadays in human life to limit the manual work. \nExecution and exactness with innovation will be high. The Backpropagation neural \nfr

### Splitting Docs

In [35]:
splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
splitted_docs = splitter.split_documents(docs)

In [36]:
len(splitted_docs)

10

### Prompt

In [37]:
#prompt for each chunk documents
map_template = """
please summarize the documents {text}
Summary : 
"""

map_prompt_template = PromptTemplate(input_variables=['text'],
                                     template=map_template)

In [39]:
reduce_template = """
provide the final summary of the entire documents with these important points. Add a title, start the precise summary with an introduction and provide summary in number points for the speech {text}
"""

reduce_prompt_template = PromptTemplate(input_variables=['text'], template=reduce_template)


### Chain LLM map-reduce type

In [41]:
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="map_reduce",
    map_prompt = map_prompt_template,
    combine_prompt = reduce_prompt_template,
    verbose=True
)

### Output

In [43]:
#run llm with splitted documents
summary = summary_chain.run(splitted_docs)
print(summary)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
please summarize the documents Asia-Pacific Journal of Neural Networks and Its Applications 
Vol.4, No.1 (2020), pp.21-28 
http://dx.doi.org/10.21742/ajnnia.2020.4.1.03 
 
 
Print ISSN: 2207-8738, eISSN: 2207-8746 AJNNIA 
Copyright ⓒ 2020 Global Vision Press (GV Press) 
A Study on Backpropagation in Artificial Neural Networks 
 
 
Ch Sekhar1 and P Sai Meghana2 
Department of CSE, VIIT(A), AP, India University, India 
1sekhar1203@gmail.com, 2palavalasasaimeghana@gmail.com 
Abstract 
Innovation assumes essential job nowadays in human life to limit the manual work. 
Execution and exactness with innovation will be high. The Backpropagation neural 
framework is multilayered, feedforward neural framewo rk and is by a full edge the most 
extensively utilized. It is moreover seen as one of the least demanding and most wide systems 
used for managed plannin

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
Token indices sequence length is longer than the specified maximum sequence length for this model (3183 > 1024). Running this sequence through the model will result in indexing errors




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
provide the final summary of the entire documents with these important points. Add a title, start the precise summary with an introduction and provide summary in number points for the speech This article provides an overview of the backpropagation algorithm, a fundamental technique used to train artificial neural networks (ANNs). 

**Key Points:**

* **Backpropagation's Importance:** Backpropagation is crucial for training multilayered feedforward neural networks (MLP), which are widely used in various applications like image recognition, speech processing, and prediction.
* **How it Works:** Backpropagation adjusts the weights of connections between neurons in the network based on the difference between the network's output and the desired output (the error). This iterative process aims to minimize the error and improve the network's accuracy.
* **Adv

### Refine Chain Type

- Menggunakan hasil summary dari chunk pertama dan chunk document ke prompt dan llm untuk summarization 
- hasil summary dari chunk document sebelumnya di terus ke prompt untuk document selanjutnya 
- summary diperbaiki secara bertahap dengan menambahkan bagian bagian(chunk) selanjutnya

- keuntungan : lebih kontekstual, karena menggunakan summary sebelumnya
- boros token karena panjang token terus nambah


In [44]:
chain = load_summarize_chain(
    llm=llm,
    chain_type='refine',
    verbose=True
)

In [45]:
summary = chain.run(splitted_docs)
print(summary)



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Asia-Pacific Journal of Neural Networks and Its Applications 
Vol.4, No.1 (2020), pp.21-28 
http://dx.doi.org/10.21742/ajnnia.2020.4.1.03 
 
 
Print ISSN: 2207-8738, eISSN: 2207-8746 AJNNIA 
Copyright ⓒ 2020 Global Vision Press (GV Press) 
A Study on Backpropagation in Artificial Neural Networks 
 
 
Ch Sekhar1 and P Sai Meghana2 
Department of CSE, VIIT(A), AP, India University, India 
1sekhar1203@gmail.com, 2palavalasasaimeghana@gmail.com 
Abstract 
Innovation assumes essential job nowadays in human life to limit the manual work. 
Execution and exactness with innovation will be high. The Backpropagation neural 
framework is multilayered, feedforward neural framewo rk and is by a full edge the most 
extensively utilized. It is moreover seen as one of the least demanding and most wide systems 
used for manag

## How to check total input token ??

In [49]:
import tiktoken

In [None]:
#tokenizer untuk document yang diload document dengan langchain 
encoding = tiktoken.get_encoding('cl100k_base')

In [51]:
total_tokens = sum(len(encoding.encode(doc.page_content)) for doc in docs)

In [52]:
total_tokens

3518