In [1]:
import os
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain.llms import OpenAI
from langchain import PromptTemplate
import time
from fpdf import FPDF

# Fail fast with a readable message when the API key is missing.
# os.environ only accepts string values, so assigning os.getenv(...) back to
# it raises "TypeError: str expected, not NoneType" when the variable is
# unset, and is a no-op when it is set -- an explicit check is clearer.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

# Shared LLM client used by the summarisation chain.
# NOTE(review): the saved summary output in this notebook is cut off
# mid-word, which looks like the completion token limit being reached --
# consider passing an explicit max_tokens here; confirm against the
# langchain OpenAI defaults.
llm = OpenAI(temperature=0.5)

In [5]:
# Prompt text for the per-chunk pass; the template built from it is handed to
# the summarize chain as its `map_prompt`.
map_custom_prompt = """
Your job is to create a clear and brief summary of the given text. The summary should convey meaningful information.
Text:`{text}`
"""
map_prompt_template = PromptTemplate(
    template=map_custom_prompt,
    input_variables=['text'],
)

# Prompt text for the final pass; the template built from it is handed to the
# summarize chain as its `combine_prompt`.
combine_custom_prompt = """
Generate a summary with the following guidelines:

- Start with a clear title representing the main subject.
- Provide an introductory paragraph to give an overview.
- Use bullet points for key points.
- End with a summary paragraph recapping the main ideas.

Keep the response concise, with a maximum length of 20 pages. Ensure that your response follows the above instructions.

Text:`{text}`
"""
combine_prompt_template = PromptTemplate(
    template=combine_custom_prompt,
    input_variables=['text'],
)

In [6]:
def pdf_splitter(pdf_file_path):
    """Load a PDF, split it into documents, and report count and duration.

    Args:
        pdf_file_path: Path of the PDF handed to ``PyPDFLoader``.

    Returns:
        The documents returned by ``PyPDFLoader(...).load_and_split()``.
    """
    started = time.time()
    chunks = PyPDFLoader(pdf_file_path).load_and_split()
    print("There are: ", len(chunks))
    # The clock is read after the count has been printed, so the reported
    # duration covers loading, splitting and that print call.
    elapsed = round(time.time() - started, 2)
    print("Execution Time: ", f"{elapsed} seconds")
    return chunks
    
# Announce the stage, then load and split the source book.
print("PDF Splitter")
pdf_file_path = "crime-and-punishment.pdf"
docs_to_summarize = pdf_splitter(pdf_file_path)

PDF Splitter
There are:  767
Execution Time:  4.88 seconds


In [7]:
def summarize_pdf(docs):
    """Run the map-reduce summarize chain over ``docs`` and print the elapsed time.

    The chain is built from the module-level ``llm``, ``map_prompt_template``
    and ``combine_prompt_template``.

    Args:
        docs: Documents passed to the chain's ``run`` method.

    Returns:
        The value returned by the chain's ``run(docs)`` call.
    """
    started = time.time()
    chain_options = dict(
        llm=llm,
        chain_type='map_reduce',
        map_prompt=map_prompt_template,
        combine_prompt=combine_prompt_template,
        verbose=False,
    )
    result = load_summarize_chain(**chain_options).run(docs)
    elapsed = round(time.time() - started, 2)
    print("Execution Time: ", f"{elapsed} seconds")
    return result

# Summarise every document (a shallow copy of the full list is passed) and
# print the result framed by blank lines.
print("Book Summary")
summarize = summarize_pdf(docs_to_summarize[:])
print(f"\n{summarize}\n")

Book Summary
Execution Time:  300.69 seconds


Title: Redemption Through Reflection in Crime and Punishment 

Introduction: Crime and Punishment by Fyodor Dostoevsky follows the story of Rodion Romanovitch Raskolnikov, a university student who commits a crime and is later sentenced to prison. After reflecting on his life and choices, he embarks on a journey of redemption and spiritual transformation. 

Bullet Points: 
- Raskolnikov confesses to killing an old pawnbroker woman and her sister with an axe and robbing them, and is sentenced to eight years of penal servitude. 
- He is visited by Sonia, who brings him relief from the outside world and provides comfort.
- He reflects on Sonia's plight and the three potential outcomes of her life. 
- While in prison, Raskolnikov struggles with his own inner turmoil and his belief in God.
- His mother dies, and he is overwhelmed and ashamed by his situation.
- He is eventually released from prison and begins a new life of regeneration and initi

In [53]:
# Derive the output path: same directory as the source PDF, with the file
# name prefixed by "Summary_" and a ".pdf" extension.
# os.path is used instead of splitting on "/" and on the literal ".pdf" so
# that platform-specific separators, upper-case extensions (e.g. "BOOK.PDF")
# and names containing ".pdf" mid-name (e.g. "a.pdf.v2.pdf") are handled.
source_dir, source_name = os.path.split(pdf_file_path)
source_stem, _ = os.path.splitext(source_name)
output_file = os.path.join(source_dir, "Summary_" + source_stem + ".pdf")

# Create PDF
class PDF(FPDF):
    """FPDF document with a fixed 'Summary' header and simple text helpers."""

    # Typography shared by every method below.
    _FONT_FAMILY = 'Arial'
    _FONT_SIZE = 12
    _LINE_HEIGHT = 10

    def header(self):
        """Draw the bold, centred 'Summary' heading line."""
        self.set_font(self._FONT_FAMILY, 'B', self._FONT_SIZE)
        self.cell(0, self._LINE_HEIGHT, 'Summary', 0, 1, 'C')

    def chapter_title(self, title):
        """Write ``title`` as a bold, left-aligned line followed by a 10-unit gap."""
        self.set_font(self._FONT_FAMILY, 'B', self._FONT_SIZE)
        self.cell(0, self._LINE_HEIGHT, title, 0, 1, 'L')
        self.ln(10)

    def chapter_body(self, body):
        """Write ``body`` as wrapped regular-weight text followed by a line break."""
        self.set_font(self._FONT_FAMILY, '', self._FONT_SIZE)

        # Round-trip through latin-1 with errors='replace': any character that
        # latin-1 cannot represent becomes '?'.
        # NOTE(review): presumably needed because the selected core font only
        # covers latin-1 -- confirm against the FPDF version in use.
        printable = body.encode('latin-1', 'replace').decode('latin-1')

        self.multi_cell(0, self._LINE_HEIGHT, printable)
        self.ln()


pdf = PDF()
pdf.add_page()

# Add each non-empty line of the summary to the PDF as its own paragraph.
# Blank and whitespace-only lines are filtered out entirely; merely
# collapsing "\n\n" into "\n" would still leave empty paragraphs wherever the
# text contains three or more consecutive newlines.
paragraphs = [line.strip() for line in summarize.split('\n') if line.strip()]
for paragraph in paragraphs:
    pdf.chapter_body(paragraph)

# Save the PDF
pdf.output(output_file)

print(f"PDF file '{output_file}' has been created.")


PDF file 'Summary_crime-and-punishment.pdf' has been created.
