In [1]:
# import API KEY

import os                           # operating system library
from dotenv import load_dotenv      # load environment variables  


load_dotenv()


OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [2]:
from langchain_openai.chat_models import ChatOpenAI   # LangChain connection to OpenAI

model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-4-turbo")

response = model.invoke("What is the Suez Canal?")

response.content

"The Suez Canal is a man-made waterway in Egypt, connecting the Mediterranean Sea to the Red Sea. This canal is a critical shortcut for international maritime trade, significantly reducing the travel distance between the North Atlantic and northern Indian oceans by allowing ships to bypass the lengthy journey around the southern tip of Africa via the Cape of Good Hope.\n\nOpened in 1869, the Suez Canal was engineered by the French developer Ferdinand de Lesseps but is now owned and maintained by the Suez Canal Authority of Egypt. The canal extends approximately 120 miles (193 kilometers) between Port Said in the north and Suez in the south, with no locks because the sea level is the same at both ends, which facilitates the transit of ships.\n\nThe canal's strategic and economic importance cannot be understated, as it serves as a major facilitator of world trade. It allows for the more efficient and faster transportation of oil, natural gas, and various goods between Europe, Asia, and o

In [3]:
os.listdir("../pdfs")

['2024_BRIEFING.pdf']

In [4]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

In [5]:
loader = PyPDFDirectoryLoader("../pdfs/")

pages = loader.load()

In [6]:
len(pages)

8

In [7]:
from langchain_openai.embeddings import OpenAIEmbeddings


vectorizer = OpenAIEmbeddings()

In [8]:
from langchain_community.vectorstores import Chroma

chroma_db = Chroma.from_documents(pages, vectorizer, persist_directory="../chroma_db")

In [9]:
retriever = chroma_db.as_retriever(search_type="mmr", search_kwargs={"k": 2, "lambda_mult": 0.25})

In [10]:
from langchain.prompts import ChatPromptTemplate

In [11]:
template = """
            Given the context below and the question, 
            please generate a header, and 5 bullet points, Summarize each bullet point in 40 words.
            Also fetch sub_titles and numbers to describe the information.

            Context: {context}

            Question: {question}
            """


prompt = ChatPromptTemplate.from_template(template)

In [12]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

In [13]:
from langchain_core.runnables import RunnablePassthrough

In [14]:
query = "What are the endnotes of the briefing?"

In [15]:
in_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | model | parser


response = in_chain.invoke(query)

Number of requested results 20 is greater than number of elements in index 16, updating n_results = 16


'Header: Key Endnotes from the Briefing on Recent Threats in the Red Sea\n\n1. **Author and Graphics Information**\n   - The briefing was authored by Angelos Delivorias with graphics by Samy Chahri.\n\n2. **Document Identification**\n   - The document is identified as PE 760.390, published in March 2024.\n\n3. **Publisher Information**\n   - Published by the European Parliamentary Research Service (EPRS).\n\n4. **Geopolitical Focus**\n   - The briefing discusses the economic impact of threats in the Red Sea on the region and the EU.\n\n5. **Document Structure**\n   - The briefing includes sections on introduction, impact of the attacks, affected countries, mitigating factors, and EU positions on submarine communications.'

In [16]:
response.split('\n')

['Header: Key Endnotes from the Briefing on Recent Threats in the Red Sea',
 '',
 '1. **Author and Graphics Information**',
 '   - The briefing was authored by Angelos Delivorias with graphics by Samy Chahri.',
 '',
 '2. **Document Identification**',
 '   - The document is identified as PE 760.390, published in March 2024.',
 '',
 '3. **Publisher Information**',
 '   - Published by the European Parliamentary Research Service (EPRS).',
 '',
 '4. **Geopolitical Focus**',
 '   - The briefing discusses the economic impact of threats in the Red Sea on the region and the EU.',
 '',
 '5. **Document Structure**',
 '   - The briefing includes sections on introduction, impact of the attacks, affected countries, mitigating factors, and EU positions on submarine communications.']

In [17]:
from langchain_openai import OpenAI

input_model = OpenAI(temperature=0, max_tokens=1024)

In [24]:
template = """
            We have provided  information below.
            Given this information, please generate python-pptx code for a single 
            slide with this information set header and subtitle as title and  
            bullet points.
            
            Put the title on top of the slide and center all text to slide size.
            
            Separate the bullet points into separate texts with line separator.
            Set font size to minimum for fixing text to cells. Save the file in ../pptx folder

            Information: {context}
            """


prompt = ChatPromptTemplate.from_template(template)

In [25]:
out_chain = prompt | input_model | parser

In [26]:
output = out_chain.invoke({"context": response})

In [27]:
output.split('\n')

['',
 '# Import necessary libraries',
 'from pptx import Presentation',
 'from pptx.util import Inches',
 '',
 '# Create a new presentation',
 'prs = Presentation()',
 '',
 '# Add a slide with title and subtitle',
 'slide = prs.slides.add_slide(prs.slide_layouts[0])',
 'title = slide.shapes.title',
 'subtitle = slide.placeholders[1]',
 'title.text = "Key Endnotes from the Briefing on Recent Threats in the Red Sea"',
 'subtitle.text = "Header"',
 '',
 '# Center all text to slide size',
 'title.text_frame.paragraphs[0].alignment = 1',
 'subtitle.text_frame.paragraphs[0].alignment = 1',
 '',
 '# Add bullet points',
 'bullet_points = [',
 '    "1. **Author and Graphics Information**",',
 '    "- The briefing was authored by Angelos Delivorias with graphics by Samy Chahri.",',
 '    "2. **Document Identification**",',
 '    "- The document is identified as PE 760.390, published in March 2024.",',
 '    "3. **Publisher Information**",',
 '    "- Published by the European Parliamentary Resear

In [28]:
exec('print(2+2)')

4


In [29]:
exec(output)