In [12]:
import glob
from langchain.document_loaders import (UnstructuredWordDocumentLoader,
                                         UnstructuredPowerPointLoader,
                                         UnstructuredPDFLoader)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [11]:
!mkdir ./Downloads/projet_m2isd

In [13]:
for file in glob.glob('./*.pdf'):
    print(file)

./c386ee7f-f414-4e9e-84b3-1c9236008498.pdf


In [14]:
docs = [ PyPDFLoader(file).load() for file in glob.glob('./*.pdf')]

docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=100
)
doc_splits = text_splitter.split_documents(docs_list)

In [16]:
doc_splits = doc_splits[:10]

In [17]:
doc_splits

[Document(metadata={'source': './c386ee7f-f414-4e9e-84b3-1c9236008498.pdf', 'page': 0}, page_content='Summary\nBowyer Research ur ges shareholders to vote AGAINST  Proposal 7, on the 2024 proxy ballot of\nApple, Inc. (“Apple” or the “Company”) The “Resolved” clause of Proposal No. 7 states:\nRESOL VED: Shareholders request that Apple Inc. prepare a transparency report on the\ncompany’ s use of Artificial Intelligence (“AI”) in its business operations and disclose any\nethical guidelines that the company has adopted regarding the company’ s use of AI\ntechnology . This report shall be made publicly available to the company’ s shareholders\non the company’ s website, be prepared at a reasonable cost, and omit any information\nthat is proprietary , privileged, or violative of contractual obligations.\n \nThe supporting statement to this Proposal1, submitted by Segal Marco Advisors (the\n“Proponent”), contends that:\nIncreased use of artificial intelligence/machine learning models (further

In [11]:
import getpass
import os


def _set_env(key: str):
    if key not in os.environ:
        os.environ[key] = getpass.getpass(f"{key}:")


_set_env("OPENAI_API_KEY")

In [15]:
# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma-sec",
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

In [16]:
### Retrieval Grader


from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI


# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader
question = "What was assessed in Proposal No. 7 ?"
docs = retriever.get_relevant_documents(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

  docs = retriever.get_relevant_documents(question)


binary_score='yes'


In [17]:
### Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)


# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)



Proposal No. 7 assessed the need for Apple Inc. to prepare a transparency report on its use of Artificial Intelligence (AI) and disclose any ethical guidelines related to AI technology. The proposal argued that increased AI usage raises significant social policy issues, including concerns about discrimination and job automation. It called for preemptive corporate self-regulation to address potential conflicts arising from AI before they occur.


In [18]:
FINANCE_API_KEY='cte32t9r01qt478kvedgcte32t9r01qt478kvee0'

In [2]:
!pip install finnhub-python

Defaulting to user installation because normal site-packages is not writeable
Collecting finnhub-python
  Downloading finnhub_python-2.4.22-py3-none-any.whl (11 kB)
Installing collected packages: finnhub-python
Successfully installed finnhub-python-2.4.22


In [19]:
import finnhub

# Setup client
finnhub_client = finnhub.Client(api_key=FINANCE_API_KEY)

# Company News
# Need to use _from instead of from to avoid conflict
print(finnhub_client.company_news('AAPL', _from="2024-12-12", to="2024-12-13"))

[{'category': 'company', 'datetime': 1734072300, 'headline': 'American Century Focused Dynamic Growth Fund Q3 2024 Commentary', 'id': 131963716, 'image': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1358927461/image_1358927461.jpg?io=getty-c-w1536', 'related': 'AAPL', 'source': 'SeekingAlpha', 'summary': 'The broad U.S. stock market posted solid gains that saw a reversal of trends from the first half of the year. Click here to read the full commentary. ', 'url': 'https://finnhub.io/api/news?id=de1a9e3597b99763ff340611ce4c76539e433699021b7f08351ac79069fef323'}, {'category': 'company', 'datetime': 1734064506, 'headline': "Here's Why Apple Is Our Highest Conviction Short For 2025", 'id': 131962096, 'image': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1705316992/image_1705316992.jpg?io=getty-c-w1536', 'related': 'AAPL', 'source': 'SeekingAlpha', 'summary': "Apple's upsurge since June is driving a widening divergence from its stalling fundamental growth outl

In [69]:
!pip install --upgrade --quiet  yfinance

In [75]:
from langchain_core.tools import tool

# Define a tool to fetch the finance news
@tool
def get_finance_news(company:str, start_date:str, end_date:str) -> str:
    """A tool that fetches finance news from the Finnhub API."""
    # Your existing API call logic here
    finnhub_client = finnhub.Client(api_key=FINANCE_API_KEY)
    
    response = finnhub_client.company_news(company, _from=start_date, to=end_date)
    summary = response[0]['summary']
    return summary

get_finance_news.invoke({"company":'AAPL', "start_date":'2024-12-12', "end_date":'2024-12-13'})


'The broad U.S. stock market posted solid gains that saw a reversal of trends from the first half of the year. Click here to read the full commentary. '

In [53]:
@tool
def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: First integer
        b: Second integer
    """
    return a + b

-----

# Creation de l'agent simple (Agent + Tools)

In [84]:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [85]:
from langchain_core.tools import tool


@tool
def get_finance_news(company:str, start_date:str, end_date:str) -> str:
    """A tool that fetches finance news from the Finnhub API."""
    # Your existing API call logic here
    finnhub_client = finnhub.Client(api_key=FINANCE_API_KEY)
    
    response = finnhub_client.company_news(company, _from=start_date, to=end_date)
    summary = response[0]['summary']
    return summary




tools = [get_finance_news]

llm_with_tools = llm.bind_tools(tools)

In [86]:
from langchain_core.messages import HumanMessage

query = "Use the provided tool in order to ouput the latest news, had in this question: What are the latest news of the company 'AAPL' between those exact values 2024-12-12 2024-12-13"

messages = [HumanMessage(query)]

ai_msg = llm_with_tools.invoke(messages)

print(ai_msg.tool_calls)

messages.append(ai_msg)

[{'name': 'get_finance_news', 'args': {'company': 'AAPL', 'start_date': '2024-12-12', 'end_date': '2024-12-13'}, 'id': 'call_vCXlwYrEQ1oH025P2t80Lkuk', 'type': 'tool_call'}]


In [87]:
for tool_call in ai_msg.tool_calls:
    selected_tool = {"get_finance_news":get_finance_news}[tool_call["name"].lower()]
    tool_msg = selected_tool.invoke(tool_call)
    messages.append(tool_msg)

messages

[HumanMessage(content="Use the provided tool in order to ouput the latest news, had in this question: What are the latest news of the company 'AAPL' between those exact values 2024-12-12 2024-12-13", additional_kwargs={}, response_metadata={}),
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_vCXlwYrEQ1oH025P2t80Lkuk', 'function': {'arguments': '{"company":"AAPL","start_date":"2024-12-12","end_date":"2024-12-13"}', 'name': 'get_finance_news'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 110, 'total_tokens': 147, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_6fc10e10eb', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-614df7d6-42f8-4f11-8828-5dc0c8c601e1-0', tool

In [89]:
llm_with_tools.invoke(messages).content

'The latest news for Apple Inc. (AAPL) between December 12, 2024, and December 13, 2024, indicates that the broad U.S. stock market posted solid gains, reversing trends from the first half of the year.'