In [None]:
import os
from dotenv import load_dotenv


In [2]:
# Load settings from the local .env file before any API client is created;
# PINECONE_API_KEY is read from os.environ further down in this notebook.
load_dotenv('./.env')


True

## LangChain primeiros passos

In [None]:
from langchain.llms import OpenAI
# LLM wrapper used for the first plain-text examples; sampling settings are
# spelled out one per line so they are easy to tweak.
llm = OpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0.7,
    max_tokens=512,
)
# Print the wrapper object itself to inspect how it was configured.
print(llm)

In [None]:
# Call the LLM directly with a plain-text prompt and print whatever it returns.
output = llm('explique como funções python funcionam')
print(output)

In [None]:
# Print the value of get_num_tokens() for the same prompt string used in the
# previous cell (presumably its token count for this model — confirm).
print(llm.get_num_tokens('explique como funções python funcionam'))

## ChatModels GPT-4-preview

In [7]:
from langchain.schema import(
    AIMessage,
    HumanMessage,
    SystemMessage
)

from langchain.chat_models import(
    ChatOpenAI
)

In [None]:
# Chat-model wrapper for the message-based examples.
chat = ChatOpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0.5,
    max_tokens=1024,
)

# The system message sets the persona and answer language; the human message
# carries the actual question.
messages = [
    SystemMessage(content='Você é um especialista em Machine Learning que responde tudo em português'),
    HumanMessage(content='explique em um parágrafo o que é machine learning'),
]

# Send the conversation; the reply text is read from `output.content` in the next cell.
output = chat(messages)

In [None]:
# Print only the text of the chat reply produced by the previous cell.
print(output.content)

## Prompt Templates

In [12]:
from langchain.prompts import PromptTemplate

In [None]:
# Prompt with two placeholders: {virus} (the subject) and {idioma} (the
# language the answer should be written in).
template = """ Você é um virologista experiente.
Escreva algumas frases sobre o seguinte {virus} em {idioma}."""

# input_variables must name exactly the placeholders that appear in the
# template; the cell that formats this prompt passes `virus=` and `idioma=`.
prompt = PromptTemplate(
    input_variables=['virus', 'idioma'],
    template=template
)
print(prompt)

In [None]:
from langchain.llms import OpenAI
# Re-create the LLM for this section (no max_tokens override here).
llm = OpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0.7,
)
# Fill the template placeholders and send the resulting text to the model.
output = llm(
    prompt.format(virus='covid-19', idioma='português')
)
print(output)

## Simple Chains

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# LLM used by the chain below.
llm = OpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0.7,
)

# Prompt with the {virus} and {idioma} placeholders.
template = """ Você é um virologista experiente.
Escreva um resumo sobre o seguinte {virus} em {idioma}."""
prompt = PromptTemplate(template=template, input_variables=['virus', 'idioma'])

# The chain pairs the prompt with the model; run() receives the placeholder
# values as a dict keyed by placeholder name.
chain = LLMChain(prompt=prompt, llm=llm)
output = chain.run({'virus': 'HIV', 'idioma': 'Inglês'})

In [None]:
# Display the value returned by chain.run() in the previous cell.
output

## Chain Sequencial

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.llms import OpenAI

# --- first chain: prompt built around the {conceito} placeholder ---
prompt1 = PromptTemplate(
    template="""Você é um cientista e programador python.
    escreva uma função que implemente o {conceito}""",
    input_variables=['conceito'],
)
llm1 = OpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0.7,
    max_tokens=1024,
)
chain1 = LLMChain(prompt=prompt1, llm=llm1)

# --- second chain: prompt built around the {function} placeholder ---
prompt2 = PromptTemplate(
    template=""" Data a função {function} python descreva como funciona da forma mais detalhada possível""",
    input_variables=['function'],
)
llm2 = OpenAI(
    model_name='gpt-4-1106-preview',
    temperature=1.2,
)
chain2 = LLMChain(prompt=prompt2, llm=llm2)

# Run chain1 then chain2 as one sequential pipeline, starting from a single
# input string; verbose=True is passed so the run is logged.
overrall_chain = SimpleSequentialChain(verbose=True, chains=[chain1, chain2])
output = overrall_chain.run("regressão linear")

## LangChain Agents

In [None]:
# Exponentiation example

5.1 ** 7.3

In [6]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools.python.tool import PythonAstREPLTool
from langchain import OpenAI

In [None]:
# LLM for the agent: temperature=0, all other settings left at the wrapper's defaults.
llm = OpenAI(temperature=0)

# Agent equipped with a Python REPL tool.
# NOTE(review): this tool executes Python code produced by the model — run it
# only in an environment where that is acceptable.
agent_executor = create_python_agent(
    llm=llm,
    tool=PythonAstREPLTool(),
    verbose=True,
)
agent_executor.run('calcule a raiz quadrada do fatorial de 20 e exiba com 4 casas decimais')

## Splitting e Embedding de texto

In [55]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the whole source document into memory.
# The encoding is given explicitly so the accented Portuguese text is decoded
# the same way on every platform instead of depending on the OS default.
# NOTE(review): assumes ./docs/CLT.txt is stored as UTF-8 — confirm.
with open('./docs/CLT.txt', encoding='utf-8') as f:
    clt = f.read()

# Splitter configuration: chunks of up to 1000 units with a 10-unit overlap,
# where size is measured by len() (i.e. characters of the Python string).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=10,
    length_function=len
)

In [56]:
# Split the loaded text with the splitter configured above; the resulting
# items expose their text as `.page_content` (used in the following cells).
chunks = text_splitter.create_documents([clt])

In [None]:
# Inspect the text of the first chunk.
chunks[0].page_content

In [None]:
# Number of chunks produced by the split.
len(chunks)

In [59]:
def embedding_cost(texts, model='text-embedding-ada-002', price_per_1k_tokens=0.0001):
    """Print the token count and estimated embedding cost for a set of documents.

    Args:
        texts: Iterable of objects exposing their text as ``.page_content``.
        model: Model name passed to ``tiktoken.encoding_for_model`` to pick
            the tokenizer. Defaults to the value previously hard-coded here.
        price_per_1k_tokens: Price in USD per 1000 tokens used for the
            estimate. Defaults to the value previously hard-coded here.

    Returns:
        None. The two result lines are written to stdout.
    """
    # Imported lazily so the notebook only needs tiktoken when this is called.
    import tiktoken

    enc = tiktoken.encoding_for_model(model)
    # Generator expression: no need to materialise the per-document counts.
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total de tokens: {total_tokens}')
    print(f'Custo de Embedding em USD: {total_tokens / 1000 * price_per_1k_tokens:.6f}')

In [None]:
# Print the token total and estimated embedding cost for all chunks.
embedding_cost(chunks)

In [61]:
from langchain.embeddings import OpenAIEmbeddings
# Embedding client created with default settings
# (presumably picks up the OpenAI API key from the environment — confirm).
embeddings = OpenAIEmbeddings()

In [62]:
# Embed the text of the first chunk as a quick check of the embedding client.
# `vector` is not referenced again in the cells shown in this notebook.
vector = embeddings.embed_query(chunks[0].page_content)

In [64]:
import os
import pinecone
from langchain.vectorstores import Pinecone




# Pinecone client; the API key comes from the environment rather than being
# hard-coded in the notebook.
pc = pinecone.Pinecone(
    api_key=os.environ['PINECONE_API_KEY'],
    environment='gcp-starter',
)


In [None]:
# WARNING: destructive — deletes every index returned by list_indexes() for
# this client before the demo index is (re)created below.
indexes = pc.list_indexes()
for existing_name in indexes.names():
    pc.delete_index(existing_name)
    print('Feito!')

In [None]:
from pinecone import PodSpec
# Name and pod environment of the index used by the rest of the notebook.
index_name = 'linuxtips'
environment = "gcp-starter"
spec = PodSpec(environment=environment)

# Create the index only when it does not exist yet. The membership test must
# be done against the index *names* (as the cleanup cell does with .names());
# testing against list_indexes() itself compares the string with index
# descriptions, never matches, and would attempt to re-create an existing index.
# dimension=1536 presumably matches the size of the OpenAIEmbeddings vectors — confirm.
if index_name not in pc.list_indexes().names():
    pc.create_index(name=index_name, dimension=1536, metric='cosine', spec=spec)
    print('Feito!')


In [67]:
# Build the vector store from the chunks using the embedding client and the
# index named above; `vector_store` is what the search/retrieval cells query.
vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)

## Conversando com os Dados (similarity search)

In [None]:
# Similarity search for the question against the vector store; the raw result
# list is printed here and its items are shown one by one in the next cell.
query = 'explique a remuneração das férias'
result = vector_store.similarity_search(query)
print(result)

In [None]:
# Show each retrieved document's text, followed by a 50-dash separator line.
for doc in result:
    print(doc.page_content)
    print('-' * 50)

In [None]:
## Gerando respostas com LLM

In [None]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Chat model that writes the final answer.
llm = ChatOpenAI(
    model='gpt-4-1106-preview',
    temperature=0.5,
)

# Retriever over the vector store: similarity search with k=3.
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 3},
)

# Question-answering chain combining the retriever with the chat model
# (chain_type='stuff').
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
)



In [None]:
# Ask the retrieval chain a question and print its answer.
query = 'como funciona o décimo terceiro salário?'
resp = chain.run(query)
print(resp)

In [None]:
# Same pattern as the previous cell with a different question; `query` and
# `resp` are overwritten.
query = 'como funciona a remuneração de férias?'
resp = chain.run(query)
print(resp)