In [31]:
import os
from dotenv import load_dotenv

# Load the variables defined in the local .env file into the environment.
load_dotenv()

# Read the OpenAI key from the environment (None if the variable is not set).
api_key = os.environ.get("OPENAI_API_KEY")




In [32]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document

In [33]:
# Embedding model used to vectorize the documents; the key loaded above is
# passed in explicitly.
embedding = OpenAIEmbeddings(
    openai_api_key=api_key,
)

In [34]:
# Toy movie corpus: each entry pairs a one-sentence plot summary with its
# metadata. Not every movie carries every metadata field.
docs = [
    Document(page_content=summary, metadata=fields)
    for summary, fields in (
        (
            "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
            {"year": 1993, "rating": 7.7, "genre": "science fiction"},
        ),
        (
            "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
            {"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
        ),
        (
            "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
            {"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
        ),
        (
            "A bunch of normal-sized women are supremely wholesome and some men pine after them",
            {"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
        ),
        (
            "Toys come alive and have a blast doing so",
            {"year": 1995, "genre": "animated"},
        ),
        (
            "A hacker discovers reality is a simulation and leads a rebellion against the machines controlling it.",
            {"year": 1999, "director": "Lana Wachowski, Lilly Wachowski", "rating": 8.7, "genre": "science fiction"},
        ),
        (
            "A young lion prince flees his kingdom only to learn the true meaning of responsibility and bravery.",
            {"year": 1994, "rating": 8.5, "genre": "animated"},
        ),
        (
            "Batman faces off against the Joker, a criminal mastermind who plunges Gotham into chaos.",
            {"year": 2008, "director": "Christopher Nolan", "rating": 9.0, "genre": "action"},
        ),
        (
            "A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival.",
            {"year": 2014, "director": "Christopher Nolan", "rating": 8.6, "genre": "science fiction"},
        ),
    )
]

In [8]:
# Embed the corpus and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)

In [35]:
# Sample questions used by the retrieval demos below.
question1, question2, question3 = (
    "Which 1994 animated movie has a rating of 8.5?",
    "Which movie features Batman facing off against the Joker and who directed it?",
    "What genre is the movie 'The Matrix' and who directed it?",
)
     

In [10]:
# Plain vector-similarity lookup for the first sample question (no metadata
# filtering); the result is displayed as the cell output.
vectorstore.similarity_search(query=question1)

[Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so'),
 Document(metadata={'genre': 'animated', 'rating': 8.5, 'year': 1994}, page_content='A young lion prince flees his kingdom only to learn the true meaning of responsibility and bravery.'),
 Document(metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),
 Document(metadata={'director': 'Christopher Nolan', 'genre': 'science fiction', 'rating': 8.6, 'year': 2014}, page_content="A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival.")]

In [11]:
# Expose the store as a retriever that returns the 3 most similar documents.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [36]:
from langchain.chat_models import ChatOpenAI

from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

In [17]:
# Use the maintained ChatOpenAI class from langchain_openai (the same package
# the embeddings and the self-query section import from) rather than the
# deprecated langchain.chat_models alias imported in the cell above.
from langchain_openai import ChatOpenAI

# Chat model shared by the chains in this notebook; temperature 0.7 allows
# some variation in the wording of answers.
llm = ChatOpenAI(temperature=0.7)

  llm = ChatOpenAI(temperature=0.7)


In [37]:
import textwrap
def wrap_text(text, width=90):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    The input is split on newline characters, every resulting line is wrapped
    on its own with ``textwrap.fill``, and the wrapped lines are joined back
    together with newlines.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

In [38]:

# Prompt text for the question-answering chain: the retrieved documents fill
# {context} and the user's question fills {question}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

In [39]:

# Build the chat prompt from the template string defined above.
prompt = ChatPromptTemplate.from_template(template=template)

In [40]:
# Retrieval-augmented chain: the incoming question is sent to the retriever
# (its documents fill "context") and is also passed through unchanged as
# "question"; the filled prompt goes to the chat model and the reply is parsed
# into a plain string.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm | StrOutputParser()

In [20]:
# Display the first sample question before sending it through the chain.
question1

'Which 1994 animated movie has a rating of 8.5?'

In [22]:
# Run the chain on the first sample question and print the answer wrapped to
# the default width of wrap_text.
text_reply = chain.invoke(question1)
print(wrap_text(text_reply))

The 1994 animated movie with a rating of 8.5 is the one about a young lion prince fleeing
his kingdom and learning about responsibility and bravery.


## Self-query


In [1]:

from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

In [2]:
# Movie corpus for the self-query demo: each entry pairs a one-sentence plot
# summary with the metadata the retriever can filter on. Not every movie
# carries every metadata field.
docs = [
    Document(page_content=summary, metadata=fields)
    for summary, fields in (
        (
            "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
            {"year": 1993, "rating": 7.7, "genre": "science fiction"},
        ),
        (
            "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
            {"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
        ),
        (
            "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
            {"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
        ),
        (
            "A bunch of normal-sized women are supremely wholesome and some men pine after them",
            {"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
        ),
        (
            "Toys come alive and have a blast doing so",
            {"year": 1995, "genre": "animated"},
        ),
        (
            "A hacker discovers reality is a simulation and leads a rebellion against the machines controlling it.",
            {"year": 1999, "director": "Lana Wachowski, Lilly Wachowski", "rating": 8.7, "genre": "science fiction"},
        ),
        (
            "A young lion prince flees his kingdom only to learn the true meaning of responsibility and bravery.",
            {"year": 1994, "rating": 8.5, "genre": "animated"},
        ),
        (
            "Batman faces off against the Joker, a criminal mastermind who plunges Gotham into chaos.",
            {"year": 2008, "director": "Christopher Nolan", "rating": 9.0, "genre": "action"},
        ),
        (
            "A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival.",
            {"year": 2014, "director": "Christopher Nolan", "rating": 8.6, "genre": "science fiction"},
        ),
    )
]

In [9]:
# Index the corpus in a dedicated collection with stable ids.
# Without a collection name, from_documents writes to Chroma's default
# collection, which the store built earlier in this notebook also uses, so a
# top-to-bottom run would index every movie twice and return duplicate hits.
# NOTE(review): this relies on in-process Chroma clients sharing collections
# by name; confirm against the installed chromadb version.
# Deterministic ids additionally make re-running this cell overwrite the same
# nine entries instead of appending new copies.
vectorstore = Chroma.from_documents(
    docs,
    embedding,
    ids=[f"movie-{position}" for position in range(len(docs))],
    collection_name="self_query_movies",
)

In [10]:
# Schema of the metadata fields the self-query retriever may filter on:
# (field name, description shown to the model, value type).
metadata_field_info = [
    AttributeInfo(name=field, description=meaning, type=kind)
    for field, meaning, kind in (
        (
            "genre",
            "The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']",
            "string",
        ),
        ("year", "The year the movie was released", "integer"),
        ("director", "The name of the movie director", "string"),
        ("rating", "A 1-10 rating for the movie", "float"),
    )
]

In [11]:
# Short description of what each document's page_content holds; it is passed
# to the query-constructor prompt built below.
document_content_description = "Brief summary of a movie"

In [12]:
from langchain.chains.query_constructor.base import (
    StructuredQueryOutputParser,
    get_query_constructor_prompt,
)

In [13]:
# Few-shot prompt that asks the model to turn a natural-language question into
# a structured query (search text + metadata filter) over the fields above.
# Note: this rebinds the notebook-level name `prompt`.
prompt = get_query_constructor_prompt(
    document_contents=document_content_description,
    attribute_info=metadata_field_info,
)
prompt

FewShotPromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, examples=[{'i': 1, 'data_source': '```json\n{{\n    "content": "Lyrics of a song",\n    "attributes": {{\n        "artist": {{\n            "type": "string",\n            "description": "Name of the song artist"\n        }},\n        "length": {{\n            "type": "integer",\n            "description": "Length of the song in seconds"\n        }},\n        "genre": {{\n            "type": "string",\n            "description": "The song genre, one of "pop", "rock" or "rap""\n        }}\n    }}\n}}\n```', 'user_query': 'What are songs by Taylor Swift or Katy Perry about teenage romance under 3 minutes long in the dance pop genre', 'structured_request': '```json\n{{\n    "query": "teenager love",\n    "filter": "and(or(eq(\\"artist\\", \\"Taylor Swift\\"), eq(\\"artist\\", \\"Katy Perry\\")), lt(\\"length\\", 180), eq(\\"genre\\", \\"pop\\"))"\n}}\n```'}, {'i': 2, 'data_source': '```json\n{{\n    "con

In [14]:
# Parser for the query constructor's model output, built with its default
# components.
output_parser = StructuredQueryOutputParser.from_components()

In [18]:
# Query constructor pipeline: few-shot prompt -> chat model -> output parser.
query_constructor = prompt | llm | output_parser

In [23]:

# Try the query constructor on its own: the question goes in under the
# prompt's "query" input and the resulting structured query is displayed.
query_constructor.invoke(
    {
        "query": "What are some sci-fi movies from the 90's directed by Luc Besson about taxi drivers"
    }
)

StructuredQuery(query='taxi drivers', filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='science fiction'), Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GTE: 'gte'>, attribute='year', value=1990), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='year', value=2000)]), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Luc Besson')]), limit=None)

In [24]:

from langchain.retrievers.self_query.chroma import ChromaTranslator

# Self-querying retriever: the query constructor produces a structured query
# from the question, and the Chroma translator adapts it for the vector store.
# Note: this rebinds the notebook-level name `retriever`.
retriever = SelfQueryRetriever(
    vectorstore=vectorstore,
    structured_query_translator=ChromaTranslator(),
    query_constructor=query_constructor,
)

In [25]:
# Ask a question that mixes a year range, a topic and a genre preference, and
# display the documents the self-query retriever returns.
retriever.invoke(
    "What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated"
)

[Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so'),
 Document(metadata={'genre': 'animated', 'rating': 8.5, 'year': 1994}, page_content='A young lion prince flees his kingdom only to learn the true meaning of responsibility and bravery.')]

In [26]:
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

In [27]:


# Prompt text for the question-answering chain: the retrieved documents fill
# {context} and the user's question fills {question}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

In [41]:

# Build the QA prompt from the template defined just above.
# The notebook-level name `prompt` was rebound earlier in this section to the
# query-constructor few-shot prompt (whose only input is "query"), so using it
# here on a fresh top-to-bottom run would feed the wrong prompt to the chain.
qa_prompt = ChatPromptTemplate.from_template(template)

# Retrieval-augmented chain on top of the self-query retriever: the question
# is sent to the retriever (its documents fill "context") and also passed
# through unchanged as "question"; the reply is parsed into a plain string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | qa_prompt
    | llm
    | StrOutputParser()
)

text_reply = chain.invoke("Tell me about the movie which have rating more than 7.")

# Print the answer wrapped to the default width of wrap_text.
print(wrap_text(text_reply))

The movies with a rating of more than 7 are:
1. Batman, directed by Christopher Nolan, with a rating of 9.0
2. A young lion prince, an animated film from 1994, with a rating of 8.5
3. Interstellar, a science fiction film from 2014, with a rating of 8.6


In [43]:
# Inspect the structured query produced for the question asked above.
# The question is passed under the prompt's declared "query" input, matching
# the earlier query_constructor call in this notebook, instead of relying on a
# bare string being mapped to that input implicitly.
query_constructor.invoke(
    {"query": "Tell me about the movie which have rating more than 7."}
)

StructuredQuery(query=' ', filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=7), limit=None)