In [1]:
import os

# Configure the OpenAI client through environment variables: requests are
# routed to the Vocareum endpoint. The API key is blank in this copy
# (presumably redacted) - fill it in before running the notebook.
os.environ.update(
    {
        "OPENAI_API_KEY": "",
        "OPENAI_API_BASE": "https://openai.vocareum.com/v1",
    }
)

In [4]:
# from langchain.llms import OpenAI # this code has been deprecated since recording.
from langchain.chat_models import ChatOpenAI # this is the replacement 
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, NonNegativeInt
from typing import List
from random import sample 
from langchain.document_loaders.csv_loader import CSVLoader

First, let's create a loader and load reviews from tv-reviews.csv into memory

In [5]:
# Read tv-reviews.csv from the working directory. Judging by the printed
# output, the loader produces one Document per CSV row, with the row number
# stored in its metadata.
loader = CSVLoader(
    file_path='./tv-reviews.csv',
)
data = loader.load()
print(data)

[Document(page_content="TV Name: Imagix Pro\nReview Title: Amazing Picture Quality\nReview Rating: 9\nReview Text: I recently purchased the Imagix Pro and I am blown away by its picture quality. The colors are vibrant and the images are crystal clear. It feels like I'm watching movies in a theater! The sound is also impressive, creating a truly immersive experience. Highly recommended!", metadata={'source': './tv-reviews.csv', 'row': 0}), Document(page_content="TV Name: Imagix Pro\nReview Title: Impressive Features\nReview Rating: 8\nReview Text: The Imagix Pro is packed with impressive features that enhance my viewing experience. The smart functionality allows me to easily stream my favorite shows and movies. The remote control is user-friendly and has convenient shortcuts. The slim design is sleek and fits perfectly in my living room. The only downside is that the sound could be better, but overall, I'm satisfied.", metadata={'source': './tv-reviews.csv', 'row': 1}), Document(page_co

Then, let's initialize our LLM

In [8]:
# Initialize the LLM.
# The OpenAI key and API base are taken from the environment variables that the
# first cell of this notebook configures. They are deliberately NOT re-assigned
# here: writing an empty OPENAI_API_KEY a second time would wipe out a key
# entered in that first cell.
from langchain.llms import OpenAI

model_name = 'gpt-3.5-turbo'
# temperature=0 asks for the least random sampling, which suits a
# classification task whose output has to be parsed as JSON.
llm = OpenAI(model_name=model_name, temperature=0)




Now, let's set up our parser and a template.

**Note**  that since recording, the code to initialize the model has been updated to 

`llm = ChatOpenAI()`

In [9]:
# Schema for the LLM's answer: two lists of non-negative review indices.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose - presumably pydantic would copy a docstring into the generated JSON
# schema and thereby change the prompt text; confirm before adding one.
class ReviewSentiment(BaseModel):
    positives: List[NonNegativeInt] = Field(
        description="index of a positive TV review, starting from 0",
    )
    negatives: List[NonNegativeInt] = Field(
        description="index of a negative TV review, starting from 0",
    )


# The parser is used twice in this notebook: its format instructions are
# embedded in the prompt, and it later parses the model's reply into a
# ReviewSentiment instance.
parser = PydanticOutputParser(pydantic_object=ReviewSentiment)
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"positives": {"title": "Positives", "description": "index of a positive TV review, starting from 0", "type": "array", "items": {"type": "integer", "minimum": 0}}, "negatives": {"title": "Negatives", "description": "index of a negative TV review, starting from 0", "type": "array", "items": {"type": "integer", "minimum": 0}}}, "required": ["positives", "negatives"]}
```


Pick 3 sample reviews to classify. LLMs have a limited context window they can work with; in later exercises, we'll see how to deal with that differently.

In [18]:
# Instruction text placed at the top of the prompt (sent to the model verbatim).
question = """
            Classify TV reviews provided in the context into positive and negative.
            Only use the reviews provided in this context, do not make up new reviews or use any existing information you know bout these TVs.
            If there is no positive or negative reviews , output an emply JSON array

"""

# Pick 3 random reviews and join their text, one review after another, to form
# the context section of the prompt.
reviews_to_classify = sample(data, 3)
context = '\n'.join([doc.page_content for doc in reviews_to_classify])

# Template with two caller-supplied variables and one partial variable.
prompt = PromptTemplate(
    template="{question}\n{format_instructions}\nContext:{context}",
    input_variables=["question", "context"],
    # The bound method is passed uncalled; the rendered prompt printed below
    # shows that it is resolved to the instruction text when formatting.
    partial_variables={"format_instructions": parser.get_format_instructions},
)

query = prompt.format(question=question, context=context)
print(query)



            Classify TV reviews provided in the context into positive and negative.
            Only use the reviews provided in this context, do not make up new reviews or use any existing information you know bout these TVs.
            If there is no positive or negative reviews , output an emply JSON array


The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"positives": {"title": "Positives", "description": "index of a positive TV review, starting from 0", "type": "array", "items": {"type": "integer", "minimum": 0}}, "negatives": {"title": "Negatives", "description": "ind

# Replace the instruction text and render the prompt template again, reusing
# the `prompt` and `context` built in the previous cell. The resulting `query`
# string is what gets sent to the LLM.
question = """
    Review TVs provided in the context. 
    Only use the reviews provided in this context, do not make up new reviews or use any existing information you know about these TVs. 
    If there are no positive or negative reviews, output an empty JSON array. 
"""
query = prompt.format(question=question, context=context)

Finally, let's send our query to the LLM and use the parser we set up to parse the output into a Python object.

**NOTE**: Since recording, the code to feed the query to the LLM has been updated to

`llm.predict(query)`

In [21]:
# Send the rendered prompt to the LLM and parse its reply into a
# ReviewSentiment object.
# `predict` is the string-in / string-out call that the note above names as the
# replacement for calling the model object directly.
output = llm.predict(query)
print(output)

result = parser.parse(output)
print(result)


def _reviews_at(indices):
    """Return the text of the sampled reviews at the given positions.

    The indices come from the model's reply, so they are not guaranteed to be
    valid positions in `reviews_to_classify`. Out-of-range values are skipped
    instead of raising IndexError. Negative values cannot occur because the
    schema uses NonNegativeInt.
    """
    return [
        reviews_to_classify[i].page_content
        for i in indices
        if i < len(reviews_to_classify)
    ]


print("Positives:\n" + "\n".join(_reviews_at(result.positives)))
print("Negatives:\n" + "\n".join(_reviews_at(result.negatives)))

{
    "positives": [0, 1, 2],
    "negatives": []
}
positives=[0, 1, 2] negatives=[]
Positives:
TV Name: Imagix Pro
Review Title: Easy Setup and Navigation
Review Rating: 9
Review Text: Setting up the Imagix Pro was a breeze. The instructions were clear and the TV guided me through the process smoothly. The interface is intuitive and easy to navigate. I love how seamless it is to switch between different apps and inputs. This TV has made my life so much simpler!
TV Name: VisionMax Ultra
Review Title: Easy Installation Process
Review Rating: 9
Review Text: Setting up the VisionMax Ultra was a breeze. The instructions provided were clear and concise, making the installation process quick and hassle-free. The TV also offers various mounting options, allowing me to choose the perfect placement for my viewing pleasure.
TV Name: VisionMax Ultra
Review Title: Sleek and Modern
Review Rating: 9
Review Text: The VisionMax Ultra fits perfectly into my modern living room decor. The slim design and