In [1]:
# Had to pip install jupyter first
!pip install python-dotenv

Collecting python-dotenv
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [3]:
# Enable the autoreload extension; mode 2 picks up edits to imported modules live
%load_ext autoreload
%autoreload 2

In [4]:
import os
import sys
from dotenv import load_dotenv
# Load variables from a .env file into the process environment;
# OPENAI_API_KEY is read from os.environ further down.
load_dotenv()

True

## Data

In [4]:
import pandas as pd

In [5]:
# Load the review data; the first CSV column is used as the DataFrame index
df_reviews = pd.read_csv("dummy_data_new.csv", index_col=0)

In [6]:
# Sanity check: dimensions and column names, then the first three rows
print(df_reviews.shape, df_reviews.columns)
df_reviews.head(3)

(109, 4) Index(['Product Name', 'Product Description', 'Review Text', 'Rating'], dtype='object')


Unnamed: 0,Product Name,Product Description,Review Text,Rating
0,iPhone 15,The Apple iPhone 15 redefines smartphone innov...,The iPhone 15 is a masterpiece! The sleek desi...,"{""durability"": 5, ""ease of use"": 5, ""pleasant ..."
1,MacBook Pro 2023,Experience the ultimate in computing power wit...,The MacBook Pro 2023 is a game-changer! The pe...,"{""durability"": 5, ""ease of use"": 5, ""pleasant ..."
2,Kindle Paperwhite,"Enjoy reading your favorite books anytime, any...",The Kindle Paperwhite is a must-have for book ...,"{""durability"": 5, ""ease of use"": 5, ""pleasant ..."


In [7]:
# Sample one product name to use as input
# (note: this reads the "Product Name" column, not "Product Description")
df_reviews["Product Name"].sample(1).iloc[0]

'Canon EOS Rebel T8i DSLR Camera'

## Review generation

### A) LangChain - OpenAI

Le Wagon's requirements file for using LangChain: https://wagon-public-datasets.s3.amazonaws.com/deep_learning_datasets/langchain_requirements.txt

In [8]:
!pip --quiet install langchain langchain-community langchain-openai chromadb

In [6]:
from langchain.chains import RetrievalQA
# from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_text_splitters import Language
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.schema.document import Document

In [1]:
def get_text_chunks(text, chunk_size=500, chunk_overlap=100):
    """Split `text` into overlapping chunks wrapped as LangChain Documents.

    Args:
        text: The raw string to split.
        chunk_size: Size limit passed to RecursiveCharacterTextSplitter
            (default 500, the value previously hard-coded here).
        chunk_overlap: Overlap passed to RecursiveCharacterTextSplitter
            (default 100, the value previously hard-coded here).

    Returns:
        A list of Document objects, one per chunk, in the order the
        splitter produced them.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return [Document(page_content=piece) for piece in splitter.split_text(text)]

def embed_texts_openai(texts, openai_api_key):
    """Build a Chroma vector store from `texts` using OpenAI embeddings.

    Args:
        texts: Documents handed to `Chroma.from_documents`.
        openai_api_key: Key used to construct the OpenAI embedder.

    Returns:
        The Chroma store created from the documents.
    """
    count = len(texts)
    print(f"Embedding {count} texts...", end=" ")

    # The embedder is what Chroma uses to populate the store with our texts.
    store = Chroma.from_documents(
        texts,
        OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    print("✅")
    return store

def run_qa(doc_search, prompt, openai_api_key, model_name="gpt-3.5-turbo", k=1):
    """Answer `prompt` with a RetrievalQA chain backed by `doc_search`.

    Args:
        doc_search: Vector store; its `as_retriever` supplies context documents.
        prompt: The query passed to the chain's `invoke`.
        openai_api_key: Key used to construct the ChatOpenAI model.
        model_name: Chat model to use (default "gpt-3.5-turbo", the value
            previously hard-coded here).
        k: Maximum number of documents the retriever returns (default 1,
            the value previously hard-coded here).

    Returns:
        The "result" entry of the chain's output.
    """
    print("Running QA...", end=' ')

    # Retrieval QA
    # - chain_type="stuff": the model 'stuffs' all retrieved texts into a single
    #   prompt, which is fine while they are sufficiently small.
    qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name=model_name, openai_api_key=openai_api_key),
        chain_type="stuff",
        retriever=doc_search.as_retriever(search_kwargs={"k": k}),
    )

    answer = qa.invoke(prompt)
    print("✅")
    return answer["result"]

In [7]:
# API key is taken from the environment (None when OPENAI_API_KEY is unset)
OPEN_API_KEY = os.getenv("OPENAI_API_KEY")

# NOTE: this is a product *name* (as sampled from the "Product Name" column),
# used here as the description input.
PRODUCT_DESCRIPTION = "Canon EOS Rebel T8i DSLR Camera"

# Rating per criterion that the generated review should reflect
RATINGS = {
    "Longevity": 1,
    "Usage": 2,
    "Packaging": 5,
    "Design": 4,
}

# Get text chunks
chunks = get_text_chunks(PRODUCT_DESCRIPTION)

In [8]:
# Embed the chunks into a vector store; the bare name on the last line displays its repr
doc_search = embed_texts_openai(chunks, OPEN_API_KEY)
doc_search

Embedding 1 texts... ✅


<langchain_community.vectorstores.chroma.Chroma at 0x10eb1cca0>

In [10]:
def generate_review(product_description, ratings, doc_search, openai_api_key):
    """Assemble a review-generation prompt from ratings and run it through `run_qa`.

    Args:
        product_description: Text identifying the product; interpolated into
            the prompt and into each per-criterion sentence.
        ratings: Mapping of criterion name -> numeric rating. A rating >= 4
            yields an "excels" sentence, >= 3 "performs well", >= 2 "average",
            anything lower "could improve".
        doc_search: Vector store forwarded unchanged to `run_qa`.
        openai_api_key: API key forwarded unchanged to `run_qa`.

    Returns:
        Whatever `run_qa` returns for the assembled prompt.
    """
    # One rating per line. Joining with '' fused the entries together
    # ("Longevity: 1Usage: 2..."), which made the ratings ambiguous to the model.
    # The separator carries the block's indentation so the lines stay aligned.
    ratings_block = "\n    ".join(
        f"{criterion}: {rating}" for criterion, rating in ratings.items()
    )

    prompt = f"""
    Product Description: {product_description}
    Ratings:
    {ratings_block}
    Based on the description and ratings provided, please generate a review for this product. Do it without explicitly referring to the fact that it is based on a rating or provided information.

    """
    # Append one guiding sentence per criterion, chosen by rating band.
    for criterion, rating in ratings.items():
        label = criterion.lower()
        if rating >= 4:
            prompt += f"The {product_description} excels in {label} as it offers exceptional {label}.\n"
        elif rating >= 3:
            prompt += f"The {product_description} performs well in terms of {label} with {label} that meet expectations.\n"
        elif rating >= 2:
            prompt += f"The {product_description} has average {label}, providing satisfactory {label}.\n"
        else:
            prompt += f"The {product_description} could improve its {label} as the current {label} is below expectations.\n"

    return run_qa(doc_search, prompt, openai_api_key)


# Generate review
review = generate_review(
    product_description=PRODUCT_DESCRIPTION,
    ratings=RATINGS,
    doc_search=doc_search,
    openai_api_key=OPEN_API_KEY,
)

# Product header, a blank line, then the generated text
print(f"Product: {PRODUCT_DESCRIPTION}\n", f"Generated Review:\n{review}", sep="\n")

Running QA... ✅
Product: Canon EOS Rebel T8i DSLR Camera

Generated Review:
The Canon EOS Rebel T8i DSLR Camera showcases a sleek design that complements its exceptional packaging, ensuring a delightful aesthetic experience. While its usage meets the standard expectations, there is room for improvement in terms of longevity to truly enhance the overall experience with this camera.
