In [None]:
# Import necessary libraries and modules
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
import pandas as pd
from tqdm import tqdm
import random

In [None]:
# Backend option: the "mistral" model through LangChain's Ollama integration.
# NOTE: the next cell also assigns `llm`; when every cell is run in order that
# later assignment is the one the chains end up using.
from langchain_community.llms import Ollama

llm = Ollama(model="mistral")

In [None]:
import os
from getpass import getpass

from langchain_openai import ChatOpenAI

# Backend option: OpenAI chat model.
# Do not hard-code the key and do not clobber one that is already exported:
# reuse OPENAI_API_KEY when it is set and only prompt (without echo) when it is
# missing or empty.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

llm = ChatOpenAI(model="gpt-4-turbo-preview")

In [None]:
# Type of shop, including the article "a/an" because the value is interpolated
# verbatim into the prompt sentences. Example: "an online book store"
TYPE_OF_SHOP = "an online shop for all class of sport fishing products"

# Number of category names requested from the LLM in the categories prompt
NUMBER_OF_CATEGORIES = 15

# Number of vendor names requested from the LLM in the vendors prompt
NUMBER_OF_VENDORS = 15

# Number of (vendor, category) combinations to build, i.e. products to generate
NUMBER_OF_PRODUCTS = 200

# Maximum number of attempts for the LLM to generate each product
MAX_ATTEMPTS = 5

In [None]:
# Output parser for the list-style replies (categories and vendors); its format
# instructions are injected into the prompt below.
csv_parser = CommaSeparatedListOutputParser()

# System-style instructions placed at the top of every helper request.
# TYPE_OF_SHOP is interpolated here, when the f-string is evaluated.
helper_system_prompt = f"""
As a synthetic product generator specialized for {TYPE_OF_SHOP}, your role encompasses the creation of categories and vendors that fit this specific shop's theme. 
Your responses should:

- Strictly conform to the instructions provided in both system and user prompts.
- Focus solely on generating content that is directly requested, without introducing extraneous information.
- Reflect the unique context and offerings of {TYPE_OF_SHOP}, ensuring relevance and alignment with its theme.

Remember, your objective is to generate data that is coherent, contextually appropriate, and within the bounds of the prompts. 
Precision and relevance are key.
"""

# Template layout: system prompt, then the parser's format instructions, then
# the per-call query. Only "query" is supplied at invoke time; the other two
# placeholders are pre-filled through partial_variables.
helper_prompt = PromptTemplate(
    template="{system_prompt}\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={
        "format_instructions": csv_parser.get_format_instructions(),
        "system_prompt": helper_system_prompt,
    },
)

# Pipeline: fill the prompt -> call the LLM -> parse the reply with csv_parser
helper_chain = helper_prompt | llm | csv_parser

In [None]:
# `helper_chain` (prompt -> LLM -> comma-separated list parser) is built once in
# the previous cell and reused here for both requests instead of being rebuilt.

# Prepare the categories prompt.
# The backslash in the last prompt line is written as "\\" so the model still
# sees a single "\" without relying on an invalid "\:" escape sequence.
categories_prompt = f"""
Imagine you're organizing the inventory for {TYPE_OF_SHOP}, a shop that needs a well-defined set of categories to classify its wide range of products. 
Your task is to create {NUMBER_OF_CATEGORIES} category names that reflect the diversity and specificity of products typically found in such a store. 
These categories should:

- Be reflective of common divisions seen in retail stores, ensuring they are recognizable and straightforward for customers.
- Encompass a comprehensive scope of the shop's inventory, covering all potential product types it might offer.
- Maintain simplicity and clarity, with each name immediately conveying the kind of products it includes.

Very important: List only each category name, focusing on the names alone without additional explanations. 
Do not include new lines, break lines or characters like "()[]/\\:"
"""

categories = helper_chain.invoke({"query": categories_prompt})

# Fail here with a clear message rather than later inside random.choice().
if not categories:
    raise ValueError("The LLM returned no categories; re-run this cell.")

# Prepare the vendors prompt (same escaping note as above).
vendors_prompt = f"""
Craft {NUMBER_OF_VENDORS} unique and inventive vendor names that would be a perfect match for {TYPE_OF_SHOP}. 
Each name should:

- Evoke a sense of professionalism and alignment with the shop's range of products.
- Span a variety of styles, suggesting backgrounds from handcrafted origins to modern, technology-driven enterprises.
- Remain entirely fictional, carefully avoiding any similarities to actual brands or vendors in the market.

Focus exclusively on generating the vendor names, without additional details or descriptions.
Do not include new lines, break lines or characters like "()[]/\\:"
"""

vendors = helper_chain.invoke({"query": vendors_prompt})

if not vendors:
    raise ValueError("The LLM returned no vendors; re-run this cell.")

In [None]:
# Build one {vendor, category} pairing per product to generate; each pairing
# draws the vendor first and the category second, both uniformly at random.
product_combinations = [
    dict(vendor=random.choice(vendors), category=random.choice(categories))
    for _ in range(NUMBER_OF_PRODUCTS)
]

In [None]:
product_system_prompt = f"""
As a synthetic product generator specialized for {TYPE_OF_SHOP}, your role encompasses the creation of products that fit this specific shop's theme. 
You will be given a category and an vendor name for generating the product.
Your responses should:

- Strictly conform to the instructions provided in both system and user prompts.
- Focus solely on generating content that is directly requested, without introducing extraneous information.
- Reflect the unique context and offerings of {TYPE_OF_SHOP}, ensuring relevance and alignment with its theme.

Remember, your objective is to generate data that is coherent, contextually appropriate, and within the bounds of the prompts. 
Precision and relevance are key.
"""

json_parser = JsonOutputParser(pydantic_object=Product)

product_prompt = PromptTemplate(
    template="{system_prompt}\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={
        "format_instructions": json_parser.get_format_instructions(),
        "system_prompt": product_system_prompt,
    },
)

product_chain = product_prompt | llm | json_parser

In [None]:
class Product(BaseModel):
    title: str = Field(description="Title of the product")
    description: str = Field(description="Description of the product")
    price: str = Field(description="Price of the product in USD")
        
def generate_product_details(vendor, category, chain, max_attempts=None):
    """Ask the chain for one product and return it as a flat record.

    Args:
        vendor: Vendor name; interpolated into the prompt and copied to the result.
        category: Category name; interpolated into the prompt and copied to the result.
        chain: Object whose ``invoke({"query": prompt})`` returns the parsed reply.
        max_attempts: Maximum number of chain calls. Defaults to the notebook-level
            ``MAX_ATTEMPTS`` when omitted.

    Returns:
        A dict with the keys ``vendor``, ``category``, ``title``, ``description``
        and ``price``, or ``None`` when no attempt produced a complete reply.
    """
    required_keys = ('title', 'description', 'price')
    if max_attempts is None:
        max_attempts = MAX_ATTEMPTS

    # Construct the prompt for generating product details
    prompt = f"""
Given a vendor "{vendor}", generate a product for the {category} category.

Include:

- A product title that captures the essence of the item. Do not include the vendor name on the title. Kept it short and concise.
- A comprehensive description that showcases the product's key features and benefits. If it makes sese for the type of product being generated, include relevant details such as weight, size, color, or other pertinent characteristics to give a clear picture of the product. The length of the description is at your discretion, but it should be thorough enough to inform and entice potential customers.
- A price in USD, considering the product's value and market positioning.
Make sure to always generate a title, a description and a price.

Guidelines:
- The product must always include a title, description, and price.
- Ensure the title and description are specifically tailored to the category.
- You may choose whether to incorporate the vendor name and category within the description, based on what best suits the product narrative and customer engagement strategy.
- In your answer, output only the title, description and price.
"""
    for attempt in range(max_attempts):
        try:
            # Invoke the chain (LLM) with the prompt
            response = chain.invoke({"query": prompt})
        except ValueError as error:
            # Unparseable model output surfaces as a ValueError subclass
            # (LangChain's OutputParserException derives from ValueError).
            # Treat it as a failed attempt instead of aborting the whole run;
            # other errors (network, authentication, ...) still propagate.
            print(f"Attempt {attempt + 1}: Could not parse the model output ({error}). Retrying...")
            continue

        # Valid JSON is not necessarily an object: it can also be a list, a
        # string or null, so check the type before looking for the keys.
        if isinstance(response, dict) and all(key in response for key in required_keys):
            return {
                'vendor': vendor,
                'category': category,
                'title': response['title'],
                'description': response['description'],
                'price': response['price']
            }

        # Incomplete or wrongly shaped reply: report it and try again
        print(f"Attempt {attempt + 1}: Missing one or more parts. Retrying...")
        print(response)

    # If the loop exits without returning, it means all attempts failed
    print("Failed to generate complete product details after maximum attempts.")
    return None

In [None]:
# Successfully generated product records, one dict per product
product_details_list = []

for product in tqdm(product_combinations, desc='Generating Product Details'):
    vendor = product['vendor']
    category = product['category']

    product_details = generate_product_details(vendor, category, product_chain)

    # generate_product_details returns None when every attempt failed; keeping
    # those placeholders would break or corrupt the DataFrame built from this
    # list, so only complete records are collected.
    if product_details is not None:
        product_details_list.append(product_details)

# Make skipped products visible instead of silently producing a shorter table
print(f"Generated {len(product_details_list)} of {len(product_combinations)} products.")

In [None]:
# Convert the list of product records into a DataFrame (one row per product).
# Entries that are None (a product whose generation failed) are dropped first,
# because they are not valid rows.
df = pd.DataFrame([record for record in product_details_list if record is not None])

# Preview the first rows to verify; a bare last expression uses the notebook's
# rich table rendering instead of plain printed text.
df.head()

In [None]:
# Export the products to an Excel workbook in the current working directory.
# With pandas' default (index=True) the row index is written as the first column.
df.to_excel('products.xlsx')

In [None]:
# Export the same table as CSV in the current working directory.
# With pandas' default (index=True) the row index is written as the first column.
df.to_csv('products.csv')