In [2]:
import json
import pandas as pd
import numpy as np
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment; the
# recorded output `True` is this call's return value.
# NOTE(review): presumably this is where the OpenAI credentials used by
# ChatOpenAI further down come from — confirm against the project's .env.
load_dotenv()

True

In [3]:
# Product table backing the whole notebook; later cells read its `supplier`,
# `product_name`, `daily_demand` and `lead_time` columns.
products_df = pd.read_csv(filepath_or_buffer="data/products_data.csv")

In [6]:
# Pick the products that will receive a synthetic shipment delay.
# - random_state pins the draw so a fresh "Restart & Run All" selects the same
#   rows instead of a different sample every time.
# - n is capped at the table size because sample() raises ValueError when asked
#   for more rows than exist (sampling is without replacement by default).
rand_sample_disruptions_df = products_df.sample(
    n=min(20, len(products_df)),
    random_state=42,
)

In [10]:
# Give every sampled product a random delay length.
# The generator is seeded so the delays are identical on every run of this
# cell; `high` is exclusive, so the drawn values are 1, 2 or 3 days.
rng = np.random.default_rng(42)
rand_sample_disruptions_df['delay_in_days'] = rng.integers(
    1, high=4, size=len(rand_sample_disruptions_df)
)

In [23]:
# Generate one synthetic "shipment delayed" email per sampled product.

# Prompt template. `input_variables` must list exactly the {placeholders} used
# in the text below; these are the keys passed to chain.invoke() in the loop.
prompt = PromptTemplate(
    input_variables=["supplier", "qty", "product", "num_days"],
    template="""
    Generate a synthetic email about supplier: {supplier} having a delay
    in shipment of {qty} number of product {product} by {num_days} days.
    The email should be addressed to: "Nature’s Basket Mart"
    The email should start with: "Dear Nature's Basket Mart employee, \n"
    The email should have a believable reason as to why shipment was delayed
    Output as a JSON with "email" as key
    """
)

# Initialize LLM
llm = ChatOpenAI(model="gpt-4")

# Chain: format the prompt from the input dict, then send it to the model.
chain = prompt | llm

# NOTE(review): not read anywhere in this cell (the loop below is driven by the
# sampled DataFrame); kept in case other cells rely on it.
num_samples = 20

# One model call per sampled row. Each result is the raw chat message returned
# by the model; its `.content` is decoded as JSON in a later cell.
emails_lst = []
for _, row in rand_sample_disruptions_df.iterrows():
    result = chain.invoke({
        "supplier": row['supplier'],
        # Quantity affected by the delay: daily demand times lead time.
        "qty": row['daily_demand'] * row['lead_time'],
        "product": row['product_name'],
        "num_days": row['delay_in_days'],
    })
    emails_lst.append(result)

In [31]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if present.

    Chat models frequently wrap JSON answers in a fence such as
    ```json ... ```, which ``json.loads`` rejects. Text without a fence is
    only stripped of surrounding whitespace, and non-string values are
    returned unchanged so the caller's error handling still sees them.
    """
    if not isinstance(text, str):
        return text
    text = text.strip()
    if text.startswith("```"):
        # Drop the opening fence line, which may carry a language tag.
        text = text.split("\n", 1)[1] if "\n" in text else text[3:]
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


# Decode each model reply into a Python object. Replies that are not valid
# JSON are printed together with the error and left out of the result list.
emails_parsed_lst = []

for email in emails_lst:
    try:
        # strict=False accepts raw control characters (e.g. newlines) inside
        # JSON strings, which multi-line email bodies tend to contain.
        parsed_email = json.loads(_strip_code_fence(email.content), strict=False)
        emails_parsed_lst.append(parsed_email)
    except (json.JSONDecodeError, TypeError) as e:
        # Only decoding problems are tolerated; anything else should surface.
        print(email)
        print(e)

In [33]:
# Write the successfully parsed emails to disk as pretty-printed JSON
# (4-space indentation).
with open("data/delay_emails.json", mode="w") as f:
    json.dump(emails_parsed_lst, fp=f, indent=4)

print("JSON output saved to delay_emails.json")

JSON output saved to delay_emails.json
