## Importing Libraries

In [1]:
import os
import json
from dotenv import load_dotenv
import chromadb
from sentence_transformers import SentenceTransformer
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


## Loading environment variables

In [9]:
# Pull variables from a local .env file (if one exists) into the process
# environment, then read the Gemini key. The value is None when the variable
# is not set.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

## Set up the embedding function using Sentence Transformers

In [7]:
# Embedding function handed to the Chroma collection below, configured with
# the "all-MiniLM-L6-v2" model name.
embedding_fn = SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2",
)


## Set up ChromaDB

In [8]:
# Create the Chroma client and open (or create on first use) the
# "rca_patterns" collection, embedding documents with `embedding_fn`.
# NOTE(review): chromadb.Client() looks like the non-persistent client, so
# stored patterns may not survive a kernel restart — confirm, and consider a
# persistent client if they should.
client = chromadb.Client()
collection = client.get_or_create_collection(
    name="rca_patterns",
    embedding_function=embedding_fn,
)

## Set up Gemini 1.5 Flash

In [10]:
# Register the API key with the SDK, then build the model handle that the
# checklist functions below call.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel(model_name="gemini-1.5-flash")

## Load data.json into ChromaDB (if DB is empty)

In [11]:
# Seed the collection from data.json only when it is empty, so re-running this
# cell does not try to insert the same ids again.
if not collection.count():
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding. The file is closed before any embedding work starts.
    with open("data.json", encoding="utf-8") as f:
        data = json.load(f)

    # Key by pattern (used as the Chroma id), keeping the first checklist seen
    # for a repeated pattern, so the batch below never carries duplicate ids.
    seed_checklists = {}
    for item in data:
        seed_checklists.setdefault(item["pattern"], item["checklist"])

    # One batched add() instead of one add() per row; skipped entirely when
    # the file holds no rows, since there is nothing to insert.
    if seed_checklists:
        collection.add(
            documents=list(seed_checklists.values()),
            metadatas=[{"source": "json"} for _ in seed_checklists],
            ids=list(seed_checklists.keys()),
        )
    print("✅ Loaded initial data from data.json")
else:
    print("ℹ️ ChromaDB already initialized with data")

✅ Loaded initial data from data.json


  return forward_call(*args, **kwargs)


## Function to generate checklist using Gemini

In [17]:
def generate_checklist(pattern: str) -> str:
    """Ask the Gemini model for a five-item RCA checklist for ``pattern``.

    Args:
        pattern: Free-text description of the observed issue; it is quoted
            verbatim inside the prompt.

    Returns:
        The model's reply text with leading/trailing whitespace removed.
    """
    # The empty first/last entries reproduce the leading and trailing newline
    # of the prompt.
    request_lines = [
        "",
        "You are an expert in root cause analysis for FMCG products.",
        f'Given the issue: "{pattern}", list 5 crisp subtasks to investigate the root causes. Only one point per line.',
        "",
    ]
    llm_reply = gemini_model.generate_content("\n".join(request_lines))
    return llm_reply.text.strip()

## RCA agent logic

In [18]:
def retrieve_or_generate_checklist(pattern: str, max_distance: "float | None" = None) -> str:
    """Produce an RCA checklist for ``pattern`` and store it in the collection.

    The collection is queried for the single nearest stored checklist. When
    one is found it is placed in the Gemini prompt as a reference; otherwise
    the checklist is generated from scratch via ``generate_checklist``. The
    result is then written back to the collection with ``pattern`` as its id.

    Args:
        pattern: Free-text description of the observed pattern; also used as
            the record id in the collection.
        max_distance: Optional cut-off on the query distance. When given, a
            nearest match whose distance exceeds it is treated as "no similar
            pattern". The default ``None`` accepts whatever the nearest match
            is, however far away.

    Returns:
        The checklist text with surrounding whitespace stripped.
    """
    results = collection.query(query_texts=[pattern], n_results=1)
    retrieved = results["documents"][0]

    if retrieved and max_distance is not None:
        # Distances are expected to be parallel to documents; if the result
        # carries none, the match is kept rather than guessed at.
        distances = (results.get("distances") or [[]])[0]
        if distances and distances[0] > max_distance:
            retrieved = []

    if retrieved:
        similarity_checklist = retrieved[0]
        print("📎 Retrieved similar pattern. Using it to augment the LLM prompt.")
        prompt = f"""
You are an FMCG root cause analysis expert.

A new pattern was observed: "{pattern}".

Here is a similar past issue and its checklist:
{similarity_checklist}

Now based on this and your expertise, generate a fresh checklist of just 5 crisp subtasks tailored to the new pattern. One point per line.
"""
        checklist = gemini_model.generate_content(prompt).text.strip()
    else:
        print("🚀 No similar pattern. Generating from scratch.")
        checklist = generate_checklist(pattern)

    # Store new pattern + checklist. upsert rather than add: the id is the
    # pattern text, so asking about the same pattern twice would otherwise
    # collide with the existing record and the fresh checklist would not be
    # stored.
    collection.upsert(
        documents=[checklist],
        metadatas=[{"source": "generated"}],
        ids=[pattern]
    )

    return checklist


## 🎯 Example test

In [None]:
# Example run: build (and store) a checklist for one observed pattern, then
# show it.
pattern = "Increased sales on monday mornings"
checklist = retrieve_or_generate_checklist(pattern=pattern)
print("\n📋 Checklist:\n", checklist, sep=" ")

📎 Retrieved similar pattern. Using it to augment the LLM prompt.

📋 Checklist:
 1. Analyze POS data for Saturday and Sunday sales and stock levels to determine if weekend stockouts fueled Monday morning demand.

2. Investigate social media mentions and sentiment relating to the product over the preceding weekend to identify potential viral trends or influencer impact.

3. Compare Monday morning sales to other days of the week across different regions, controlling for potential regional variations in consumer habits.

4.  Assess retailer-reported data on Monday morning stock replenishment and promotional display activities.

5. Examine delivery schedules and logistics data for any anomalies that might explain increased product availability on Monday mornings.


In [20]:
# Example run: a second pattern, going through the same retrieve-or-generate
# path.
pattern = "Less profit on icecreams during summer"
checklist = retrieve_or_generate_checklist(pattern=pattern)
print("\n📋 Checklist:\n", checklist, sep=" ")

📎 Retrieved similar pattern. Using it to augment the LLM prompt.

📋 Checklist:
 1. Analyze ice cream sales data by region, temperature, and promotional activity to identify areas and periods with unexpectedly low profit margins.

2. Investigate supply chain inefficiencies specific to ice cream distribution during peak summer temperatures (e.g., spoilage, increased transportation costs).

3. Assess competitor pricing and promotional strategies for ice cream during summer to determine market competitiveness.

4. Evaluate consumer purchasing behaviour during summer heatwaves, including potential shifts to alternative cooling options or reduced overall consumption.

5. Review summer-specific marketing and promotional campaigns for effectiveness in driving sales and maximizing profitability, considering messaging and channel reach.


In [22]:
# Example run: a third pattern; each call also writes its checklist back to
# the collection.
pattern = "Protein bar sales spike in tier-2 cities"
checklist = retrieve_or_generate_checklist(pattern=pattern)
print("\n📋 Checklist:\n", checklist, sep=" ")

📎 Retrieved similar pattern. Using it to augment the LLM prompt.

📋 Checklist:
 1. Analyze POS data from Tier-2 cities to identify specific protein bar varieties driving the sales spike and correlate with potential local events or trends.

2. Investigate social media and local news in Tier-2 cities for mentions of protein bars, fitness challenges, or relevant health campaigns.

3. Compare sales data of protein bars in Tier-2 cities against Tier-1 and Tier-3 cities to determine the uniqueness of the trend and rule out national campaigns.

4. Assess competitor activity in Tier-2 cities, including pricing, promotions, and stock availability, to determine if competitive factors contributed to the spike.

5. Examine distribution and logistical data for Tier-2 cities to identify any anomalies in delivery schedules or stock replenishment that may have influenced availability.
