<a href="https://colab.research.google.com/github/gauravm999/LLMs/blob/main/RAG_vs_Agentic_RAG_CPG_Sales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#RAG for CPG Sales

In [1]:
# Google Colab Code for RAG

# Step 1: Install necessary libraries
!pip install transformers faiss-cpu pandas

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1


In [2]:
# Step 2: Import necessary modules
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import faiss

In [3]:
# Step 3: Load sample dataset
data = {
    'Region': ['North', 'South', 'East', 'West'],
    'Sales Data': ['5000 units', '3000 units', '8000 units', '2000 units'],
    'Customer Feedback': ['Positive reviews, some issues with stock.',
                          'Moderate reviews, mentions of better alternatives.',
                          'High demand, positive reviews.',
                          'Low sales, customers demand discounts.'],
    'Competitor Activity': ['Launched a discount.',
                            'No competitor action.',
                            'Running heavy ads.',
                            'Aggressive discounts.']
}
df = pd.DataFrame(data)
print(df)

  Region  Sales Data                                  Customer Feedback  \
0  North  5000 units          Positive reviews, some issues with stock.   
1  South  3000 units  Moderate reviews, mentions of better alternati...   
2   East  8000 units                     High demand, positive reviews.   
3   West  2000 units             Low sales, customers demand discounts.   

     Competitor Activity  
0   Launched a discount.  
1  No competitor action.  
2     Running heavy ads.  
3  Aggressive discounts.  


In [7]:
# Step 4: Initialize a retrieval-augmented generation model (e.g., T5)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
rag = pipeline('text2text-generation', model=model, tokenizer=tokenizer)

# Step 5: Build the FAISS index (for retrieval)
import numpy as np # import the numpy module
def build_faiss_index(df):
    embeddings = [] # create a list to store all the embeddings
    for i, row in df.iterrows():
        text = f"Region: {row['Region']}, Sales: {row['Sales Data']}, Feedback: {row['Customer Feedback']}, Competitor Activity: {row['Competitor Activity']}"
        input_ids = tokenizer.encode(text, return_tensors='pt')
        embedding = model.get_encoder()(input_ids).last_hidden_state.mean(dim=1).detach().numpy()
        embeddings.append(embedding) # append the embedding to the list
    embeddings = np.concatenate(embeddings, axis=0) # concatenate all embeddings into a single array
    index = faiss.IndexFlatL2(embeddings.shape[1])  # Use the correct dimension for the index
    index.add(embeddings)
    return index

index = build_faiss_index(df)

# Step 6: Retrieve information
def retrieve(query):
    query_embedding = model.get_encoder()(tokenizer.encode(query, return_tensors='pt')).last_hidden_state.mean(dim=1).detach().numpy()
    D, I = index.search(query_embedding, 3)  # Top 3 closest regions
    for i in I[0]:
        print(f"Retrieved Info: {df.iloc[i]}")



In [12]:
# Step 7: Test the RAG pipeline
query = "Why did the East region perform better than others?"
retrieve(query)

Retrieved Info: Region                                           East
Sales Data                                 8000 units
Customer Feedback      High demand, positive reviews.
Competitor Activity                Running heavy ads.
Name: 2, dtype: object
Retrieved Info: Region                                                   West
Sales Data                                         2000 units
Customer Feedback      Low sales, customers demand discounts.
Competitor Activity                     Aggressive discounts.
Name: 3, dtype: object
Retrieved Info: Region                                                     North
Sales Data                                            5000 units
Customer Feedback      Positive reviews, some issues with stock.
Competitor Activity                         Launched a discount.
Name: 0, dtype: object


In [13]:
# Use the model to generate a report based on retrieved data
response = rag(f"Why did the East region perform better than others? {df.iloc[2]['Sales Data']} {df.iloc[2]['Customer Feedback']} {df.iloc[2]['Competitor Activity']}")
print("RAG Response: ", response[0]['generated_text'])

RAG Response:  East region perform better than others


# Agentic RAG for CPG Sales

In [14]:
# Google Colab Code for Agentic RAG

# Step 1: Install necessary libraries
!pip install transformers faiss-cpu pandas



In [15]:
# Step 2: Import necessary modules
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import faiss

# Step 3: Load sample dataset (same as above)
data = {
    'Region': ['North', 'South', 'East', 'West'],
    'Sales Data': ['5000 units', '3000 units', '8000 units', '2000 units'],
    'Customer Feedback': ['Positive reviews, some issues with stock.',
                          'Moderate reviews, mentions of better alternatives.',
                          'High demand, positive reviews.',
                          'Low sales, customers demand discounts.'],
    'Competitor Activity': ['Launched a discount.',
                            'No competitor action.',
                            'Running heavy ads.',
                            'Aggressive discounts.']
}
df = pd.DataFrame(data)
print(df)

  Region  Sales Data                                  Customer Feedback  \
0  North  5000 units          Positive reviews, some issues with stock.   
1  South  3000 units  Moderate reviews, mentions of better alternati...   
2   East  8000 units                     High demand, positive reviews.   
3   West  2000 units             Low sales, customers demand discounts.   

     Competitor Activity  
0   Launched a discount.  
1  No competitor action.  
2     Running heavy ads.  
3  Aggressive discounts.  


In [17]:
# Step 4: Initialize RAG and FAISS as in the RAG example
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
rag = pipeline('text2text-generation', model=model, tokenizer=tokenizer)

def build_faiss_index(df):
    embeddings = [] # create a list to store the embeddings
    for i, row in df.iterrows():
        text = f"Region: {row['Region']}, Sales: {row['Sales Data']}, Feedback: {row['Customer Feedback']}, Competitor Activity: {row['Competitor Activity']}"
        input_ids = tokenizer.encode(text, return_tensors='pt')
        embedding = model.get_encoder()(input_ids).last_hidden_state.mean(dim=1).detach().numpy()
        embeddings.append(embedding) # append the embedding to the list

    # get the dimensionality of the embeddings
    dim = embeddings[0].shape[1]
    # create the Faiss index with the correct dimensionality
    index = faiss.IndexFlatL2(dim)
    # add the embeddings to the index
    index.add(np.array(embeddings).reshape(len(embeddings), dim))
    return index

index = build_faiss_index(df)

# Step 5: Agentic decision-making function
def agentic_decision(query):
    query_embedding = model.get_encoder()(tokenizer.encode(query, return_tensors='pt')).last_hidden_state.mean(dim=1).detach().numpy()
    D, I = index.search(query_embedding, 1)  # Find the closest region

    closest_region = df.iloc[I[0][0]]
    print(f"Agentic Decision-Making for Region: {closest_region['Region']}")

    # Autonomous actions based on retrieved information (e.g., adjust discount strategy)
    if closest_region['Sales Data'] == '2000 units':
        print("Low sales detected. Action: Increase discount to 20% for the West region.")
    else:
        print(f"Region {closest_region['Region']} is performing well. No immediate action required.")

# Step 6: Test the Agentic RAG model
query = "How should we adjust promotion strategies based on sales performance?"
agentic_decision(query)



Agentic Decision-Making for Region: West
Low sales detected. Action: Increase discount to 20% for the West region.
