# RAG - Retrieval Augmented Generation

In [185]:
import pandas as pd
import numpy as np 
from flask import Flask
import openai
from sklearn.metrics.pairwise import cosine_similarity
import chromadb
import warnings
import configparser

warnings.filterwarnings("ignore")


In [198]:
# Load the OpenAI API key from a local configuration file so the secret stays out of the notebook.
# Raw string: the Windows path contains backslashes that must not be parsed as escape sequences.
CONFIG_PATH = r'C:\David\ML\GenerativeAI2\config.ini'  # Path to your configuration file

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of files it
# did parse; fail here with a clear message rather than with a NoSectionError further down.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Configuration file not found or unreadable: {CONFIG_PATH}")
OPENAI_KEY = config.get('OPENAI_API', 'OPENAI_API')

# Register the key with the OpenAI client library
openai.api_key = OPENAI_KEY


## Let's take a look at the data we want to leverage

In [230]:
# Load the sample of customer tweets and company replies.
# Raw string: keeps the backslashes of the Windows path literal instead of relying on
# Python passing unrecognised escape sequences through unchanged (deprecated behaviour).
df = pd.read_csv(r"C:\David\ML\GenerativeAI2/data/twitter_data_clean_sample.csv")

# Preview the first rows
df.head()


Unnamed: 0,customer_tweet,company_tweet,company
0,Ordered this around 2am Friday morning and it ...,@383517 I am very happy to hear this Pablo:) I...,AmazonHelp
1,"@AmazonHelp what does ""ships in 1-3 weeks"" act...","@274096 If your item will ship in 1-3 weeks, t...",AmazonHelp
2,@115821 // Email from Representative not corre...,@528375 I'm sorry you haven't received your pa...,AmazonHelp
3,je l’ai déjà l’application amazon jdevrais êtr...,@792999 3/3 Ensuite décochez à nouveau les ca...,AmazonHelp
4,"I must say @115830, a package left under a doo...",@776873 I apologize for how your delivery was ...,AmazonHelp


## ChromaDb - vector storage

### First run - create a collection and compute embeddings

In [227]:
# Open the on-disk Chroma database at this path
client = chromadb.PersistentClient(path="C://David//ML//GenerativeAI2//ChromaDb")

# Embedding function backed by the OpenAI "text-embedding-3-small" model
openai_ef = chromadb.utils.embedding_functions.OpenAIEmbeddingFunction(
    api_key=OPENAI_KEY,
    model_name="text-embedding-3-small",
)

# Create a new collection configured for cosine distance.
# NOTE: create_collection is kept on purpose; this cell is meant for the first run only.
collection = client.create_collection(
    name="tweets_chromadb_sample",
    metadata={"hnsw:space": "cosine"},
)

# Plain Python list: the embedding function takes a list of documents, not a pandas Series
customer_tweets = df["customer_tweet"].tolist()

# One metadata dict per tweet; zipping the columns avoids the slow row-wise iterrows()
tweet_metadatas = [
    {"source": "sample", "company": str(company), "company_tweet": str(reply)}
    for company, reply in zip(df["company"], df["company_tweet"])
]

# Embed the customer tweets and save them, with the company replies as metadata
collection.add(
    embeddings=openai_ef(customer_tweets),
    documents=customer_tweets,
    metadatas=tweet_metadatas,
    ids=[str(position) for position in range(len(customer_tweets))],
)

### Second run - import your existing collection 

In [231]:
# Re-open the persisted Chroma database and fetch the collection by name
client = chromadb.PersistentClient(path="C://David//ML//GenerativeAI2//ChromaDb")
collection = client.get_collection(name="tweets_chromadb_sample")


In [213]:
# Show a single stored record as a sanity check
collection.peek(limit=1)

{'ids': ['0'],
 'embeddings': [[0.01468683686107397,
   -0.029017794877290726,
   -0.05299853906035423,
   -0.021284278482198715,
   0.0013961391523480415,
   -0.02970217727124691,
   0.01025204174220562,
   0.034136973321437836,
   -0.01501534041017294,
   -0.01808137074112892,
   -0.0028795371763408184,
   0.024035494774580002,
   -0.04571671411395073,
   -3.611718784668483e-05,
   0.00661455187946558,
   0.02406287007033825,
   0.00799358170479536,
   -0.010696889832615852,
   -0.07342049479484558,
   0.053491294384002686,
   -0.0019761528819799423,
   -0.038544394075870514,
   0.014768962748348713,
   -0.04908387362957001,
   -0.04453957825899124,
   -0.00857530627399683,
   -0.002066833432763815,
   -0.04927550256252289,
   0.0005898516974411905,
   -0.04218530282378197,
   0.0014183815801516175,
   -0.038434892892837524,
   0.041774675250053406,
   -0.04798886179924011,
   -0.029017794877290726,
   0.023405862972140312,
   0.007110728416591883,
   0.035943739116191864,
   -0.0379

## Find closest tweets

In [217]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the OpenAI embedding of `text`.

    Args:
        text: The string to embed.
        model: Name of the OpenAI embedding model to use.

    Returns:
        The embedding vector of the single input, as returned by the
        OpenAI embeddings endpoint.
    """
    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding


tweet = 'My account is locked, please help'

# Company to filter on, e.g. one of:
# ['AmazonHelp', 'AppleSupport', 'SpotifyCares', 'Uber_Support']
company_name = 'AmazonHelp'
k_closest = 3

# Embed the query with the same model name used when the collection was filled,
# then retrieve the k nearest customer tweets addressed to the chosen company.
closest_tweets = collection.query(
    query_embeddings=[get_embedding(tweet, model="text-embedding-3-small")],
    n_results=k_closest,
    where={"company": company_name},
)

closest_tweets


{'ids': [['95', '96', '30']],
 'distances': [[0.5181955695152283, 0.5578674077987671, 0.6125984191894531]],
 'metadatas': [[{'company': 'AmazonHelp',
    'company_tweet': "@795684 We're here to help if we can! Are you receiving an error message when you try to sign in? ^TR",
    'source': 'sample'},
   {'company': 'AmazonHelp',
    'company_tweet': '@465795 Hey, sorry to hear that. What is the amount and the note on the account next to it? ^AT',
    'source': 'sample'},
   {'company': 'AmazonHelp',
    'company_tweet': "@215479 I understand your concern. Please report this to our support team using the email or chat option. We'll help. ^ST",
    'source': 'sample'}]],
 'embeddings': None,
 'documents': [["@117634 My Kindle won't connect to my Amazon account - can you help?",
   "@115830 is there really a technical problem where you can't look into my account. Iv had money taken that makes no sense",
   '@115850 hi, I need to talk about a defective product I received but my no is blocke

## Integration of the closest tweets into our prompt

### Vanilla answer

In [218]:
# Baseline prompt: task description plus the customer's tweet, with no retrieved examples
instruction = f"""\
You are a chatbot answering customer's tweet. You are working for a company called {company_name}. 
You are provided with an example of a similar interaction between a customer and an agent. Reply to the customer's tweet in the same tone, structure and style than the provided example.
Tweet:
"{tweet}"
"""

# Single user turn carrying the whole prompt
messages = [dict(role="user", content=instruction)]

# Fixed seed so repeated runs are as comparable as the API allows
response = openai.chat.completions.create(
    messages=messages,
    model="gpt-3.5-turbo",
    temperature=0.7,
    seed=42,
)

# Text of the first returned choice
generated_text = response.choices[0].message.content

print(f"Instruction:\n\n{instruction} \n Response: \n{generated_text}")


Instruction:

You are a chatbot answering customer's tweet. You are working for a company called AmazonHelp. 
You are provided with an example of a similar interaction between a customer and an agent. Reply to the customer's tweet in the same tone, structure and style than the provided example.
Tweet:
"My account is locked, please help"
 
 Response: 
Customer Support: Hi there! We're sorry to hear that your account is locked. We'd be happy to help you with that. Please send us a DM with your account information so we can assist you further. Thank you!


### With RAG

In [232]:
# Task description that opens the prompt
instruction = f"""\
You are a chatbot answering customer's tweet. You are working for a company called {company_name}. 
You are provided with an example of a similar interaction between a customer and an agent. Reply to the customer's tweet in the same tone, structure and style than the provided example.

"""

# Pair each retrieved customer tweet with its stored company reply. Iterating over what the
# query actually returned (capped at k_closest) avoids an IndexError when the company filter
# leaves fewer than k_closest matches.
retrieved_examples = list(
    zip(closest_tweets["documents"][0], closest_tweets["metadatas"][0])
)[:k_closest]

# Append one "Customer / Agent" example per retrieved tweet
for example_number, (customer_text, reply_metadata) in enumerate(retrieved_examples, start=1):
    instruction += f"""\

    #####
    Example {example_number}:
    Customer : "{customer_text}"
    Agent : "{reply_metadata["company_tweet"]}"
"""

# Close the prompt with the tweet that needs an answer
instruction += f"""\
Tweet:
"{tweet}"
"""

# Single user turn carrying the whole prompt. The former system/user/assistant message list
# was removed: it was overwritten before being sent, so it never reached the model.
messages = [
    {"role": "user", "content": instruction}
]

response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=messages,
    seed=42,
    temperature=0.7,
)

# Text of the first returned choice
generated_text = response.choices[0].message.content

print(f"Instruction:\n\n{instruction} \n Response: \n{generated_text}")


'[0m' Instruction:

You are a chatbot answering customer's tweet. You are working for a company called AmazonHelp. 
You are provided with an example of a similar interaction between a customer and an agent. Reply to the customer's tweet in the same tone, structure and style than the provided example.


    #####
    Example 1:
    Customer : "@117634 My Kindle won't connect to my Amazon account - can you help?"
    Agent : "@795684 We're here to help if we can! Are you receiving an error message when you try to sign in? ^TR"

    #####
    Example 2:
    Customer : "@115830 is there really a technical problem where you can't look into my account. Iv had money taken that makes no sense"
    Agent : "@465795 Hey, sorry to hear that. What is the amount and the note on the account next to it? ^AT"

    #####
    Example 3:
    Customer : "@115850 hi, I need to talk about a defective product I received but my no is blocked. So could u please give me a call ASAP??! https://t.co/VpvcPcBWpu"
