In [7]:


# Import Google GenerativeAI Python module
import google.generativeai as genai

# Configure the Gemini client with an API key taken from the environment.
# The key must never be embedded in the notebook: notebooks are routinely
# shared and committed, and anything typed here is exposed with them.
# NOTE(security): a literal key was previously stored on this line; treat that
# key as compromised and revoke/rotate it.
import os

_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    # Fail early with an actionable message instead of an opaque auth error
    # on the first model call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment before starting the kernel."
    )
genai.configure(api_key=_gemini_api_key)

# Generation settings handed to the GenerativeModel instances built below
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# System prompt: the assistant persona plus the guidelines it should follow
SYSTEM_INSTRUCTION = """You are a virtual restaurant and bar recommendation assistant. Your goal is to provide users with highly personalized recommendations based on their preferences and needs.

Here are some guidelines to follow:

- Understand the User's Intent: Carefully analyze the user's query.
- Leverage User Preferences: Utilize the user's past behavior.
- Consider Dietary Restrictions: Factor in dietary restrictions.
- Provide Relevant Information: Offer details like cuisine type, price, and ambiance.
- Handle Ambiguous Queries: Ask clarifying questions.
- Be Conversational and Engaging: Maintain a friendly tone."""

# Build the gemini-1.5-pro model with the generation settings and system prompt
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    system_instruction=SYSTEM_INSTRUCTION,
    generation_config=generation_config,
)

# Start a chat that is pre-seeded with one example greeting exchange
seed_history = [
    dict(role="user", parts=["Hello"]),
    dict(role="model", parts=["Hello there! I am a virtual agent for Welp!"]),
]
chat_session = model.start_chat(history=seed_history)

In [8]:
import pandas as pd
import pickle

# Read the spreadsheet into a DataFrame
df = pd.read_excel("solution-case-study-activity-3/ailtk-case-apache-hop-output.xls")


def _row_to_document(row):
    # Merge one row's 'input' and 'output' cells into a single text document.
    return f"{row['input']}. {row['output']}"


# One document per row; expects columns named 'input' and 'output'
# (adjust _row_to_document if the sheet uses different headers)
corpus = df.apply(_row_to_document, axis=1).tolist()

# Persist the corpus so it can be reloaded from disk later
PICKLE_FILE = "corpus.pkl"

with open(PICKLE_FILE, "wb") as f:
    pickle.dump(corpus, f)

print(f"Corpus successfully saved to {PICKLE_FILE}")


import os
import pickle
import re
from typing import List

class RAGOrchestrator:
    """Retrieve the corpus documents most similar to a prompt and feed them to an LLM.

    The corpus is loaded from a pickle file when the instance is created.
    Similarity is the Jaccard index over lower-cased word tokens.
    """

    def __init__(self, pickle_file: str, model):
        """Store the configuration and load the corpus.

        Parameters:
            pickle_file (str): Path to the pickled corpus file.
            model: LLM instance exposing ``generate_content(prompt)``; the
                returned object must have a ``text`` attribute.

        Raises:
            FileNotFoundError: If ``pickle_file`` does not exist.
        """
        self.pickle_file = pickle_file
        self.model = model
        self.corpus = self._load_corpus()

    def _load_corpus(self) -> List[str]:
        """Load and return the corpus stored in ``self.pickle_file``.

        Raises:
            FileNotFoundError: If the pickle file does not exist.
        """
        if not os.path.exists(self.pickle_file):
            raise FileNotFoundError(f"Pickle file '{self.pickle_file}' not found. Please generate it first.")

        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only point this at pickle files you generated yourself.
        with open(self.pickle_file, "rb") as f:
            corpus = pickle.load(f)

        # Report success only after the load has actually succeeded.
        print("Corpus loaded from pickle file.")
        return corpus

    @staticmethod
    def _tokenize(text: str) -> set:
        """Return the set of lower-cased word tokens in ``text``.

        Tokens are runs of word characters, so surrounding punctuation is
        dropped ("methods." and "methods" yield the same token).
        """
        return set(re.findall(r"\w+", text.lower()))

    @staticmethod
    def _jaccard_similarity(query: str, document: str) -> float:
        """Return the Jaccard similarity between a query and a document.

        The score is |intersection| / |union| of the two token sets, in the
        range [0.0, 1.0]. Returns 0.0 when neither text contains any token.
        """
        query_tokens = RAGOrchestrator._tokenize(query)
        document_tokens = RAGOrchestrator._tokenize(document)

        union = query_tokens | document_tokens
        if not union:
            return 0.0
        return len(query_tokens & document_tokens) / len(union)

    def _get_similar_documents(self, query: str, top_n: int = 5) -> List[str]:
        """Return up to ``top_n`` corpus documents, most similar to ``query`` first.

        Documents with equal scores keep their corpus order. A non-positive
        ``top_n`` yields an empty list.
        """
        if top_n <= 0:
            # A negative slice bound would silently drop items from the end
            # of the ranking rather than limit the result size.
            return []

        similarities = [self._jaccard_similarity(query, doc) for doc in self.corpus]
        # sorted() is stable, also with reverse=True, so ties stay in corpus order.
        ranked = sorted(range(len(similarities)), key=similarities.__getitem__, reverse=True)

        return [self.corpus[i] for i in ranked[:top_n]]

    def generate_augmented_response(self, user_prompt: str) -> str:
        """Answer ``user_prompt`` with the LLM, using retrieved documents as context.

        The most similar documents are appended to the prompt, separated by
        single spaces, and the combined text is sent to the model.

        Returns:
            str: The ``text`` attribute of the model's response.
        """
        similar_docs = self._get_similar_documents(user_prompt)
        injected_prompt = f"{user_prompt} {' '.join(similar_docs)}"

        response = self.model.generate_content(injected_prompt)
        return response.text

# Example usage: answer one sample question with retrieval-augmented context
PICKLE_FILE = "corpus.pkl"
MODEL = genai.GenerativeModel(
    generation_config=generation_config,
    model_name="gemini-1.5-flash",
)
orchestrator = RAGOrchestrator(pickle_file=PICKLE_FILE, model=MODEL)
response = orchestrator.generate_augmented_response(
    user_prompt="Tell me about coffee preparation methods."
)
print(response)

Corpus successfully saved to corpus.pkl
Corpus loaded from pickle file.
The provided text focuses on reviews of coffee shops, not on coffee preparation methods.  Therefore, I cannot answer your question about coffee preparation methods using this data.  The reviews mention caramel lattes and coffee, but don't describe *how* the coffee was prepared (e.g., espresso machine, pour over, French press, etc.).

