In [2]:
import pinecone
from sentence_transformers import SentenceTransformer
from groq import Groq
from typing import List, Dict
import os
from dotenv import load_dotenv

In [10]:
class RSORagBot:
    """Retrieval-augmented chatbot that recommends UChicago RSOs.

    A query is embedded with a SentenceTransformer model, the closest RSO
    records are fetched from a Pinecone index, and their metadata is passed
    to a Groq-hosted chat model as context for the final recommendation.
    """

    # Placeholder written into the context when a field has no usable value,
    # so the LLM can tell "not available" apart from "not mentioned".
    _MISSING = "N/A"

    def __init__(self, pinecone_api_key: str, pinecone_index_name: str, groq_api_key: str,
                 model: str = "mixtral-8x7b-32768"):
        """
        Set up the Pinecone index handle, the embedding model and the Groq client.

        Args:
            pinecone_api_key: API key used to create the Pinecone client.
            pinecone_index_name: Name of the Pinecone index holding the RSO vectors.
            groq_api_key: API key used to create the Groq client.
            model: Groq chat model used by generate_response. Defaults to the
                model this class originally hard-coded.
                NOTE(review): hosted models get retired over time; confirm the
                default is still served before relying on it.
        """
        # Initialize Pinecone
        self.pc = pinecone.Pinecone(api_key=pinecone_api_key)
        self.index = self.pc.Index(pinecone_index_name)
        
        # Initialize embedding model
        # presumably the same model that produced the vectors stored in the
        # index; query and index embeddings must match -- TODO confirm
        self.embed_model = SentenceTransformer('all-mpnet-base-v2')
        
        # Initialize Groq client
        self.groq_client = Groq(api_key=groq_api_key)
        self.model = model
        
        # Define system prompt
        self.system_prompt = """You are a helpful assistant for University of Chicago students, 
        helping them find Registered Student Organizations (RSOs) that match their interests. 
        Use the provided RSO information to give detailed, relevant recommendations. 
        Always include the RSO's name, a brief description, and contact information when available."""

    def get_relevant_rsos(self, query: str, top_k: int = 5) -> List[Dict]:
        """
        Get relevant RSOs based on the query.

        Args:
            query: Free-text description of the student's interests.
            top_k: Maximum number of matches to request from the index.

        Returns:
            The ``matches`` of the Pinecone query result. format_context reads
            each match's ``metadata`` (attribute or ``"metadata"`` dict key).
        """
        # Create query embedding
        query_embedding = self.embed_model.encode(query).tolist()
        
        # Query Pinecone
        results = self.index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True
        )
        
        return results.matches

    @staticmethod
    def _as_text(value) -> str:
        """Render one metadata value as text: sequences are comma-joined, None becomes ''."""
        if value is None:
            return ""
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        # A plain string must not go through join(), which would split it
        # into individual characters.
        return str(value)

    def _field(self, metadata, key: str) -> str:
        """Return the text for an always-printed field, or the N/A placeholder if it has no value."""
        return self._as_text(metadata.get(key)) or self._MISSING

    def format_context(self, relevant_rsos: List[Dict]) -> str:
        """
        Format RSO information into a context string for the LLM.

        Missing or empty metadata fields never raise: name, description,
        categories, contact and website fall back to "N/A", while the social
        media and additional-info lines are omitted when there is nothing to show.

        Args:
            relevant_rsos: Matches as returned by get_relevant_rsos; each item
                exposes its metadata either as a ``metadata`` attribute or as a
                ``"metadata"`` dict key.

        Returns:
            A newline-separated text block with one section per RSO, or a
            short "nothing found" notice when there are no matches.
        """
        if not relevant_rsos:
            # Do not announce "relevant RSOs" above an empty list.
            return "No relevant RSOs were found.\n"

        parts = ["Here are some relevant RSOs:\n\n"]
        
        for rso in relevant_rsos:
            if isinstance(rso, dict):
                metadata = rso.get("metadata")
            else:
                metadata = getattr(rso, "metadata", None)
            metadata = metadata or {}

            parts.append(f"Name: {self._field(metadata, 'name')}\n")
            parts.append(f"Description: {self._field(metadata, 'description')}\n")
            parts.append(f"Categories: {self._field(metadata, 'categories')}\n")
            parts.append(f"Contact: {self._field(metadata, 'contact_email')}\n")

            social_media = self._as_text(metadata.get('social_media_links'))
            if social_media:
                parts.append(f"Social Media: {social_media}\n")

            additional_info = self._as_text(metadata.get('additional_info'))
            if additional_info:
                parts.append(f"Additional Info: {additional_info}\n")

            parts.append(f"Website: {self._field(metadata, 'full_url')}\n\n")
        
        return "".join(parts)

    def generate_response(self, query: str, top_k: int = 5) -> str:
        """
        Generate a response to the user's query.

        Retrieves the closest RSOs, formats them as context, and asks the
        Groq chat model for a recommendation.

        Args:
            query: The student's question or stated interests.
            top_k: Number of RSOs to retrieve as context (default 5, the
                value previously implied by get_relevant_rsos).

        Returns:
            The text content of the first completion choice.
        """
        # Get relevant RSOs
        relevant_rsos = self.get_relevant_rsos(query, top_k=top_k)
        
        # Format context
        context = self.format_context(relevant_rsos)
        
        # Construct the prompt
        prompt = f"""Based on the following RSO information, please recommend appropriate RSOs for a student with this query: "{query}"

        {context}

        Please provide a natural, conversational response that:
        1. Highlights the most relevant RSOs for their interests
        2. Explains why each RSO might be a good fit
        3. Includes practical information like how to get involved
        """
        
        # Get response from Groq
        response = self.groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": prompt}
            ],
            model=self.model,
            temperature=0.7,
            max_tokens=1024
        )
        
        return response.choices[0].message.content

In [6]:
# Load variables from a .env file into the process environment, then read the
# two API keys the bot needs.
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast with a readable message instead of handing None to the API clients.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GROQ_API_KEY", GROQ_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_keys)}"
    )


In [12]:
# Build the bot from the API keys already loaded into PINECONE_API_KEY and
# GROQ_API_KEY, rather than reading the environment a second time.
bot = RSORagBot(
    pinecone_api_key=PINECONE_API_KEY,
    pinecone_index_name="rso-chatbot",  # replace with your index name
    groq_api_key=GROQ_API_KEY,
)
    
    

In [13]:
# Example query: run one end-to-end request through the bot (retrieval from
# the Pinecone index, then a Groq chat completion) and print the reply.
# print() is used so that newlines in the returned text render as line breaks.
query = "I'm interested in machine learning and artificial intelligence clubs"
response = bot.generate_response(query)
print(response)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Sure, I'd be happy to help you find some RSOs that match your interests in machine learning and artificial intelligence! Here are a few that I think you might find interesting:

1. **AI @ UChicago**: This RSO is specifically focused on artificial intelligence and offers a range of educational opportunities, hands-on projects, and community-building activities related to AI. They cover topics such as AI ethics, applications, and technologies, which sounds like it would be right up your alley. You can get involved by joining their mailing list or reaching out to their contact, Arjun Sohur, at [arjunsohur@uchicago.edu](mailto:arjunsohur@uchicago.edu). You can also check out their website at <https://ai-uchicago.com/> or follow them on social media.
2. **UChicago ACM**: While this RSO is not exclusively focused on AI or machine learning, they do have committees dedicated to these topics. UChicago ACM is the University of Chicago’s student chapter of the Association for Computing Machinery 