In [1]:
import pinecone
from sentence_transformers import SentenceTransformer
from groq import Groq
from typing import List, Dict
import os
from dotenv import load_dotenv

  from tqdm.autonotebook import tqdm


In [37]:
class RSORagBot:
    def __init__(self, pinecone_api_key: str, pinecone_index_name: str, groq_api_key: str):
        # Initialize Pinecone
        self.pc = pinecone.Pinecone(api_key=pinecone_api_key)
        self.index = self.pc.Index(pinecone_index_name)
        
        # Initialize embedding model
        self.embed_model = SentenceTransformer('all-mpnet-base-v2')
        
        # Initialize Groq client
        self.groq_client = Groq(api_key=groq_api_key)
        
        # Define system prompt
        self.system_prompt = """You are a knowledgeable and helpful assistant for University of Chicago students, 
        specializing in Registered Student Organizations (RSOs). Your role is to help students learn about and 
        engage with RSOs by:

        - Providing accurate, detailed information about specific RSOs when asked
        - Recommending relevant RSOs based on students' interests and preferences when asked
        - Explaining RSO activities, events, and opportunities

        Focus on the specific information or guidance the student is seeking."""

    def get_relevant_rsos(self, query: str, top_k: int = 3) -> List[Dict]:
        """
        Get relevant RSOs based on the query
        """
        # Create query embedding
        query_embedding = self.embed_model.encode(query).tolist()
        
        # Query Pinecone
        results = self.index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True
        )
        
        return results.matches

    def format_context(self, relevant_rsos: List[Dict]) -> str:
            """
            Format RSO information into a context string for the LLM
            """
            context = "Here is the RSO information:\n\n"
            
            for rso in relevant_rsos:
                metadata = rso.metadata
                context += f"Name: {metadata['name']}\n"
                context += f"Description: {metadata['description']}\n"
                context += f"Categories: {', '.join(metadata['categories'])}\n"
                
                # Only include contact info if it exists and isn't 'None'
                if metadata['contact_email'] and metadata['contact_email'].lower() != 'none':
                    context += f"Contact: {metadata['contact_email']}\n"
                
                if metadata['social_media_links']:
                    context += f"Social Media: {', '.join(metadata['social_media_links'])}\n"
                
                if metadata['additional_info']:
                    context += f"Additional Info: {', '.join(metadata['additional_info'])}\n"
                
                context += f"Website: {metadata['full_url']}\n\n"
            
            return context

    def generate_response(self, query: str) -> str:
        """
        Generate a response based on the query and relevant RSO information
        """
        # Get relevant RSOs

        query2 = query.lower().replace('club', 'rso')
        print(f"query1: {query}, query2: {query2}")

        relevant_rsos = self.get_relevant_rsos(query2)
        
        # Format context
        context = self.format_context(relevant_rsos)
        
        # Construct universal prompt that works for both information and recommendations
        prompt = f"""Here is a student's question about UChicago RSOs: "{query}"

        Below are the 3 top entries when we queried our RSO database with the query:



        {context}

        Those are the 3 top entries. If only one RSO seems relevant, only mention that RSO. 
        If the question is not related to the 3 top entries, ignore the 3 top entries. 
        Now provide a natural, conversational response that:
        1. Directly addresses their specific question or need
        2. Includes only relevant details and context from the RSO information
        """
        
        # Get response from Groq
        response = self.groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": prompt}
            ],
            model="mixtral-8x7b-32768",
            temperature=0.7,
            max_tokens=1024
        )
        
        return response.choices[0].message.content

In [3]:
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")


In [38]:
bot = RSORagBot(
        pinecone_api_key=os.getenv("PINECONE_API_KEY"),
        pinecone_index_name="rso-chatbot",  # replace with your index name
        groq_api_key=os.getenv("GROQ_API_KEY")
    )
    
    

In [46]:
# Example query

query = "Help me find marketing opportunities for undergrads"
response = bot.generate_response(query)
print(response)

query1: Help me find marketing opportunities for undergrads, query2: help me find marketing opportunities for undergrads
It sounds like you're looking for opportunities to get involved in marketing as an undergraduate student. I have a couple of RSOs in mind that might interest you!

First, there's Pareto Solutions, a student-run consulting group that focuses on marketing engagements with local clients. As a member, you'll have the chance to develop skills in data analysis, learn business frameworks, conduct primary research, and work on marketing projects that make a positive impact on the community. Pareto's meetings are held on Wednesdays from 8-9 PM in Stuart 101, and you can contact them at uc.paretosolutions@gmail.com or join their listhost at pareto_solutions-request@lists.uchicago.edu.

Another RSO to consider is The Mark, which is dedicated to helping brands shine through marketing and advertising consulting. The Mark's meetings take place on Mondays at 6 PM in Harper 104, and

In [33]:
bot2 = RSORagBot(
        pinecone_api_key=os.getenv("PINECONE_API_KEY"),
        pinecone_index_name="rso-chatbot",  # replace with your index name
        groq_api_key=os.getenv("GROQ_API_KEY")
    )
    

query = "Help me find quantitative finance RSOs"

relevant_rsos = bot.get_relevant_rsos(query)
        
        # Format context
context = bot.format_context(relevant_rsos)
print(context)

Here is the RSO information:

Name: Maroon Capital
Description: Maroon Capital is UChicago’s oldest and largest quantitative finance RSO with an aim to provide education, career and networking resources, and talent development for our members. Since 2010, we've established and expanded our own industry-relevant curriculum covering topics in quantitative and fundamental finance, statistics, market making, and coding. If you're a prospective member, please visit our website (mc.uchicago.edu) for more information / questions.
Categories: Quantitative Trading, Academic Interest, Finance, Investment
Contact: jsagrolikar@uchicago.edu
Social Media: http://mc.uchicago.edu/, https://www.facebook.com/marooncapital
Additional Info: RSO Advisor: Peer Advisors, Advising Model Categorization:: Orange Group, Year Created:: 2013, Regular Meetings (Day/Time/Location):: Changes quarterly, RSO Listhost:: themarooncapital@lists.uchicago.edu, This organization is affiliated with a parent/national/internati