In [None]:
# !pip install dspy-ai cloudpickle matplotlib

In [1]:
# Import relevant packages
import dspy # pip install dspy-ai
from dspy.signatures.signature import signature_to_template
# Setting up LLM models
# Vertex AI project and Gemini model identifier passed to the DSPy client below.
# NOTE(review): the project id is hard-coded; consider reading it from the environment.
PROJECT_ID = "wiq-gen-ai-rd-dev"
MODEL_ID = "gemini-1.5-flash-001"

# Single LM client shared by the whole notebook (registered globally below).
flash = dspy.GoogleVertexAI(
    model=MODEL_ID, 
    max_tokens=1000,
    temperature=0.05, 
    project=PROJECT_ID,
)

# Note: lm is language model and rm is retrieval model
# NOTE(review): `flash` is a language model, yet it is also registered as the
# retrieval model. BasicMultiHop's dspy.Retrieve step depends on `rm` -- confirm that
# this client can actually return passages, or configure a real retriever.
dspy.settings.configure(lm=flash, rm=flash)

# Fixed themes for the prompt
# This string is interpolated verbatim into the signature field descriptions, so its
# leading newline, indentation and trailing commas all become part of the prompt text.
themes = """
    Technical Learning Resources,
    Clarity of Requirements,
    Time Commitment / Workload,
    Showcase / Presentation Format,
    Communication and Information Sharing,
    Team Formation and Dynamics,
    Relevance of Training Content,
    Application to Business / Use Cases,
    Accessibility and Inclusion,
    Incentives and Recognition,
    Post-Hackathon Follow-up,
    Support from Leadership / Mentors,
"""

  from .autonotebook import tqdm as notebook_tqdm


Import Training and Testing Dataset

In [2]:
import pandas as pd
from dspy.datasets import DataLoader

# Seed for the train/test shuffle. Without it every Restart & Run All produces a
# different split, so scores from different runs cannot be compared.
SPLIT_SEED = 42

# Load the labelled feedback and keep only rows that have both a comment and a label.
data = pd.read_csv("./golden_dataset.csv")
dataset = data[["comments", "answer"]].dropna(subset=["comments", "answer"])
# For a quick smoke run, uncomment to keep only a handful of rows:
# dataset = dataset.head(5)

# Create training and testing dataset
dl = DataLoader()
# `comments` is the model input; `answer` stays on each example as the gold label.
hackathon_dataset = dl.from_pandas(dataset, fields=("comments", "answer"), input_keys=['comments'])
# `hackathon_dataset` is a list of dspy.Example; 80% of it is used for training.
splits = dl.train_test_split(hackathon_dataset, train_size=0.8, random_state=SPLIT_SEED)
train_dataset = splits['train']
test_dataset = splits['test']

In [8]:
# Setting up signature for DSPy
class GenerateTheme(dspy.Signature):
    """Classify user feedback from hackathon to one single theme"""
    # The docstring above is prompt text (DSPy signatures use it as the task
    # instruction), so it is intentionally left exactly as written.

    # Input: the raw feedback comment to classify.
    comments = dspy.InputField(desc="user feedback from hackathon")
    # Output: the chosen theme. The f-string reads the module-level `themes` string;
    # the name `themes` is only rebound to this field inside the class namespace
    # after the right-hand side has been evaluated.
    themes = dspy.OutputField(desc=f"only pick one of the following choices: {themes}")


class FollowupQuery(dspy.Signature):
    """Generate a query which is conducive to classifying the comment"""
    # The docstring above is prompt text (DSPy signatures use it as the task
    # instruction), so it is intentionally left exactly as written.

    # Input: facts gathered so far; BasicMultiHop passes the accumulated list of
    # retrieved passages here. `{themes}` is the module-level theme list.
    context = dspy.InputField(desc=f"contains relevant facts to classify comments from hackathon to {themes}")
    # Input: the raw feedback comment to classify.
    comments = dspy.InputField(desc="user feedback from hackathon")
    # Output: the next search query used by the retrieval step.
    search_query = dspy.OutputField(desc="Judge if the context is adequate to classify user comments, if not adequate or if it is blank, generate a search query that would help you classify the comments")
    

In [9]:
## Vanilla
class Vanilla(dspy.Module):
    """Baseline classifier: one plain dspy.Predict call over the GenerateTheme signature."""

    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.Predict(GenerateTheme)

    def forward(self, comments):
        """Classify one feedback comment.

        Args:
            comments: the feedback text to classify.

        Returns:
            dspy.Prediction whose `answer` is the predicted theme with any echoed
            "Themes: " prefix and surrounding whitespace removed.
        """
        answer = self.generate_answer(comments=comments)
        raw_theme = answer.themes
        # The completion sometimes echoes the field prefix. Keep only the text after
        # the first "Themes: " when it is present; otherwise use the completion as is.
        # (Indexing `split("Themes: ", 1)[1]` raised IndexError when it was absent.)
        _, marker, after_marker = raw_theme.partition("Themes: ")
        theme = (after_marker if marker else raw_theme).strip()
        return dspy.Prediction(answer=theme)


vanilla = Vanilla()

## COT
class COT(dspy.Module):
    """Chain-of-thought classifier built on the GenerateTheme signature."""

    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.ChainOfThought(GenerateTheme)

    def forward(self, comments):
        """Return a dspy.Prediction whose `answer` is the `themes` field produced for `comments`."""
        predicted_theme = self.generate_answer(comments=comments).themes
        return dspy.Prediction(answer=predicted_theme)


cot = COT()

## ReAct
class ReAct(dspy.Module):
    """Classifier that wraps dspy.ReAct around the GenerateTheme signature."""

    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.ReAct(GenerateTheme)

    def forward(self, comments):
        """Return a dspy.Prediction whose `answer` is the `themes` field produced for `comments`."""
        predicted_theme = self.generate_answer(comments=comments).themes
        return dspy.Prediction(answer=predicted_theme)


react = ReAct()


## BasicMultiHop
class GenerateThemeWithContext(dspy.Signature):
    """Classify user feedback from hackathon to one single theme"""
    # Same task as GenerateTheme plus a `context` input, so that the passages
    # gathered by BasicMultiHop are actually part of the classification prompt.
    context = dspy.InputField(desc="may contain relevant facts")
    comments = dspy.InputField(desc="user feedback from hackathon")
    themes = dspy.OutputField(desc=f"only pick one of the following choices: {themes}")


class BasicMultiHop(dspy.Module):
    """Multi-hop classifier: generate a query, retrieve passages, repeat, then classify.

    Args:
        num_passages: number of passages requested from the retriever per hop.
        max_hops: number of query/retrieve rounds before classifying (default 2).
    """

    def __init__(self, num_passages=3, max_hops=2):
        # Initialise dspy.Module like the sibling modules do; this call was missing.
        super().__init__()
        self.max_hops = max_hops
        # NOTE(review): dspy.Retrieve relies on the retrieval model configured in
        # dspy.settings; this notebook registers the LM `flash` as `rm` -- confirm
        # that it can return passages.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_query = dspy.ChainOfThought(FollowupQuery)
        # GenerateTheme has no `context` input, so the `context=` keyword passed in
        # forward() never reached the prompt. Use the context-aware signature instead.
        self.generate_answer = dspy.ChainOfThought(GenerateThemeWithContext)

    def forward(self, comments):
        """Classify `comments`, returning a dspy.Prediction with the theme in `answer`."""
        context = []

        for _ in range(self.max_hops):
            # Each hop sees everything retrieved so far and proposes the next query.
            query = self.generate_query(context=context, comments=comments).search_query
            context += self.retrieve(query).passages

        answer = self.generate_answer(context=context, comments=comments)
        return dspy.Prediction(answer=answer.themes)


multihop = BasicMultiHop(num_passages=3)


In [54]:
# Test a question
# Pick the candidate module to try on a single comment.
rag = cot
# Call the module itself (not `.forward`) with the `comments` keyword, the same way
# ModelSelection.question_for_model invokes compiled models.
pred = rag(
    comments="The topic and content was very interesting and the opportunity to connect with technical and commercial mentors was great! In the future, it would be great to see either more structure to the hackathon, or more communication about the structure of the hackathon."
)
pred



Prediction(
    answer='**Clarity of Requirements, Support from Leadership / Mentors**'
)

In [10]:
# flash.inspect_history(n=1)

Next, define the module and optimizer combinations that serve as our model candidates.

In [5]:
from dspy.teleprompt import LabeledFewShot, BootstrapFewShot

# Metric used both to bootstrap demonstrations and to score the candidates.
metric = dspy.evaluate.metrics.answer_exact_match

# Candidate programs, keyed by the name shown in the evaluation matrix.
modules = {
    'vanilla': vanilla,
    'cot': cot,
    'react': react,
    'multihop': multihop,
}

# Candidate optimizers; `None` means the module is used as-is, without compilation.
optimizers = {
    'none': None,
    'labeled_few_shot': LabeledFewShot(),
    'bootstrap_few_shot': BootstrapFewShot(metric=metric, max_errors=20),
    # Disabled candidate. NOTE(review): BootstrapFewShotWithRandomSearch is not
    # imported in this cell; add the import before re-enabling.
    # 'bootstrap_few_shot_random_search': BootstrapFewShotWithRandomSearch(
    #                                         max_bootstrapped_demos=8,
    #                                         max_labeled_demos=8,
    #                                         num_candidate_programs=10,
    #                                         num_threads=8,
    #                                         metric=metric,
    #                                         teacher_settings=dict(lm=flash)
    #                                     )    
}

Here we define a helper class to facilitate the evaluation

In [6]:
from dspy.evaluate.evaluate import Evaluate
import pandas as pd

NUM_THREADS = 4


class ModelSelection():
    """Compile every (module, optimizer) combination and score each one on a test set.

    Attributes:
        models: one dict per module, shaped
            {'module_name': str, 'optimizers': [{'name', 'compiled_model', ('score')}]}.
        metric: metric callable handed to the evaluator.
        evaluation_matrix: DataFrame with columns module_name / optimizer / score;
            None until evaluate() has run.
    """

    # Compile our models
    def __init__(self, modules, optimizers, metric, trainset):
        """Compile each module with each optimizer.

        Args:
            modules: mapping of module name -> module (callable with `comments=`).
            optimizers: mapping of optimizer name -> optimizer exposing
                `compile(student=, trainset=)`, or None to keep the module uncompiled.
            metric: metric callable used later by evaluate().
            trainset: training examples passed to every optimizer.
        """
        self.models = []
        self.metric = metric
        # Defined up front so that reading it before evaluate() yields None rather
        # than an AttributeError.
        self.evaluation_matrix = None

        for module_name, module in modules.items():
            print(f'Compiling models for {module_name}...')
            compiled_entries = []

            for optimizer_name, optimizer in optimizers.items():
                print(f'...{optimizer_name}')
                if optimizer is None:
                    compiled_model = module
                else:
                    compiled_model = optimizer.compile(student=module, trainset=trainset)

                # Separate name for the record: the loop variable `optimizer` is no
                # longer rebound to a dict half-way through the iteration.
                compiled_entries.append({
                    'name': optimizer_name,
                    'compiled_model': compiled_model,
                })

            self.models.append({'module_name': module_name, 'optimizers': compiled_entries})

    # Evaluate our models against the testset. After evaluation, we will have a matrix of models and their scores under the evaluation_matrix attribute
    def evaluate(self, testset, num_threads=NUM_THREADS):
        """Score every compiled model on `testset` and build `evaluation_matrix`.

        Args:
            testset: evaluation examples.
            num_threads: evaluator parallelism. Defaults to NUM_THREADS (the value
                3 used to be hard-coded here while the constant went unused).
        """
        evaluator = Evaluate(devset=testset, metric=self.metric, num_threads=num_threads, return_outputs=True)
        for module in self.models:
            print(f"""Evaluating models for {module['module_name']}...""")
            for entry in module['optimizers']:
                # return_outputs=True makes the evaluator return (score, outputs);
                # only the score is kept.
                evaluation_score, _ = evaluator(entry['compiled_model'])
                entry['score'] = evaluation_score

        # One row per module, then one row per (module, optimizer) after explode.
        df = pd.DataFrame(self.models).explode('optimizers')

        # extract name/score column from optimizers
        df['optimizer'] = df['optimizers'].apply(lambda x: x['name'])
        df['score'] = df['optimizers'].apply(lambda x: x['score'])

        # Non-mutating drop instead of inplace=True.
        self.evaluation_matrix = df.drop(columns=['optimizers'])

    # Raise a question against the compiled model
    def question_for_model(self, module_name, optimizer_name, comments):
        """Run one compiled candidate on `comments` and return its prediction.

        Raises:
            KeyError: if no candidate matches (module_name, optimizer_name).
                Previously this case silently returned None.
        """
        for model in self.models:
            if model['module_name'] != module_name:
                continue
            for entry in model['optimizers']:
                if entry['name'] == optimizer_name:
                    return entry['compiled_model'](comments=comments)

        available = [(m['module_name'], e['name']) for m in self.models for e in m['optimizers']]
        raise KeyError(
            f"No compiled model for module {module_name!r} with optimizer "
            f"{optimizer_name!r}; available combinations: {available}"
        )

In [None]:
# Compile the models
# Builds one candidate per (module, optimizer) pair using the training split.
ms = ModelSelection(modules=modules, optimizers=optimizers, metric=metric, trainset=train_dataset)

# Evaluate them
# Scores every candidate on the held-out split and fills ms.evaluation_matrix.
ms.evaluate(testset=test_dataset)
print("Done!")

In [59]:
# Display the module / optimizer / score table built by ms.evaluate().
ms.evaluation_matrix

Unnamed: 0,module_name,optimizer,score
0,cot,mipro,66.67


In [75]:
# Ask one (module, optimizer) candidate to classify a single comment.
# NOTE(review): "mipro" is not a key of the `optimizers` dict defined in this
# notebook, so on a fresh run no candidate matches -- confirm which optimizer is meant.
pred = ms.question_for_model("cot", "mipro","Slightly fewer final presentations, and each presenting team presenting for slightly longer")
pred

Prediction(
    answer='**Showcase / Presentation Format**'
)

In [76]:
# Show the most recent entry (n=1) of the LM call history for debugging.
flash.inspect_history(n=1)




## PROPOSED INSTRUCTION:

**You are provided a participant's feedback from a Generative AI Hackathon. Analyze the feedback and choose one of the following themes that best describes its core concern.** 

**Themes:**

- Technical Learning Resources
- Clarity of Requirements
- Time Commitment / Workload
- Showcase / Presentation Format
- Communication and Information Sharing
- Team Formation and Dynamics
- Relevance of Training Content
- Application to Business / Use Cases
- Accessibility and Inclusion
- Incentives and Recognition
- Post-Hackathon Follow-up
- Support from Leadership / Mentors

**Explain your choice of theme and provide a detailed reasoning as to why the chosen theme best reflects the feedback.**

**Example of participant feedback:** "I really appreciated the opportunity to learn about the latest generative AI models, however, it would have been more helpful to have hands-on projects using these models.  I felt like the workshop was too theoretical and didn't provide e

'\n\n\n## PROPOSED INSTRUCTION:\n\n**You are provided a participant\'s feedback from a Generative AI Hackathon. Analyze the feedback and choose one of the following themes that best describes its core concern.** \n\n**Themes:**\n\n- Technical Learning Resources\n- Clarity of Requirements\n- Time Commitment / Workload\n- Showcase / Presentation Format\n- Communication and Information Sharing\n- Team Formation and Dynamics\n- Relevance of Training Content\n- Application to Business / Use Cases\n- Accessibility and Inclusion\n- Incentives and Recognition\n- Post-Hackathon Follow-up\n- Support from Leadership / Mentors\n\n**Explain your choice of theme and provide a detailed reasoning as to why the chosen theme best reflects the feedback.**\n\n**Example of participant feedback:** "I really appreciated the opportunity to learn about the latest generative AI models, however, it would have been more helpful to have hands-on projects using these models.  I felt like the workshop was too theore

Install Phoenix, DSPy, and other dependencies.

In [None]:
# !pip install "regex~=2023.10.3" dspy-ai
# !pip install arize-phoenix openinference-instrumentation-vertexai vertexai

In [2]:
# Phoenix serves its UI on port 6006 by default.
# If that port is already in use, free it first with the `close_port` helper defined in the last cell of this notebook.

import phoenix as px
# Start the Phoenix app and keep a handle to the session.
phoenix_session = px.launch_app()


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [1]:
from phoenix.otel import register
from openinference.instrumentation.vertexai import VertexAIInstrumentor
# Create the tracer provider for the Phoenix project that will receive the spans.
tracer_provider = register(
  project_name="default", # Default is 'default'
)  
# Instrument Vertex AI calls so that they are traced through the provider above.
# NOTE(review): presumably this must run before the LM is first called for those
# calls to be traced -- confirm the intended cell order.
VertexAIInstrumentor().instrument(tracer_provider=tracer_provider)


  from .autonotebook import tqdm as notebook_tqdm


OpenTelemetry Tracing Details
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [77]:
# Used to kill phoenix
import psutil

def close_port(port):
    """Terminate every process that owns an inet socket whose local port is `port`.

    Args:
        port: local port number to free.

    Each owning PID is terminated once, even when it holds several sockets on the
    port. Sockets without a known owner (pid is None) are skipped: psutil.Process(None)
    refers to the *current* process, so terminating it would kill this kernel.
    Processes that already exited or cannot be signalled are reported, not raised.

    NOTE(review): if the server listening on `port` runs inside this kernel's own
    process, this still terminates the kernel -- confirm how Phoenix was launched.
    """
    owner_pids = set()
    for conn in psutil.net_connections(kind='inet'):
        # laddr may be empty for some sockets; guard before reading `.port`.
        if conn.laddr and conn.laddr.port == port and conn.pid is not None:
            owner_pids.add(conn.pid)

    for pid in sorted(owner_pids):
        print(f"Closing port {port} by terminating PID {pid}")
        try:
            psutil.Process(pid).terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied) as exc:
            print(f"Could not terminate PID {pid}: {exc}")


close_port(6006)

Closing port 6006 by terminating PID 18324


: 