In [None]:
# !pip install langchain langchain-openai

In [None]:
import time
import os
import requests

# from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
import pandas as pd

# Column-width setting for DataFrame display, so long text cells are easier to read.
# NOTE(review): pandas documents None as "no limit"; confirm that 0 gives the
# intended untruncated rendering on the installed pandas version.
pd.options.display.max_colwidth = 0

In [None]:
# Shared system prompt: defines the two criteria (innovation, curiosity), the
# extraction task, and the 1-5 rubric for each criterion. It is sent verbatim
# to the model, so any wording change alters what the judge sees.
# NOTE(review): the rubric text below is kept byte-for-byte; a few phrases look
# garbled (e.g. "explore create to project outcomes" in curiosity level 5) and
# should be checked against the original rubric before being reworded.
judge_prompt = """
You are an impartial, unbiased judge for a hackathon. You will be critiquing hackathon projects against two criteria: innovation and curiosity. 

Innovation is defined as challenging the norm by analyzing existing solutions through testing and/or research, and then exploring ways to improve or reinvent the norm. A project that demonstrates innovation may improve an existing solution in a way that increases value (e.g., access, affordability, opportunity, empowerment), and/or invent a brand-new approach/solution that addresses the problem. Consider: how the team challenged commonly accepted ideas and what they did differently in their project. 

Curiosity is defined as being curious by asking questions that inspire growth and result in the artful application of scientific principles. A project that demonstrates curiosity may question existing limitations, discover an unaddressed problem, and/or is “outside the box” that leverages unique ideas and processes. Consider: how the team explored possibilities and something clever, beautiful, or unique about their project.

Given a project description, in a neutral and objective tone, provide a one-sentence problem statement and a one-sentence project description. Next, using only the information from the description without adding interpretive commentary or value judgments, extract the aspects of the project that demonstrate alignment and/or misalignment with the innovation and curiosity criteria, if any.

As the judge, you also have to evaluate how the projects meet rubric criteria. Using the following rubrics for innovation and curiosity, identify how the project either “Fully meets”, “Partially meets”, or “Does not meet” each level in the rubric. Include an explanation.

Innovation level 1: The team developed a project but did not research or analyze existing solutions and no potential improvements was identified, 2: The team developed a project, and did not examine existing solutions for potential improvements, instead used an already known or given area of improvement, 3: The team developed a project, conducted research on existing solutions, but did not analyze current accepted solutions to identify strengths or weaknesses, and did not explore new solutions, 4: The team developed a project, conducted research on existing solutions, analyze current accepted solutions to identify strengths or weaknesses, but did not explore new or alternative solutions, 5: The team based their project on research and analyzed currently accepted solutions to identify strengths or weaknesses, and explored one or more new or alternative successful solutions.

Curiosity level 1: The team did not ask questions that addressed their understanding of the project topic areas, 2: The team asked questions to better understand their area of interest, 3: The team worked together to identify knowledge gaps, asking unique or new questions to bridge gaps, 4: The team worked together to identify knowledge gaps, asking unique or new questions, and made a plan that explored answers to improve project outcomes, 5: The team worked together to identify knowledge gaps, asking unique or new questions, and acted on that plan to explore create to project outcomes. 

Let’s work this out in a step by step way to be sure we have the right answer.
"""

# Each variant is the shared prompt plus one trailing "Assessment style" line
# that nudges the judge toward a neutral, favorable, or unfavorable stance.
neutral_prompt = (
    judge_prompt + "\n-Assessment style: your assessment is accurate and unbiased."
)

positive_prompt = (
    judge_prompt
    + "\n-Assessment style: your assessment is accurate but you are inclined to provide a favorable review."
)

# Fixed typo: the style line previously read "a n unfavorable review".
negative_prompt = (
    judge_prompt
    + "\n-Assessment style: your assessment is accurate but you are inclined to provide an unfavorable review."
)

In [None]:
import pandas as pd

# Load the selected-projects table and the creativity rubric from CSV files
# located in the notebook's working directory.
projects_df = pd.read_csv(filepath_or_buffer="projects_selected.csv")
rubric_df = pd.read_csv(filepath_or_buffer="devpost_creativity_rubric.csv")

In [6]:
# Project descriptions to be judged; the judging loop reads the "description"
# column of this frame.
descriptions = pd.read_csv(filepath_or_buffer="project_descriptions.csv")

In [None]:
# model = ChatOllama(model="llama3.2", temperature=0.5)
# Read the key from the environment so no credential is ever saved in (or
# committed with) the notebook. Fail fast with a clear message when it is
# missing instead of surfacing an authentication error on the first request.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before starting the kernel."
    )
# Chat model used by every judging call below.
model = ChatOpenAI(model="gpt-4o", temperature=0.5, api_key=OPENAI_API_KEY)

In [None]:
from langchain.schema import SystemMessage, HumanMessage


def pass_judgement(prompt, project):
    """Ask the module-level ``model`` to judge one project.

    Args:
        prompt: Text sent as the system message (the judging instructions).
        project: Project description; it is interpolated into the human
            message after the fixed "Please judge this project: " prefix.

    Returns:
        The ``content`` attribute of the object returned by ``model.invoke``.
    """
    system_msg = SystemMessage(content=prompt)
    human_msg = HumanMessage(content=f"Please judge this project: {project}")
    # System message first, then the human request, in a single invocation.
    reply = model.invoke(input=[system_msg, human_msg])
    return reply.content

In [None]:
import time


def gen_judgement_dict(projects_df):
    """Judge every project under the neutral, positive and negative prompts.

    Args:
        projects_df: DataFrame with a ``description`` column. Each row's
            description is passed to ``pass_judgement`` once per prompt.

    Returns:
        dict mapping each row's index label to
        ``{"neutral": ..., "positive": ..., "negative": ...}``, where each
        value is whatever ``pass_judgement`` returned for that prompt.

    Raises:
        KeyError: if a row has no ``description`` entry.
    """
    # Label -> prompt pairs are built at call time so edits to the module-level
    # prompts are picked up. Keeping label and prompt together removes the
    # positional indexing that could silently mislabel results if reordered.
    # Dict order fixes the call order: neutral, positive, negative.
    prompts_by_style = {
        "neutral": neutral_prompt,
        "positive": positive_prompt,
        "negative": negative_prompt,
    }

    output_dict = {}
    for idx, row in projects_df.iterrows():
        # perf_counter is monotonic, so the reported duration cannot be skewed
        # by system clock adjustments the way time.time() differences can.
        started = time.perf_counter()
        print(f"Judging row {idx}")
        description = row["description"]
        output_dict[idx] = {
            style: pass_judgement(prompt, description)
            for style, prompt in prompts_by_style.items()
        }
        print(f"Took {time.perf_counter() - started}s")

    return output_dict

In [10]:
# Run the three-way judging over every row of `descriptions`. This makes one
# model call per prompt per row, so the cell is slow (see the timings printed below).
output_dict = gen_judgement_dict(projects_df=descriptions)

Judging row 0
Took 30.10392689704895s
Judging row 1
Took 24.70517086982727s
Judging row 2
Took 16.394521951675415s
Judging row 3
Took 31.047686100006104s
Judging row 4
Took 18.097491025924683s
Judging row 5
Took 15.246944189071655s
Judging row 6
Took 12.77750015258789s
Judging row 7
Took 22.60185980796814s
Judging row 8
Took 19.032674074172974s
Judging row 9
Took 21.2361741065979s
Judging row 10
Took 17.13142204284668s
Judging row 11
Took 20.743053197860718s


In [None]:
import json

# Serialize the judgements to a JSON string held in memory.
json_dict = json.dumps(obj=output_dict)

In [16]:
# Persist the judgements to disk as indented JSON; the context manager closes
# the file handle even if serialization raises.
with open("judgements_json.json", "w+") as out_file:
    json.dump(obj=output_dict, fp=out_file, indent=4)