In [1]:
import time
import os
import requests
# from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
import pandas as pd

# Show full cell text when displaying DataFrames (the model responses are long).
# None is the documented "unlimited" value for this option; 0 only happened to
# avoid truncation as an undocumented side effect.
pd.set_option("display.max_colwidth", None)

In [2]:
from dotenv import load_dotenv
# Read variables from the local ".env" file; the call's return value is shown
# as this cell's output. Presumably this supplies the API credentials used by
# ChatOpenAI later on -- confirm against the .env contents.
load_dotenv(dotenv_path=".env")

True

In [None]:
from langchain.schema import SystemMessage, HumanMessage, AIMessage


def call_model(model, prompt):
    """Send one user prompt to a chat model and return the reply text.

    Args:
        model: Object exposing ``invoke(input=...)``; the returned object
            must have a ``content`` attribute.
        prompt: Text placed in the human message.

    Returns:
        The ``content`` attribute of whatever ``model.invoke`` returns.
    """
    # Every request is prefixed with the same fixed system message.
    conversation = [
        SystemMessage(content="You are a helpful assistant."),
        HumanMessage(content=prompt),
    ]
    reply = model.invoke(input=conversation)
    return reply.content

def generate_reworded_text(description, model, criterion, criterion_desc):
    """Ask the model for a JSON evaluation of a project against one criterion.

    Despite its name, this function does not reword anything: it builds a
    prompt containing a JSON response template, the criterion definition and
    the project description, then forwards it to ``call_model``.

    Args:
        description: Project description text inserted into the prompt.
        model: Chat model passed through to ``call_model``.
        criterion: Criterion name inserted into the prompt (e.g. "Innovation").
        criterion_desc: Definition of the criterion inserted into the prompt.

    Returns:
        Whatever ``call_model`` returns for the prompt. The prompt requests
        JSON, but the reply is returned as-is and is not parsed or validated
        here.
    """
    # The three scoring levels share one shape; only the level number differs.
    level_entries = ",\n".join(
        "\n".join(
            (
                f'    "level_{level}": {{',
                '      "status": "fully met|partially met|did not meet",',
                '      "explanation": "brief explanation"',
                "    }",
            )
        )
        for level in (3, 2, 1)
    )

    # JSON template the model is asked to fill in, one list item per line.
    schema = "\n".join(
        [
            "{",
            '  "alignment": [',
            '    "specific aspect 1 that aligns with criteria",',
            '    "specific aspect 2 that aligns with criteria"',
            "  ],",
            '  "misalignment": [',
            '    "specific aspect 1 that misaligns with criteria",',
            '    "specific aspect 2 that misaligns with criteria"',
            "  ],",
            '  "scoring_evaluation": {',
            level_entries,
            "  }",
            "}",
        ]
    )

    # Prompt sections are separated by a blank line.
    sections = [
        f"Analyze the project description below and provide a JSON response that evaluates the project based on the '{criterion}' criteria. "
        "Format your response as a JSON object with the following structure:",
        schema,
        f"Use only the information from the description without adding interpretive commentary or value judgments. Extract specific aspects that demonstrate alignment or misalignment with the '{criterion}' criteria.",
        f"Definition of {criterion}: {criterion_desc}",
        f"Project Description: {description}",
        "Ensure your response is properly formatted JSON that can be parsed by standard JSON parsers.",
    ]
    prompt = "\n\n".join(sections)
    return call_model(model, prompt)

def generate_project_summary(model, description):
    """Ask the model for a short summary of a project.

    The prompt requests one sentence describing the project and one sentence
    stating the problem it is trying to solve. This function only returns the
    reply; storing it (e.g. in a DataFrame column) is left to the caller.

    Args:
        model: Chat model passed through to ``call_model``.
        description: Project description text inserted into the prompt.

    Returns:
        Whatever ``call_model`` returns for the prompt.
    """
    # The two instructions sit on consecutive lines.
    instructions = "\n".join(
        [
            "Provide one sentence describing the project.",
            "Provide one sentence stating the problem the project is trying to solve.",
        ]
    )
    # Remaining sections are separated by a blank line.
    prompt = "\n\n".join(
        [
            "Given the project description:",
            instructions,
            f"Project Description: {description}",
            "Response:",
        ]
    )
    return call_model(model, prompt)

In [None]:
def _rubric_description(rubric_df, criterion):
    """Return the first ``Description`` whose ``Criterion`` equals ``criterion``."""
    matches = rubric_df.loc[rubric_df["Criterion"] == criterion, "Description"]
    if matches.empty:
        # Same exception type a bare `.iloc[0]` on the empty selection would
        # raise, but with a message that names the missing criterion.
        raise IndexError(f"Rubric has no row with Criterion == {criterion!r}")
    return matches.iloc[0]


# TODO: add real retry/backoff handling to accommodate the Azure model rate limits.
# As a stopgap, `pause_seconds` spaces out the per-row requests.
def process_projects(projects_df, rubric_df, temperature=0.3, pause_seconds=0.0):
    """Generate a summary and two rubric assessments for every project row.

    For each row, the ``description`` value is sent to the chat model three
    times: once for a project summary and once each for the "Innovation" and
    "Curiosity" criteria. The replies are stored in the ``Project_Summary``,
    ``Innovation_Assessment`` and ``Curiosity_Assessment`` columns.

    Args:
        projects_df: DataFrame with a ``description`` column. It is modified
            in place (result columns are added/filled).
        rubric_df: DataFrame with ``Criterion`` and ``Description`` columns
            that contains rows for "Innovation" and "Curiosity".
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        pause_seconds: Seconds to sleep after each row that triggered model
            calls. The default of 0 disables pausing.

    Returns:
        The same ``projects_df`` object that was passed in.

    Raises:
        IndexError: If the rubric has no row for one of the two criteria.
    """
    model = ChatOpenAI(model="gpt-4o-mini", temperature=temperature)

    # Extract rubric descriptions once outside the loop.
    innovation_desc = _rubric_description(rubric_df, "Innovation")
    curiosity_desc = _rubric_description(rubric_df, "Curiosity")

    # Create the result columns up front so they exist even when the frame is
    # empty or every row fails. Existing columns (e.g. from an earlier partial
    # run) are left untouched.
    for column in ("Project_Summary", "Innovation_Assessment", "Curiosity_Assessment"):
        if column not in projects_df.columns:
            projects_df[column] = None

    for idx, row in projects_df.iterrows():
        print(f"Processing row {idx}")
        description = row['description']

        # A missing description would otherwise be sent to the model as the
        # literal text "nan", spending three requests on a meaningless reply.
        if pd.isna(description) or not str(description).strip():
            print(f"Skipping row {idx}: missing description")
            continue

        # Process all assessments for one description at once.
        try:
            projects_df.at[idx, 'Project_Summary'] = generate_project_summary(model, description)
            projects_df.at[idx, 'Innovation_Assessment'] = generate_reworded_text(
                description, model, "Innovation", innovation_desc
            )
            projects_df.at[idx, 'Curiosity_Assessment'] = generate_reworded_text(
                description, model, "Curiosity", curiosity_desc
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and move on. Values
            # already written for this row before the failure are kept.
            print(f"Error processing row {idx}: {e}")
        else:
            print(f"Row {idx} completed")

        # Optional: Save intermediate results
        # projects_df.to_csv('intermediate_results.csv')

        # Pause after failures as well: a rate-limit error is exactly the case
        # where the next request should be delayed.
        if pause_seconds > 0:
            time.sleep(pause_seconds)

    return projects_df

In [4]:
# Load project descriptions and rubric descriptions.
projects_df = pd.read_csv("projects_selected.csv")
rubric_df = pd.read_csv("devpost_creativity_rubric.csv")

# Fail fast on schema problems before any (slow, billable) model calls are made:
# process_projects reads exactly these columns and rubric criteria.
assert "description" in projects_df.columns, "projects_selected.csv needs a 'description' column"
assert {"Criterion", "Description"} <= set(rubric_df.columns), (
    "devpost_creativity_rubric.csv needs 'Criterion' and 'Description' columns"
)
assert {"Innovation", "Curiosity"} <= set(rubric_df["Criterion"]), (
    "rubric must define the 'Innovation' and 'Curiosity' criteria"
)

In [None]:
# new_projects_df.to_csv("openai_processed_projects_json.csv")