In [None]:
import openai
import os
# Shared OpenAI client for this notebook; the API key is read from the
# OPENAI_API_KEY environment variable rather than being hard-coded.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


In [None]:
import json
import numpy as np
from numpy.linalg import norm

# NOTE: the OpenAI client (`client`) is created in the previous cell, using the OPENAI_API_KEY environment variable.

# Rating dimensions requested from the model. The order matters: it fixes the
# position of each rating inside the vectors built by vector_from_features().
CATEGORIES = [
    "Thematic Alignment", "Innovation", "Feasibility",
    "Community/Geographic Impact", "Technical/Research Depth",
]

def _parse_json_object(content) -> dict:
    """Return the JSON object contained in the model reply *content*.

    The reply is parsed as-is first. If that fails, the outermost ``{...}``
    span is parsed instead, because chat models frequently wrap the JSON in a
    Markdown code fence or surround it with explanatory prose.

    Raises:
        ValueError: if the reply is empty, holds no parseable JSON, or the
            parsed JSON value is not an object.
    """
    if content is None:
        # The API may return a message without text content.
        raise ValueError("Could not parse JSON from LLM output: no content returned")

    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as err:
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end <= start:
            raise ValueError(
                f"Could not parse JSON from LLM output: {content}"
            ) from err
        try:
            parsed = json.loads(content[start:end + 1])
        except json.JSONDecodeError as inner_err:
            raise ValueError(
                f"Could not parse JSON from LLM output: {content}"
            ) from inner_err

    if not isinstance(parsed, dict):
        # A bare list/number/string is valid JSON but not the requested shape.
        raise ValueError(
            f"Expected a JSON object from LLM output, got: {content}"
        )
    return parsed


def extract_features(description: str) -> dict:
    """
    Ask the chat model to rate a description on each entry of CATEGORIES.

    Args:
        description: The grant or project text to be rated.

    Returns:
        The JSON object parsed from the model's reply. The prompt asks for
        category names as keys and ratings from 0 to 1 as values, but the
        keys and value ranges are not validated here.

    Raises:
        ValueError: if the reply is empty, cannot be parsed as JSON, or is
            not a JSON object.
    """
    prompt = f"""
    You are an expert in evaluating grant and project descriptions. 
    Please analyze the following text and provide a rating from 0 to 1 for each of the following categories: {", ".join(CATEGORIES)}.
    Output your answer as a JSON object where keys are the category names and values are the ratings.
    
    Text:
    {description}
    """
    completion = client.chat.completions.create(
        model="gpt-4",  # or gpt-3.5-turbo
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
    )

    # Only the first choice is used; the request does not ask for more.
    content = completion.choices[0].message.content
    return _parse_json_object(content)

def vector_from_features(features: dict) -> np.ndarray:
    """
    Build a float vector holding the ratings in CATEGORIES order.

    Each category name is looked up in ``features``; a missing category
    raises KeyError. Keys not listed in CATEGORIES are ignored.
    """
    ordered_ratings = []
    for category in CATEGORIES:
        ordered_ratings.append(features[category])
    return np.array(ordered_ratings, dtype=float)

def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the cosine similarity between two 1-D vectors.

    Args:
        vec1: First vector (any sequence convertible to a float array).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine of the angle between the vectors, in [-1, 1] up to
        floating-point rounding. Returns 0.0 when either vector has zero
        norm, since the angle is undefined in that case.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)

    norm_a = norm(a)
    norm_b = norm(b)
    # Guard the zero vector explicitly instead of padding the denominator
    # with an epsilon: an additive epsilon skews every result and breaks
    # scale invariance for small-magnitude vectors.
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0

    # Normalizing each vector first avoids under/overflow in the product of
    # the two norms.
    return float(np.dot(a / norm_a, b / norm_b))

# Example grant and project texts
# These two sample strings are the inputs rated by extract_features() in the
# demo below: one grant call and one project proposal.
grant_description = """
This grant supports innovative research in renewable energy and emphasizes sustainable, community-driven solutions.
Applicants should demonstrate a strong alignment with environmental themes and present rigorous, innovative methodologies.
"""

project_proposal = """
researching and actively deriving methodologies for interpreting neural networks, which are commonly considered “black boxes.” As AI increasingly touches upon all aspects of our lives, it becomes ever more important to understand what informs model predictions. Using linear algebra and function analysis techniques, I’ve developed a mathematical framework for a system that can take a neural network’s output and identify the set of inputs occupying an N-dimensional space that result in that output. I’ve successfully implemented this system for small networks and confirmed my hypothesis. Now, I am working on applying it to larger and different network architectures. I believe analyzing this space will provide powerful insights into the behavior of neural networks.
"""

# Extract feature vectors using the LLM
# (each call sends one chat-completion request via extract_features, so this
# cell needs network access and a valid API key)
grant_features = extract_features(grant_description)
project_features = extract_features(project_proposal)

# Order the ratings by CATEGORIES so both vectors share the same layout
grant_vector = vector_from_features(grant_features)
project_vector = vector_from_features(project_features)

# Compute the similarity between the grant and the project
# NOTE(review): each text is rated independently on the same categories, so
# this score compares rating profiles rather than topical overlap. The
# recorded output is ~0.98 for a renewable-energy grant and a neural-network
# proposal -- confirm this is the intended notion of "match".
sim_score = cosine_similarity(grant_vector, project_vector)
print("Feature-based Similarity Score:", sim_score)

# ... (rest of your code for large datasets would go here)

Feature-based Similarity Score: 0.9839916686711934
