In [3]:
import openai
import json
import os
import pandas as pd
from packaging import version
from keys import API_KEY
from datetime import datetime
from llama_index.core import __version__

print(f"LlamaIndex version: {__version__}")


# Fail fast when the installed openai package is older than 1.1.1.
required_version = version.parse("1.1.1")
current_version = version.parse(openai.__version__)

if current_version < required_version:
    raise ValueError(
        f"Error: OpenAI version {openai.__version__} is less than the required version 1.1.1"
    )

print("OpenAI version is compatible.")

# Create the shared OpenAI client used by the cells below.
# API_KEY is imported from the local `keys` module, so the secret is not written in this notebook.
from openai import OpenAI
client = OpenAI(api_key=API_KEY) 

LlamaIndex version: 0.10.55
OpenAI version is compatible.


In [None]:
# Free-text profile of the NGO that grants are matched against.
# rate_match passes this text to get_prompt for every grant; the A/B/C labels below
# are the "areas" that the rating prompt refers to, so keep the lettering in sync
# with get_prompt when editing.
ngo_desc = """
Warsaw-based NGO hiring 3 people in Poland conducting countrywide projects in areas of:
A) Meta EA and career advice - spreading effective altruism ideas and helping people have an impactful job,
B) AI Safety - communicating AI Safety ideas and trying to get more people to work on it,
C) effective fundraising - fundraising (via a platform they're developing) for other NGOs that work in effective altruism,
Projects they don't want to work on right now: animal rights, animal welfare, conservation, ecology, direct work in other areas.
The NGO is ready to start new projects and is looking for grants that could finance them.
"""

In [None]:
# Find the newest grants export in the working directory and load it.
# Candidate files are named grants_<yymmddHHMM>_<suffix>.csv; the timestamp is the
# second underscore-separated part of the filename.
files = os.listdir('.')
grants_files = [f for f in files if f.startswith('grants_') and f.endswith('.csv')]
most_recent_file = None
latest_timestamp = datetime.min

for file in grants_files:
    parts = file.split('_')
    if len(parts) < 3:  # Not enough parts to carry a timestamp
        continue
    try:
        timestamp = datetime.strptime(parts[1], '%y%m%d%H%M')
    except ValueError:
        # Second part is not a yymmddHHMM timestamp; ignore this file.
        continue
    if timestamp > latest_timestamp:
        most_recent_file, latest_timestamp = file, timestamp

# Without this guard a missing file surfaces as an opaque TypeError from the
# string concatenation below (None + str).
if most_recent_file is None:
    raise FileNotFoundError(
        "No grants_<yymmddHHMM>_*.csv file found in the current directory"
    )

print(most_recent_file + " is the most recent grants CSV file")

# Load the CSV file into DataFrame
df = pd.read_csv(most_recent_file)
df.head(5)


In [None]:
# Keep only grants whose "obszar" (area) value is one of the regions of interest.
obszary = [
    "Warszawa",
    "Cała Polska",
    "mazowieckie",
    "Kraków",
    "Poznań",
    "wielkopolskie",
    "Wrocław",
]
in_selected_area = df["obszar"].isin(obszary)
fdf = df.loc[in_selected_area]
fdf

In [None]:
def get_prompt(ngo, grant):
  """Build the rating prompt for one grant / NGO pair.

  Args:
    ngo: text describing the NGO; inserted after "NGO description: ".
    grant: text describing the grant; inserted after "Grant description: ".

  Returns:
    A string holding both descriptions followed by fixed instructions that ask
    for five 1-100 ratings (A_rating, B_rating, C_rating, effortless_rating,
    meta_ops_rating) as a JSON reply.
  """
  descriptions = "Grant description: {}\nNGO description: {}\n".format(grant, ngo)
  # Spelled out line by line so the exact whitespace of the template is visible.
  instructions = (
      "\n"
      "    Provide\n"
      "    - rating of grant's suitability for the NGO's area A described above (conservative, critical assessment) (1-100),\n"
      "    - rating of grant's suitability for the NGO's area B described above (conservative, critical assessment) (1-100),\n"
      "    - rating of grant's suitability for the NGO's area C described above (conservative, critical assessment) (1-100),\n"
      "    - rating of how low effort it would be to use this grant (1-100), \n"
      "    - rating of how likely it is to cover general operation costs or meta charity projects (1-100),\n"
      "    {A_rating: _ , B_rating: _ , C_rating: _ , effortless_rating: _ , meta_ops_rating: _ }\n"
      "Please reply with proper JSON format like above\n"
      "    "
  )
  return descriptions + instructions

In [None]:
def rate_match(grant_row):
  """Ask the chat model to rate one grant against the NGO description.

  Intended to be used with ``DataFrame.apply(rate_match, axis=1)``.

  Args:
    grant_row: a pandas Series for one grant. The ``name``, ``descr``,
      ``tags``, ``author``, ``obszar`` and ``fundusz`` fields are read.

  Returns:
    pd.Series built from the JSON object in the model's reply. The prompt asks
    for the keys A_rating, B_rating, C_rating, effortless_rating and
    meta_ops_rating, but the reply is not validated here.

  Raises:
    json.JSONDecodeError: if the reply content is not valid JSON.
    KeyError: if the row has no ``name`` or ``descr`` field.
  """
  # On a row Series, attribute access `.name` returns the Series' own name
  # (the row's index label), not the "name" column, so use item access.
  grant_title = grant_row["name"]
  descr = grant_row["descr"]

  # Prefer the description; fall back to the title when the description is
  # missing (NaN is not a str) or blank.
  if isinstance(descr, str) and descr.strip():
    grant_short_descr = descr
  else:
    grant_short_descr = grant_title

  grant_descr = f"{grant_short_descr}\ntagi: {grant_row.tags}\nautor: {grant_row.author}\nobszar: {grant_row.obszar}\nzakres: {grant_row.fundusz}"

  # One request per grant row; JSON mode is requested so the reply can be
  # passed straight to json.loads.
  response = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    response_format={ "type": "json_object" },
    messages=[
        {"role":"user", "content":get_prompt(ngo_desc, grant_descr)}
    ]
  )

  json_output = response.choices[0].message.content
  parsed_dict = json.loads(json_output)
  print(grant_title, parsed_dict)
  return pd.Series(parsed_dict)
  
# Rate every filtered grant (rate_match is called once per row), then attach
# the returned rating columns to the filtered frame by index.
new_columns = fdf.apply(func=rate_match, axis="columns")
fdf = fdf.join(other=new_columns)


In [None]:
# Overall score: the three area ratings count fully, while the effort and
# meta/operations ratings count at half weight.
fdf["total_rating"] = (
    fdf["A_rating"]
    + fdf["B_rating"]
    + fdf["C_rating"]
    + fdf["effortless_rating"] * 0.5
    + fdf["meta_ops_rating"] * 0.5
)

In [None]:
# Show the ten highest-rated grants, restricted to the summary columns.
# `cols` was previously defined but never applied to the displayed frame.
cols = ["name","obszar", "total_rating"]
fdf.sort_values(by="total_rating", ascending=False).head(10)[cols]

In [None]:
# Save the rated grants next to the input, prefixing the source filename.
output_path = "ranked_filtered_" + most_recent_file
fdf.to_csv(output_path)