In [5]:
import anthropic
import pandas as pd

In [6]:
# skiprows=2 skips the two lines that precede the header row
# (presumably export preamble -- confirm against the input file).
connections_df = pd.read_csv("./input/Connections.csv", skiprows=2)

# Drop connections with no company recorded: Series.unique() keeps NaN as a
# value, and it would otherwise be formatted into the prompt as the name "nan".
companies = connections_df["Company"].dropna().unique()
print(f"Successfully read-in connections. Total unique companies: {len(companies)}")

Successfully read-in connections. Total unique companies: 986


In [7]:
# Read the key inside a context manager so the file handle is always closed,
# and strip surrounding whitespace (e.g. a trailing newline left by an editor)
# so it is not sent as part of the key.
with open("./anthropic_api_key.txt", "r") as api_key_file:
    anthropic_api_key = api_key_file.read().strip()

client = anthropic.Anthropic(api_key=anthropic_api_key)

In [8]:
def query_claude(prompt, use_search=False):
    """Send a single-turn user prompt to Claude and return the raw response.

    Args:
        prompt: Text sent as the only user message of the conversation.
        use_search: When True, the request offers the model the
            ``web_search_20250305`` tool, capped at one use per request.
            When False (the default) an empty tool list is sent.

    Returns:
        Whatever ``client.beta.messages.create`` returns for the request.
    """
    # An empty tool list is sent when search is not requested.
    tools = []
    if use_search:
        tools = [
            {
                "name": "web_search",
                "type": "web_search_20250305",
                "max_uses": 1
            }
        ]

    return client.beta.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        temperature=1,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    }
                ]
            }
        ],
        tools=tools
    )

In [9]:
def loop_through_companies(companies, prompt_template, bool_col_name='IsGood'):
    """Ask Claude one TRUE/FALSE question per company and tabulate the replies.

    Args:
        companies: Sized iterable of company names (``len()`` is used for the
            progress display).
        prompt_template: Prompt text with a ``{company_name}`` placeholder,
            filled in per company via ``str.format``.
        bool_col_name: Name of the boolean verdict column in the result.

    Returns:
        DataFrame indexed by ``Company`` with columns ``bool_col_name``
        (True only when the reply ends in TRUE), ``Message`` (the raw reply
        text) and ``IsErr`` (True when the reply ends in neither TRUE nor
        FALSE). Error rows carry False in the verdict column.
    """
    columns = ['Company', bool_col_name, 'Message', 'IsErr']
    # Trailing characters ignored when reading the verdict, so replies such as
    # '"TRUE"', 'TRUE.' or 'FALSE\n' are still recognised.
    trailing_noise = ' \t\r\n.!"\'*`'

    # Collect rows in a list and build the frame once at the end instead of
    # enlarging a DataFrame one row at a time.
    rows = []
    total_input_tokens = 0
    total_output_tokens = 0

    for counter, company_name in enumerate(companies, start=1):

        formatted_prompt = prompt_template.format(company_name=company_name)
        message = query_claude(formatted_prompt)

        # The verdict is read from the last content block of the response.
        final_text = message.content[-1].text
        verdict = final_text.rstrip(trailing_noise)

        bool_col = verdict.endswith('TRUE')
        is_err = not bool_col and not verdict.endswith('FALSE')

        # The unmodified reply is stored so error rows can be inspected later.
        rows.append([company_name, bool_col, final_text, is_err])

        total_input_tokens += message.usage.input_tokens
        total_output_tokens += message.usage.output_tokens

        print(f"{counter:3d}/{len(companies)}: Processed company: {company_name}. Input tokens: {message.usage.input_tokens}; Output tokens: {message.usage.output_tokens}; Error: {is_err}")

    print(f"Total input tokens: {total_input_tokens}; Total output tokens: {total_output_tokens}")

    return pd.DataFrame(rows, columns=columns).set_index('Company')

In [10]:
# First pass: ask whether each company has a pro-social mission.
# The {company_name} placeholder is filled in per company by loop_through_companies.
good_prompt = """
        I am looking for jobs. I am a data scientist looking for a company or non-profit working on a pro-social mission. 
        Some example cause areas: climate change, healthcare, preserving democracy, wealth inequality, education. 
        But I am interested in any others that are for the benefit of the greater good.
        Can you please let me know if this company fits that above description: {company_name}. 
        If it does meet this conditions, just reply "TRUE". If it does not, just reply "FALSE". Do not respond with the reasoning for this decision, 
        simply respond "TRUE" or "FALSE".
    """

result_df = loop_through_companies(
    companies=companies,
    prompt_template=good_prompt,
    bool_col_name='IsGood',
)
# Bare last expression so the notebook renders the frame.
result_df

  1/5: Processed company: Scope3. Input tokens: 148; Output tokens: 4; Error: False
  2/5: Processed company: GoodLeap. Input tokens: 149; Output tokens: 4; Error: False
  3/5: Processed company: System Inc.. Input tokens: 147; Output tokens: 4; Error: False
  4/5: Processed company: Kanary. Input tokens: 148; Output tokens: 4; Error: False
  5/5: Processed company: Card Shop Near Me. Input tokens: 149; Output tokens: 4; Error: False


Unnamed: 0_level_0,IsGood,Message,IsErr
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Scope3,True,True,False
GoodLeap,True,True,False
System Inc.,False,False,False
Kanary,False,False,False
Card Shop Near Me,False,False,False


In [None]:
# result_df = pd.read_csv("./output/dave_connection_companies_first_pass.csv", index_col=0)

In [None]:
# Keep only the companies the first pass marked as a fit. Replies that could
# not be parsed carry IsGood == False (see loop_through_companies), so they
# are excluded here as well.
good_df = result_df[result_df['IsGood'].eq(True)]
good_companies = good_df.index.to_list()
print(f"Good companies: {len(good_companies)}")

Good companies: 257


In [None]:
# Second pass: of the companies that passed the mission screen, ask which are
# based in Colorado or fully remote.
local_prompt = """
        Can you please let me know if the company {company_name} is either fully remote or based in Colorado?
        I live in Colorado and can only work for a company that is based in Colorado or fully remote.
        If it is remote or in Colorado, just reply "TRUE". If it is neither, just reply "FALSE". 
        Do not respond with the reasoning for this decision, simply respond "TRUE" or "FALSE".
    """

loc_result_df = loop_through_companies(
    companies=good_companies,
    prompt_template=local_prompt,
    bool_col_name='IsLocal',
)
# Bare last expression so the notebook renders the frame.
loc_result_df

  1/257: Processed company: Scope3. Input tokens: 103; Output tokens: 4; Error: False
  2/257: Processed company: GoodLeap. Input tokens: 103; Output tokens: 4; Error: False
  3/257: Processed company: Project Canary. Input tokens: 103; Output tokens: 4; Error: False
  4/257: Processed company: Hiili. Input tokens: 102; Output tokens: 48; Error: True
  5/257: Processed company: Working Families Party. Input tokens: 103; Output tokens: 4; Error: False
  6/257: Processed company: Our World in Data. Input tokens: 103; Output tokens: 4; Error: False
  7/257: Processed company: Trunk Tools. Input tokens: 102; Output tokens: 4; Error: False
  8/257: Processed company: SPAN. Input tokens: 101; Output tokens: 81; Error: True
  9/257: Processed company: Epoch AI. Input tokens: 102; Output tokens: 4; Error: False
 10/257: Processed company: Ascend Analytics. Input tokens: 103; Output tokens: 4; Error: False
 11/257: Processed company: Giving Green. Input tokens: 102; Output tokens: 4; Error: Fal

Unnamed: 0_level_0,IsLocal,Message,IsErr
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Scope3,True,TRUE,False
GoodLeap,False,FALSE,False
Project Canary,True,TRUE,False
Hiili,False,I don't have reliable information about a comp...,True
Working Families Party,False,FALSE,False
...,...,...,...
IQVIA,False,FALSE,False
Novartis,False,FALSE,False
HONOR,False,FALSE,False
Athena Education,False,I need more specific information to accurately...,True


In [89]:
# Show only the companies the second pass marked as Colorado-based or remote.
loc_result_df[loc_result_df['IsLocal'].eq(True)]

Unnamed: 0_level_0,IsLocal,Message,IsErr
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Scope3,True,True,False
Project Canary,True,True,False
Our World in Data,True,True,False
Ascend Analytics,True,True,False
Giving Green,True,True,False
University of Colorado Boulder,True,True,False
RMI,True,True,False
LineVision,True,True,False
Modern Classrooms Project,True,True,False
The Salvation Army Intermountain Division,True,True,False


In [90]:
# Persist the second-pass results; the Company index is written as the first column.
loc_result_df.to_csv(path_or_buf="./output/dave_connection_companies_second_pass.csv")
# First-pass export, kept for reference. The index must be written (no index=False):
# Company is the index of result_df, and the reload cell reads it back with index_col=0.
# result_df.to_csv("./output/dave_connection_companies_first_pass.csv")

In [None]:
# class CompanyResult:
#     def __init__(self, is_good, message, is_err):
#         self.is_good = is_good
#         self.message = message
#         self.is_err = is_err

#     def __str__(self):
#         return f"CompanyResult(is_good={self.is_good}, message={self.message}, is_err={self.is_err})"

#     def __repr__(self):
#        return self.__str__()

#     def to_dict(self):
#         return {
#             "is_good": self.is_good,
#             "message": self.message,
#             "is_err": self.is_err
#         }

# "text": f"""
#     I am looking for jobs. I am a data scientist looking for a company or non-profit working on a pro-social mission. 
#     Some example cause areas: climate change, healthcare, preserving democracy, wealth inequality, education. 
#     But I am interested in any others that are for the benefit of the greater good. I can only work in Colorado or fully remotely. 
#     Can you please let me know if this company fits that above description: {company_name}. 
#     If it does meet these conditions, just reply "TRUE". If it does not, just reply "FALSE". Do not respond with the reasoning for this decision, 
#     simply respond "TRUE" or "FALSE".
# """