In [10]:
import ast
import os, json

import pandas as pd
from dotenv import load_dotenv
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.llms import OpenAI, AI21

# Load API keys from .env file
def load_api_keys(required=("OPENAI_API_KEY", "AI21_API_KEY", "SERPAPI_API_KEY")):
    """Load the .env file into the environment and verify the API keys exist.

    The key values are neither stored nor returned; this function only
    guarantees that every name in ``required`` is present in ``os.environ``
    after ``load_dotenv()`` has run.
    NOTE(review): presumably the LangChain wrappers read the keys from the
    environment themselves -- confirm against the library documentation.

    Args:
        required: Iterable of environment variable names that must be set.
            Defaults to the three keys this notebook has always required.

    Raises:
        KeyError: If one or more required variables are missing. The message
            lists every missing name, not just the first one.
    """
    # Load variables from .env file into os.environ
    load_dotenv()

    # Presence check only: an empty value still counts as "set", matching a
    # plain os.environ[...] lookup.
    missing = [name for name in required if name not in os.environ]
    if missing:
        raise KeyError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

# Set LLM
def set_llm():
    """Build the LLM wrapper used by this notebook.

    Returns:
        An ``OpenAI`` instance constructed with ``temperature=0``.
    """
    # temperature=0 is the deterministic setting.
    # Alternative backend: AI21(temperature=0)
    return OpenAI(temperature=0)

# Set agent and tools
def set_agent(llm=None):
    """Build the search agent backed by the SerpAPI tool.

    Args:
        llm: Optional LLM instance to drive both the tools and the agent.
            When omitted, a fresh one is created with ``set_llm()`` so the
            LLM configuration lives in a single place.

    Returns:
        The agent returned by ``initialize_agent`` for the
        ``"zero-shot-react-description"`` agent type, with ``verbose=False``.
    """
    # Reuse the caller's LLM when given; otherwise fall back to the shared factory.
    if llm is None:
        llm = set_llm()

    # set tools
    tools = load_tools(["serpapi"], llm=llm)

    # set agent type
    return initialize_agent(
        tools,
        llm,
        agent="zero-shot-react-description",
        verbose=False,
    )

# from langchain.prompts import PromptTemplate
# prompt = PromptTemplate(
#     input_variables=["first_name","last_name","org_name"],
#     template = 'what is the email address and academic department of {first_name} {last_name} from {org_name}?'
# )

# translate answers to json
def jsonify_answer(answer):
    """Parse the agent's dictionary-style answer text into a Python object.

    Parsing is attempted in three steps, from strictest to most lenient:

    1. strict JSON, so valid JSON containing apostrophes is left intact;
    2. a Python literal via ``ast.literal_eval`` (accepted only when it
       yields a dict), which handles single-quoted keys, ``None`` and
       values such as ``"O'Brien"``;
    3. the legacy fallback of replacing every ``'`` with ``"`` and parsing
       the result as JSON (covers e.g. ``{'ok': true}``).

    Args:
        answer: Text returned by the agent.

    Returns:
        The parsed object, or ``None`` when ``answer`` is not a string or
        none of the parsing steps succeeds.
    """
    if not isinstance(answer, str):
        return None

    text = answer.strip()

    # 1) Strict JSON first: never rewrite quotes in already-valid input.
    try:
        return json.loads(text)
    except (ValueError, RecursionError):
        pass

    # 2) Python dict literal. literal_eval only evaluates literals, so it is
    #    safe to run on text produced by the model.
    try:
        literal = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        literal = None
    if isinstance(literal, dict):
        return literal

    # 3) Legacy behavior: blanket quote replacement, then JSON.
    try:
        return json.loads(text.replace("'", "\""))
    except (ValueError, RecursionError):
        return None

def get_longform_answer(agent, first_name, last_name, org_name):
    """Ask the agent for a person's email address and academic department.

    Args:
        agent: Object exposing ``run(query)``.
        first_name: First name interpolated into the query.
        last_name: Last name interpolated into the query.
        org_name: Organization name interpolated into the query.

    Returns:
        Whatever ``agent.run`` returns for the generated query.
    """
    # The prompt asks for a dictionary so the reply can be parsed afterwards.
    return agent.run(
        f'what is the email address and academic department of {first_name} {last_name} from {org_name}?  Organize the information in a dictionary.'
    )


def sample_answers(agent, data, n_samples=2):
    """Query the agent for the first rows of ``data`` and collect parsed answers.

    Args:
        agent: Agent passed through to ``get_longform_answer``.
        data: DataFrame with the columns ``investigator_first_name``,
            ``investigator_last_name`` and ``org_name``.
        n_samples: Number of leading rows to query (default 2). Values larger
            than ``len(data)`` are capped so short tables do not raise
            ``IndexError``; ``None`` means every row.

    Returns:
        A list of dicts, one per row whose answer parsed to a non-empty
        dict. Each dict is the parsed answer plus ``first_name``,
        ``last_name`` and ``org_name`` taken from the row. Rows whose answer
        could not be parsed into a non-empty dict are skipped.
    """
    total_rows = len(data)
    if n_samples is None:
        n_rows = total_rows
    else:
        n_rows = max(0, min(n_samples, total_rows))

    all_answers = []
    for i in range(n_rows):
        # get data
        row = data.iloc[i]
        first_name = row['investigator_first_name']
        last_name = row["investigator_last_name"]
        org_name = row['org_name']

        # get answer
        raw_answer = get_longform_answer(agent, first_name, last_name, org_name)

        # jsonify answer
        dict_answer = jsonify_answer(raw_answer)

        # add to list -- only dicts can take the extra keys; a parsed list or
        # string would otherwise fail on item assignment.
        if isinstance(dict_answer, dict) and dict_answer:
            dict_answer['first_name'] = first_name
            dict_answer['last_name'] = last_name
            dict_answer['org_name'] = org_name
            all_answers.append(dict_answer)

        # print progress
        print(f'---- query {i+1} ----')

    return all_answers



# Notebook driver: check the API keys, build the LLM and the agent, load the
# investigator table, then query the agent for the sampled rows.
load_api_keys()
# NOTE(review): `llm` is not referenced again in this cell; set_agent() is
# called without arguments and obtains its own LLM.
llm= set_llm()
agent = set_agent()

# load data
# sample_answers reads the columns investigator_first_name,
# investigator_last_name and org_name from this table.
data = pd.read_csv("datasets/upitt_data.csv")

# get answers
# Each query is one agent.run call; progress is printed per row.
all_answers = sample_answers(agent,data)

---- query 1 ----
---- query 2 ----


In [11]:
# Display the collected answers.
# NOTE(review): the rendered output contains real names and email
# addresses -- clear or redact it before sharing the notebook.
all_answers

[{'email': 'paula.monaghan-nichols@pitt.edu',
  'department': 'Biomedical Sciences',
  'first_name': 'a paula',
  'last_name': 'monaghan-nichols',
  'org_name': 'university of pittsburgh at pittsburgh'},
 {'email': 'abarchow@pitt.edu',
  'department': 'Environmental and Occupational Health',
  'first_name': 'aaron',
  'last_name': 'barchowsky',
  'org_name': 'university of pittsburgh at pittsburgh'}]