In [11]:
from dotenv import load_dotenv
import os, json
import pandas as pd
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.llms import OpenAI, AI21

# Load API keys from .env file
def load_api_keys():
    """Load variables from a ``.env`` file and verify the required API keys exist.

    Calls ``load_dotenv()`` and then checks that ``OPENAI_API_KEY``,
    ``AI21_API_KEY`` and ``SERPAPI_API_KEY`` are all present in
    ``os.environ``. The key values are deliberately neither stored nor
    returned, so they cannot end up in a notebook cell output.

    Returns:
        None

    Raises:
        KeyError: if one or more of the required variables is missing. The
            message lists every missing name, not only the first one.
    """
    # Load variables from .env file into os.environ
    load_dotenv()

    required_keys = ("OPENAI_API_KEY", "AI21_API_KEY", "SERPAPI_API_KEY")

    # Check all keys up front so a single run reports everything that is missing.
    missing_keys = [name for name in required_keys if name not in os.environ]
    if missing_keys:
        raise KeyError(
            "Missing required environment variable(s): " + ", ".join(missing_keys)
        )

# Set LLM
def set_llm():
    """Build the language model used by this notebook.

    Returns:
        An ``OpenAI`` instance constructed with ``temperature=0``.
    """
    # temperature=0 is the deterministic setting.
    # Alternative backend (disabled): AI21(temperature=0)
    return OpenAI(temperature=0)

# Set agent and tools
def set_agent(llm=None):
    """Build an agent that has the ``serpapi`` tool available.

    Args:
        llm: Optional language model to drive the agent. When omitted, a
            model is created with ``set_llm()``, so the agent and the rest of
            the notebook share one model configuration instead of duplicating it.

    Returns:
        The object returned by ``initialize_agent`` for the
        ``"zero-shot-react-description"`` agent type, with ``verbose=False``.
    """
    # Fall back to the notebook's default model when the caller supplies none.
    if llm is None:
        llm = set_llm()

    # set tools
    tools = load_tools(["serpapi"], llm=llm)

    # set agent type
    return initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=False)

# Load the input table from a CSV path that is relative to the current working directory.
# sample_answers() reads the 'investigator_first_name', 'investigator_last_name'
# and 'org_name' columns from this frame.
data = pd.read_csv("datasets/upitt_data.csv")


# from langchain.prompts import PromptTemplate
# prompt = PromptTemplate(
#     input_variables=["first_name","last_name","org_name"],
#     template = 'what is the email address and academic department of {first_name} {last_name} from {org_name}?'
# )

def get_longform_answer(agent, first_name, last_name, org_name):
    """Ask the agent for a person's email address and academic department.

    Args:
        agent: Object exposing ``run(query)``.
        first_name: First name interpolated into the question.
        last_name: Last name interpolated into the question.
        org_name: Organization name interpolated into the question.

    Returns:
        Whatever ``agent.run`` returns for the generated question.
    """
    return agent.run(
        f'what is the email address and academic department of {first_name} {last_name} from {org_name}?'
    )

# Load API keys first, then build the LLM and the agent.
# Order matters: load_api_keys() runs load_dotenv() before anything is constructed
# (presumably the LLM and tool constructors read these keys from the environment — confirm).
load_api_keys()
# NOTE(review): `llm` is not used by any other code visible in this notebook;
# set_agent() is called without arguments and creates its own LLM.
llm= set_llm()
agent = set_agent()



In [17]:
def sample_answers(agent, data, n_samples=3):
    """Query the agent for the first ``n_samples`` rows of ``data``.

    For each row, a long-form answer is fetched with ``get_longform_answer``
    and the agent is then asked to reorganize that answer into a dictionary
    with keys "first_name", "last_name", "email" and "department".

    Args:
        agent: Object exposing ``run(query)``.
        data: DataFrame with the columns ``investigator_first_name``,
            ``investigator_last_name`` and ``org_name``.
        n_samples: Maximum number of leading rows to process (default 3).
            Tables with fewer rows are processed in full instead of raising
            ``IndexError``.

    Returns:
        A tuple ``(longform_answers, dict_answers)`` of two lists holding, in
        row order, the raw ``agent.run`` results for each step.
    """
    longform_answers = []
    dict_answers = []

    # Clamp to the rows actually available so a short table does not raise IndexError.
    n_rows = min(n_samples, len(data))

    for i in range(n_rows):
        # get data (look the row up once instead of once per column)
        row = data.iloc[i]
        first_name = row['investigator_first_name']
        last_name = row['investigator_last_name']
        org_name = row['org_name']

        # get answer
        longform_answer = get_longform_answer(agent, first_name, last_name, org_name)

        # organize into dictionary
        # NOTE(review): the reply is free text and is not guaranteed to be
        # dictionary-shaped; downstream parsing has to tolerate that.
        query = f'organize this into a dictionary with keys "first_name", "last_name", "email" and "department": {longform_answer}'
        dict_answer = agent.run(query)

        # print progress (emitted after both queries for this row have completed)
        print(f'---- query {i+1} ----')
        longform_answers.append(longform_answer)
        dict_answers.append(dict_answer)
    return longform_answers, dict_answers


# translate answers to json
def jsonify_answers(dict_answers):
    """Parse the agent's dictionary-like string answers into Python objects.

    Each answer is tried, in order, as:

    1. strict JSON (``json.loads``);
    2. a Python literal such as ``"{'a': 'b'}"`` (``ast.literal_eval``, which
       only evaluates literals and never executes code);
    3. JSON after replacing every single quote with a double quote, which is
       the original behavior and is kept so that previously accepted inputs
       are still accepted.

    Steps 1 and 2 keep apostrophes inside values (e.g. ``O'Brien``) intact;
    blanket quote replacement alone would corrupt them.

    Args:
        dict_answers: Iterable of answer strings.

    Returns:
        A list of the successfully parsed answers, in input order. Answers
        that cannot be parsed are reported with a printed message and skipped,
        so the result may be shorter than the input.
    """
    import ast  # local import: used only by this function

    # Errors that mean "this parser cannot read the answer". Listed explicitly
    # so that KeyboardInterrupt / SystemExit are never swallowed.
    parse_errors = (
        ValueError,       # includes json.JSONDecodeError
        SyntaxError,
        TypeError,        # non-string answers such as None
        AttributeError,   # non-string answers have no .replace
        RecursionError,
        MemoryError,
    )
    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", "\"")),
    )

    jsonified_answers = []
    for i, answer in enumerate(dict_answers):
        for parse in parsers:
            try:
                parsed = parse(answer)
            except parse_errors:
                continue
            jsonified_answers.append(parsed)
            break
        else:
            # No parser accepted this answer.
            print(f'error in answer {i+1}')
    return jsonified_answers

In [18]:
# Run the agent over the sample rows. Each row triggers two agent.run calls:
# one for the long-form answer and one to reorganize it into a dictionary.
longform_answers, dict_answers = sample_answers(agent,data)

---- query 1 ----
---- query 2 ----
---- query 3 ----


In [19]:
# Display the raw long-form agent answers, one string per processed row.
longform_answers 

['The email address and academic department of Paula Monaghan-Nichols from University of Pittsburgh at Pittsburgh is paula.monaghan-nichols@pitt.edu and Biomedical Sciences.',
 "Aaron Barchowsky's email address is abarchow@pitt.edu and his academic department is Environmental and Occupational Health.",
 "Aaron Batista's email address is aaron.batista@pitt.edu and his academic department is Computer Science and Engineering."]

In [20]:
# Display the agent's "organize into a dictionary" replies. These are raw strings and
# may not all be dictionary-shaped, so they still need to be parsed and validated.
dict_answers

["{'first_name': 'Paula', 'last_name': 'Monaghan-Nichols', 'email': 'paula.monaghan-nichols@pitt.edu', 'department': 'Biomedical Sciences'}",
 'The code to create a dictionary with keys "first_name", "last_name", "email" and "department" is:\n\ndef create_dict(keys):\n    return {key: None for key in keys}',
 "{'first_name': 'Aaron', 'last_name': 'Batista', 'email': 'aaron.batista@pitt.edu', 'department': 'Computer Science and Engineering'}"]

In [8]:
# NOTE(review): `dict_answer` is not assigned at top level by any cell visible here
# (it is only a local inside sample_answers), and this cell's execution count (8)
# precedes the cells above. Likely stale kernel state from an earlier run, so this
# cell would fail under Restart & Run All — confirm, then remove or recompute.
dict_answer 

"{'first_name': 'Paula', 'last_name': 'Monaghan-Nichols', 'email': 'paula.monaghan-nichols@pitt.edu', 'department': 'Biomedical Sciences'}"

---- query 1 ----


---- answer 1 ----
{'first_name': 'Paula', 'last_name': 'Monaghan-Nichols', 'email': 'pmonaghan@pitt.edu', 'department': 'Biomedical Sciences'}
{'first_name': 'Paula', 'last_name': 'Monaghan-Nichols', 'email': 'pmonaghan@pitt.edu', 'department': 'Biomedical Sciences'}
---- answer 2 ----
{'first_name': 'Aaron', 'last_name': 'Barchowsky', 'email': 'aab20@pitt.edu', 'department': 'Environmental and Occupational Health'}
{'first_name': 'Aaron', 'last_name': 'Barchowsky', 'email': 'aab20@pitt.edu', 'department': 'Environmental and Occupational Health'}
