In [None]:
## Query response format - 
{
    "solution" : {"type": "string", "description": "The solution of the problem"},
    "queries" : [ {"type": "dict", "description": "The search queries and their search domain", "example" : {"query":"Faculty of Engineering", "domain":"web"}}],

}

In [5]:
# Example 1.
# Goal: "I want an internship in California in molecular biology".
# Flat layout: every query carries its own "domain", and the domains used are
# repeated in "search_domain".
# NOTE(review): these examples use the key "target", while sys_prompt asks the
# model for "targets" -- confirm which spelling is intended.
eg1 = {"target": ["research professors", "labs", "research companies"],
 "queries": [{"query":"Molecular biology professors in California", "domain":"web"}, 
             # NOTE(review): empty query string -- looks like an unfinished placeholder.
             {"query":"", "domain":"web"}, 
             {"query":"Molecular biology research labs", "domain":"web"}],
  "search_domain": ["web"]
 }

# Example 2, first version (same flat layout as eg1).
# Goal: "I want a catering service for a wedding in Santa Clara, for 100 people".
eg2_0 = {
    "target": ["catering service", "catering companies", "catering service for wedding"],
    "queries": [{"query":"Catering service for wedding in Santa Clara", "domain":"web"}, 
                {"query":"Catering service in Santa Clara", "domain":"web"}, 
                {"query":"Catering service", "domain":"yelp"},
                # NOTE(review): "Cantering" looks like a typo for "Catering".
                {"query":"Cantering service in Santa Clara", "domain":"gmaps"}
                ],
    "search_domain": ["web", "gmaps", "yelp"]
}

# Example 2, second version of the same goal. Queries are grouped by search
# domain, which is the shape sys_prompt requests for "queries". Compared with
# eg2_0 this version:
# - takes fewer tokens
# - has a better structure
# - makes it more likely that `web` is always selected as a search domain
# - needs no separate "search_domain" list
eg2_1 = {
    "target": ["catering companies"],
    "queries": {"web" : ["Catering service for wedding in Santa Clara", "Catering service in Santa Clara"],
                "yelp" : "Catering service",
                # NOTE(review): "Cantering" looks like a typo for "Catering".
                "gmaps" : "Cantering service in Santa Clara"
                },
}


# System prompt for query generation. It is sent as the `system` message by the
# request helpers in this notebook and is also written verbatim into every
# record of the dataset file, so any edit here changes both the requests and
# newly generated dataset records.
# This is a regular (non-raw) triple-quoted string: each \" below is stored as
# a plain double quote, and the text starts and ends with a newline.
sys_prompt = """
Comprehend the goal, and provide small web search queries to assist in achieving it. The queries should be based on finding the email of best individual person or an expert, to contact for helping or completing the user goal. First give the list of people/vendor (1 to 3) to approach for the goal (Eg- Professors, Catering Companies etc) in small strings as targets (focus on a person in 1-3 words). Then give search queries, always give search queries for `web` in a list of string(max 3), each targeting a target and slightly broaden the search(searching for their email). Give yelp and gmaps search query only if necessary for the goal.`yelp` and `gmaps` both are used for local businesses, including personal, small, and medium-sized enterprises, use both only when location is given by user (but don't use near me), else give an empty string. 'yelp' search query should NOT include location in its query string (Yelp does not accept location based search query, only vendor). The output should be in JSON format : "{\"targets\": [\"\",\"\"], \"queries\": {\"web\": [\"\", \"\"...], \"yelp\": \"\", \"gmaps\": \"\"}}
"""

In [14]:
import os
from openai import OpenAI
import json
from dotenv import load_dotenv

# Pull settings from a local .env file (if one exists) into the environment.
load_dotenv()

# Read the API key from the environment and build the shared client that the
# request helpers in this notebook use.
MY_ENV_VAR = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=MY_ENV_VAR)
print("client ready")

client ready


In [15]:

def gpt4_response_string(goal, model="gpt-3.5-turbo"):
    """Ask a chat model for targets and search queries for ``goal``.

    Sends the module-level ``sys_prompt`` as the system message and
    ``"Goal:<goal>"`` as the user message through the module-level ``client``,
    requesting a JSON-object response.

    Note: despite the function name, the default model is ``gpt-3.5-turbo``.

    Args:
        goal: The user's goal as plain text.
        model: Model identifier passed to the API. Defaults to the value that
            used to be hard-coded, so existing calls behave exactly as before.

    Returns:
        The reply content parsed and re-serialised with ``json.dumps`` (a JSON
        string), or ``None`` if the API call raised; the error is printed in
        that case.

    Raises:
        TypeError, ValueError: if the reply content is missing or is not valid
            JSON. Parsing happens outside the ``try`` block, so these
            propagate to the caller instead of being turned into ``None``.
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": f"Goal:{goal}"},
            ],
        )
    except Exception as e:
        # Best-effort: report the request failure and signal it with None.
        print(e)
        return None
    # Round-trip through json to validate the reply and normalise its formatting.
    parsed = json.loads(completion.choices[0].message.content)
    return json.dumps(parsed)

def display_response(response):
    """Show ``response`` in the notebook output by passing it to ``display``."""
    display(response)

fail_file = "../data/fail_goals.txt"
dataset_file = "../data/query_generation.jsonl"

# Interactively build the dataset: for every goal in goals2.txt, generate a
# response, show it, and let the operator accept it (Enter), reject it (n) or
# stop (e). Rejected and failed goals are appended to fail_file; accepted ones
# are appended to dataset_file.
with open('../data/goals2.txt', 'r') as file:
    for line in file:
        # The first character of every line is dropped before trimming
        # (presumably a bullet/marker prefix in goals2.txt -- TODO confirm).
        goal = line[1:].strip()
        if not goal:
            # Blank line: there is no goal to send to the model.
            continue

        try:
            response = gpt4_response_string(goal)
        except Exception as e:
            print(e)
            response = None

        # gpt4_response_string signals a failed API call by returning None
        # rather than raising, so both failure paths are handled here. Without
        # this check a None response would be written into the dataset and
        # then crash on `response + "\n"`.
        if response is None:
            with open(fail_file, 'a') as f:
                f.write(f"{goal}\n")
            continue
        display_response(response)

        # Ask the operator: Enter accepts, "n" skips the goal, "e" exits.
        inp = input("Enter to continue, n to skip, e to exit").strip().lower()

        if inp == "n":
            with open(fail_file, 'a') as f:
                f.write(f"{goal}\n")
            # A skipped goal must not be added to the dataset.
            continue
        elif inp == "e":
            break
        val = {"messages":[{"role": "system", "content": sys_prompt},{"role": "user", "content": goal},{ "role": "assistant", "content": response}]}
        # Write the accepted record to the dataset file.
        with open(dataset_file, 'a') as f:
            f.write(json.dumps(val) + "\n")
            # NOTE(review): this appends the raw response as an extra line
            # after each record, so the file is not one-record-per-line.
            # Confirm whether this is intentional before using the file as-is.
            f.write(response + "\n")

print("done")


        


        


'{"targets": ["Professors", "Research Coordinator", "Internship Coordinator"], "queries": {"web": ["Harvard Medical School professors email contact", "Harvard Medical School research coordinator contact email", "Harvard Medical School internship coordinator contact email"], "yelp": "", "gmaps": ""}}'

done


In [16]:

def gpt_response_string(goal, model="ft:gpt-3.5-turbo-1106:margati:query-generation:94vNTAHo"):
    """Ask the fine-tuned query-generation model for targets and queries.

    Sends the module-level ``sys_prompt`` as the system message and
    ``"Goal:<goal>"`` as the user message through the module-level ``client``,
    requesting a JSON-object response.

    Args:
        goal: The user's goal as plain text.
        model: Model identifier passed to the API. Defaults to the fine-tuned
            model id that used to be hard-coded, so existing calls behave
            exactly as before.

    Returns:
        The reply content parsed and re-serialised with ``json.dumps`` (a JSON
        string), or ``None`` if the API call raised; the error is printed in
        that case.

    Raises:
        TypeError, ValueError: if the reply content is missing or is not valid
            JSON. Parsing happens outside the ``try`` block, so these
            propagate to the caller instead of being turned into ``None``.
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": f"Goal:{goal}"},
            ],
        )
    except Exception as e:
        # Best-effort: report the request failure and signal it with None.
        print(e)
        return None
    # Round-trip through json to validate the reply and normalise its formatting.
    parsed = json.loads(completion.choices[0].message.content)
    return json.dumps(parsed)

# Try the fine-tuned model on a single goal that includes a location.
Goal = (
    "I want to find a reliable construction service to renovate my house. "
    "Location - New York"
)
response = gpt_response_string(Goal)
print(response)


{"targets": ["Construction Company", "General Contractor", "Home Renovation Service"], "queries": {"web": ["reliable renovation construction New York email contact", "best general contractor New York renovation email", "top home renovation service New York contact email"], "yelp": "construction companies", "gmaps": ""}}
