In [97]:
import json
from typing import List, Dict, Tuple, Set

import dotenv
dotenv.load_dotenv()

from pydantic import BaseModel

from tqdm.auto import tqdm

from google import genai
from google.genai import types
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch


# One affiliation entry of a researcher. Nested inside `Researcher.affiliations`,
# so these field names become part of the JSON schema that `JSONWorker` passes
# to Gemini as `response_schema`. Comments (not docstrings) are used on purpose:
# a docstring on a pydantic model would be emitted into the generated schema.
class Affiliation(BaseModel):
  # Name of the institution; presumably written in Chinese characters — confirm.
  name_in_Chinese: str
  # Supporting quote; the InfoGatherer prompt asks for a quote from a Chinese source.
  quote: str
  # NOTE(review): the three optional fields are annotated `str` but default to
  # None; `Optional[str]` would describe them more accurately, but that changes
  # the generated schema, so it is left as-is here.
  title: str = None
  email: str = None
  phone: str = None

# Structured record for one researcher. Used as `response_schema` in
# `JSONWorker.extract` and as `list[Researcher]` in `JSONWorker.consolidate`.
class Researcher(BaseModel):
  # Self-reported confidence flag produced by the model.
  # NOTE(review): the InfoGatherer JSON prompt spells this key
  # "I_am_sure_that_I_have_found_the_researcher" (with "have") — the two differ.
  I_am_sure_that_I_found_the_researcher: bool
  name_in_Chinese: str
  # Both lists default to empty.
  research_keywords: List[str] = []
  affiliations: List[Affiliation] = []

## Multi-agent but manual orchestration approach

Gemini does not seem to be able to do many tasks at once. For example, it cannot consistently generate a JSON string in text-output mode, and structured JSON output cannot be used when tools are enabled. Some instructions are also not properly followed, such as "do not tell me how you think but just give me the result". 

So let's create individual agents for each task. 

* Agent 1: Web search with Gemini's own query rewriting and thinking
* Agent 2: Check the quality of the web search results
* Agent 3: Generate the final JSON output
* Main function: coordinate the work of the three agents. Need to obtain at least three candidates or three consistent "I cannot find" responses.

In [112]:
class InfoGatherer:
  """Agent 1: ask Gemini (with the Google Search tool) for a researcher's Chinese name.

  The instance holds the prompt templates and the model id. `one_prompt_approach`
  builds a single prompt, sends it through `call_gemini`, and returns the text
  parts of the answer together with an estimated cost from `calculate_cost`.
  """

  def __init__(self, model_id: str):
    """Store the prompt templates and the Gemini model id.

    Args:
      model_id: Gemini model name. `calculate_cost` only knows the prices of
        "gemini-2.5-flash" and "gemini-2.5-pro".
    """
    self.system_prompt = "You are a helpful cross-lingual customer discovery agent."

    # Main template; filled via str.format with `strategies`, `output_format`
    # and `researcher_info`.
    self.user_prompt = """
    ## Background and overview

    Many researchers who are China-based or have affiliations in China publish papers in English. On those papers, their names are not in Chinese characters. For example, their names are transliterated or romanized, such as in Pinyin or Wade-Giles. The goal here is to find their names in Chinese characters. Optionally, find their affiliations in Chinese characters, and their emails and phone numbers.
    
    ### Challenges
    
    1. Mapping from such transliterated/romanized names to their Chinese names is not a simple machine translation task due to the homophony of Chinese characters and different transliteration/romanization methods. There is no one-to-one mapping. 
    2. Many researchers use names in the English-speaking world that are not a direct transliteration or romanization of their Chinese name. For example, nvidia CEO Jensen Huang's Chinese name is 黃仁勳 whose romanization is Jen-Hsun Huang. As another example, Forrest Bao's Chinese given name has nothing to do with and cannot be transliterated to "Forrest". So do not rule out a name that does not match the transliterated/romanized name.
    3. Reseachers move from one affiliation to another. So do not rule out a researcher that does not match the affiliation provided in the input. Two researchers may be the same person if they share co-authors, especially when the co-authors are from the same institutions at the two occurences of the researcher.
    4. It is possible for a researcher to have multiple affiliations, some in China and some in other countries. S/he can have multiple affliations in China and multiple affliations in other countries. S/he may have multiple affiliations on one paper. So do not rely solely on the affiliation provided in the input.
    5. It is possible that the same researcher's name is written in different ways in your search results. The variants may be characters have similar or overlapping radicals or pronunciations.
    

    ### Strategies

    Please iteratively search the web. First, come up with an initial search query. Then, search the web. After each round, you should analyze the results, then think, and then refine or expand the search query for the next round. Stop when you are confident that you have found the researcher in the Chinese-speaking world or no new results can be found.

    Here are some ideas to build or expand the search queries but please do not limit yourself to these ideas:

    {strategies}

    ### Knowledge about Chinese names that may help you

    1. While there is no one-to-one mapping from transliterated/romanized names to Chinese names, the mapping of last/family Chinese names are more finite than the first/given names. For example, if you see "Huang" or "Wong" at the beginning or the end of someone's name, it is highly likely to be the last/family name and the character is almost certain to be 黄, 汪, 翁, or 王.
    2. In the transliteration/romanization of a Chinese name, the last/family name usually appear as the first word or the last word -- either case is common. A word or token in the middle of a transliterated/romanized Chinese name is mostly unlikely to be a last/family name.
    3. If someone's name in the English-speaking world is in the form of "a Western name + a Chinese name", the Chinese name is most likely the last/family name.
    4. A Chinese last name is usually a single character or syllable.

    ### Qualification

    1. You can search in both English and Chinese resources, but the Chinese name of a researcher must appear in Chinese sources (can be part of an English-dominant resources) to support the name and affiliation of the researcher.
    2. It is possible that no information in Chinese can support the name and affiliation of the researcher. In such case, you should say I don't know. Do not use results of researchers of similar names. Do not use English sources to support the Chinese name and affiliation of the researcher because it cannot. 
    3. There is no misspelling of the name or affiliation provided in the input. 

    ### Output format

    {output_format}

    Now, lets begin! Below is the information of the researcher to find:
    {researcher_info}
    """

    self.output_format_markdown = """
    
    Output the name and affiliation of the researcher in the following markdown format:
    * Chinese name 1
    * Affiliation 1
      - quote in Chinese source
      - title (if possible)
      - email (if possible)
      - phone number (if possible)
    * Affiliation 2
      - quote in Chinese source
      - title (if possible)
      - email (if possible)
      - phone number (if possible)
    ...

    For each name and affiliation, please provide a quote from the Chinese source to support it.   

    """

    # Tell me all information you found on the internet and how you processed it. 

    # At the end, 

    # This text is passed to str.format as an *argument* (it is not itself the
    # format string), so its braces must be written once. The previous `{{ }}`
    # escapes reached the model verbatim as doubled braces.
    # NOTE(review): the keys below ("name", "phone_number",
    # "I_am_sure_that_I_have_found_the_researcher") differ from the
    # Researcher/Affiliation field names ("name_in_Chinese", "phone",
    # "I_am_sure_that_I_found_the_researcher") — confirm which is intended.
    self.output_format_json = """

    Output the name and affiliation of the researcher in the following json format:

    {
      "I_am_sure_that_I_have_found_the_researcher": True/False,
      "name_in_Chinese": "Name of the researcher",
      "affiliations": [
        {
          "name": "Affiliation name",
          "title": "Title of the researcher in this affiliation (if possible)",
          "quote": "Quote from the Chinese source showing this researcher is employed/affiliated with this institution",
          "email": "Email address of the researcher (if possible)",
          "phone_number": "Phone number of the researcher (if possible)"
        }
      ]
    }

    The field "I am sure that I have found the researcher" should be True if you are sure that you have found the researcher. Otherwise, it should be False. 
    """

    self.strategies = """
    * See if the research info includings affiliation. If so, translate the affiliation to Chinese and add it to the search queries. 
    * Most scientific publications include the email address and phone number of authors. If you don't have the email address or phone number of the researcher, you may prioritize your search sources to scientific journals, either in English or Chinese, hoping to find the email address or phone number of the researcher. Once you have the email address or phone number of the researcher, you can add them to search query and search on Chinese resources especially scientific journals where the Chinese name of the researcher is most likely to be found. 
    * Different Chinese names can be transliterated or Romanized to the same name in English. Please try to come up some possible Chinese names, and then use them as part of the search terms. Doing so for the family/last name is easier than the first name. 
    * Prioritize your search sources to Chinese-speaking websites. Also try to search as much as possible from scientific literature sources such as Google Scholar, PubMed, CNKI, Semantic Scholar, etc.
    * Determine whether the name in English is romanized or transliterated, in the methods including but not limited to Pinyin, Wade-Giles, etc. If the name is romanized, confine your thinking and next round of search to Chinese names that match the romanized version.
    * Leverage the co-author network/circle of the researcher. You may find co-author network from Semantic Scholar, Google Scholar, etc.
    * A research may publish papers under multiple affiliations. Expand your search queries to include other affiliations appear in publications as long as they are related to the research topic. 
    * From all pages you have found, extract all names and affiliations and add them to the search queries. 
    """

    self.model_id = model_id

  def call_gemini(self, prompt):
    """Send `prompt` to Gemini with the Google Search tool enabled.

    A new `genai.Client()` is created on every call.

    Args:
      prompt: Full user prompt text.

    Returns:
      The response object returned by `client.models.generate_content`.
    """
    client = genai.Client()

    google_search_tool = Tool(
        google_search = GoogleSearch()
    )

    response = client.models.generate_content(
        model=self.model_id,
        contents=prompt,
        config= {
           "tools": [google_search_tool],
           "response_modalities": ["TEXT"],
          #  "thinking_config": types.ThinkingConfig(thinking_budget=-1), 
           "thinking_config": types.ThinkingConfig(thinking_budget=4096*2), 
           "system_instruction": self.system_prompt
        }
    )
    return response

  @staticmethod
  def _first_candidate(response):
    """Return `response.candidates[0]`, or None when there are no candidates."""
    candidates = getattr(response, "candidates", None)
    return candidates[0] if candidates else None

  def calculate_cost(self, response):
    """Estimate the USD cost of one response from its usage metadata.

    Every count that the response reports as None (or that is missing) is
    treated as 0, so responses without grounding metadata or without thinking
    tokens no longer raise.

    Args:
      response: Object exposing `usage_metadata` and `candidates`, as returned
        by `call_gemini`.

    Returns:
      Estimated cost as a float.

    Raises:
      KeyError: if `self.model_id` has no entry in the pricing table.
    """
    candidate = self._first_candidate(response)
    grounding = getattr(candidate, "grounding_metadata", None)
    web_search_queries = getattr(grounding, "web_search_queries", None) or []

    pricing = { # token prices are per 1M tokens
      # NOTE(review): "web_search" = 0.035 looks like USD per search
      # ($35 per 1,000); dividing by 1000 again below may understate the
      # search cost by 1000x — confirm against the current price list.
      "gemini-2.5-flash": 
        {"input": 0.30, "output": 2.5, "cache": 0.075, "web_search": 0.035},
      "gemini-2.5-pro":
        {"input": 1.25, "output": 10, "cache": 0.31, "web_search": 0.035},
    }
    pricing_for_model = pricing[self.model_id]

    usage = getattr(response, "usage_metadata", None)

    def token_count(field: str) -> int:
      # The usage fields may be None; count those as zero tokens.
      return getattr(usage, field, None) or 0

    output_tokens = token_count("candidates_token_count") + token_count("thoughts_token_count")

    cost = 0
    cost += pricing_for_model["input"] * token_count("prompt_token_count") / 1000000
    cost += pricing_for_model["output"] * output_tokens / 1000000
    cost += pricing_for_model["cache"] * token_count("cached_content_token_count") / 1000000
    cost += pricing_for_model["web_search"] * len(web_search_queries) / 1000
    
    return cost

  def one_prompt_approach(self, researcher_info: str, output_format: str) -> Tuple[List[str], float]:
    """Run the whole search-and-answer task in a single Gemini call.

    Args:
      researcher_info: Free-text description of the researcher to look up.
      output_format: "json" or "markdown"; selects the output-format section.

    Returns:
      `(thoughts, cost)`: the non-empty text parts of the first candidate
      (or `["I don't know"]` when there are none) and the estimated cost.

    Raises:
      ValueError: if `output_format` is neither "json" nor "markdown".
    """
    format_sections = {
      "json": self.output_format_json,
      "markdown": self.output_format_markdown,
    }
    if output_format not in format_sections:
      raise ValueError(f"Invalid output format: {output_format}")

    prompt = self.user_prompt.format(
      researcher_info=researcher_info,
      strategies=self.strategies,
      output_format=format_sections[output_format],
    )

    response = self.call_gemini(prompt)

    # The response may carry no candidate, no content, no parts, or parts
    # whose `text` is None; none of those should crash here or in the caller.
    candidate = self._first_candidate(response)
    content = getattr(candidate, "content", None)
    parts = getattr(content, "parts", None) or []
    thoughts = [part.text for part in parts if getattr(part, "text", None) is not None]
    if not thoughts:
      thoughts = ["I don't know"]

    cost = self.calculate_cost(response)

    # TODO: optionally report what the same call would have cost on the other model.

    return thoughts, cost

In [113]:
# Test InfoGatherer
# Smoke test: send each active query through `one_prompt_approach` with the
# JSON output format and print the returned text and estimated cost.
# This cell calls the Gemini API (network access and credentials required).
# Entries that are commented out are simply skipped.
test_queries = [
    # "Yifeng Wang College of Materials Science and Engineering, NanjLin Tech University, Nanjing, Jiangsu, 210009, China", 
    # "Professor David Z. Zhu, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, AB, T6G 1H9, Canada", 
    # "hua chen	jian	Palladium,Layers,Conjugated polymers,Light,Perovskites", 
    "gu	aijuan	Photocatalysis,Oxides,Adsorption,Organic reactions,Catalytic activity",
    # "ding	congcong	Chemistry, Thermal conductivity, Thermoelectrics, Solar cells, Electrical conductivity, Perovskites",
    "tao	jiayou				Photonics,Sensors,Layers,Two dimensional materials,Perovskites",
    "Dr. BAC AES, Department of Non-Science, University of Nowhere", 
]

info_gatherer = InfoGatherer(model_id="gemini-2.5-flash")
for researcher_info in test_queries:
    print ("Query: ", researcher_info)
    thoughts, cost = info_gatherer.one_prompt_approach(researcher_info=researcher_info, output_format="json")
    # `thoughts` is a list of text parts; concatenate them for display.
    print ("Thoughts: ", "".join(thoughts))
    print(f"\nCost: ${cost: .3f} \n ========= \n")

Query:  gu	aijuan	Photocatalysis,Oxides,Adsorption,Organic reactions,Catalytic activity
Thoughts:  I am sure that I have found the researcher.

```json
{
  "I_am_sure_that_I_have_found_the_researcher": true,
  "name_in_Chinese": "顾爱娟",
  "affiliations": [
    {
      "name": "苏州大学",
      "title": null,
      "quote": "专利权人苏州大学...发明人顾爱娟",
      "email": "ajgu@suda.edu.cn",
      "phone_number": null
    }
  ]
}
```

Cost: $ 0.003 

Query:  tao	jiayou				Photonics,Sensors,Layers,Two dimensional materials,Perovskites
Thoughts:  ```json
{
  "I_am_sure_that_I_have_found_the_researcher": true,
  "name_in_Chinese": "陶家佑",
  "affiliations": [
    {
      "name": "湖南理工学院信息光子学与自由空间光通信重点实验室, 物理与电子科学学院",
      "title": null,
      "quote": "Key Laboratory of Hunan Province on Information Photonics and Freespace Optical Communications, School of Physics and Electrical Sciences, Hunan Institute of Science and Technology, Yueyang, 414006, People's Republic of China. [9]",
      "email": "taojy2572@1

In [59]:
class RomanizationWorker():
    """Ask Gemini yes/no questions about romanized Chinese names.

    Both public methods return the model's raw text answer (the prompts request
    "true" or "false"), not a Python bool. Note that `bool("false")` is True, so
    callers must compare the string explicitly.
    """

    def __init__(self):
        self.system_prompt = "You are a helpful language expert."

    def _ask(self, prompt: str) -> str:
        """Send `prompt` to gemini-2.5-flash and return the answer text.

        Returns:
            The text parts of the first candidate joined with newlines, or
            "I don't know." when the response carries no usable text.
        """
        client = genai.Client()
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
            config= {
                "response_modalities": ["TEXT"],
                "thinking_config": types.ThinkingConfig(thinking_budget=-1), 
                "system_instruction": self.system_prompt
            }
        )

        try:
            # Parts whose text is None are skipped; they would break the join.
            texts = [part.text for part in response.candidates[0].content.parts
                     if part.text is not None]
        except (AttributeError, IndexError, TypeError):
            # Missing candidates / content / parts; other errors propagate.
            texts = []

        if not texts:
            print ("Error: in candidate.content.parts. ")
            return "I don't know."

        return "\n".join(texts)

    def detect(self, name: str) -> str:
        """Determine if a name is romanized or transliterated from Chinese characters.

        Returns:
            The model's answer as text (expected "true" or "false"), or
            "I don't know." when no answer text is available.
        """
        prompt = """Is the name below romanized or transliterated from Chinese characters? The romanization method can be Pinyin, Wade-Giles, Jyutping, Yale, or other methods. Return only "true" or "false". The romanized/transliterated name is: {name}"""
        return self._ask(prompt.format(name=name))

    def Romanization_and_Chinese_Character_matcher(self, query: str, name: str) -> str:
        """Given a _name_ in Chinese characters, check whether its romanized/transliterated form is part of the _query_.

        Returns:
            The model's answer as text (expected "true" or "false"), or
            "I don't know." when no answer text is available.
        """
        prompt = """Does the romanized/transliterated version of the name "{name}" present in the query "{query}"? The romanization method can be Pinyin, Wade-Giles, Jyutping, Yale, or other methods. Return only "true" or "false". """
        return self._ask(prompt.format(query=query, name=name))

# Manual checks of RomanizationWorker; each call hits the Gemini API.
# The printed values are the model's text answers ("true"/"false"), not bools.
romanization_worker = RomanizationWorker()
print (romanization_worker.detect("Jen-Hsun Huang")) 
print (romanization_worker.detect("David Huang"))
# Query that contains a romanized name vs. one that only has keywords.
print (romanization_worker.Romanization_and_Chinese_Character_matcher("gu	aijuan	Photocatalysis,Oxides,Adsorption,Organic reactions,Catalytic activity", "顾嫒娟"))
print (romanization_worker.Romanization_and_Chinese_Character_matcher("Photocatalysis,Oxides,Adsorption,Organic reactions,Catalytic activity", "光合氧"))

true
false
true
false


In [None]:
class JSONWorker:
  def __init__(self):
    self.system_prompt = """You are a helpful assistant processing JSON strings."""

  def extract(self, potential_json_string: str) -> str:
    """Extract JSON string from a string that may contain JSON string."""

    client = genai.Client()
    prompt_template = """Given a string that may contain JSON string, output a synatically correct JSON string that match the provided schema. 
    1. The provided string may or may not contain a JSON string. 
    2. If the given string does not contain a JSON string, extract information from the string and output a JSON string that matches the provided schema. If the string does not contain any information that can be used to generate a JSON string, output an empty JSON string. 
    3.If the given string contains a JSON string, generate your output solely based on the JSON string. If necessary, rectify the JSON string to make it synatically correct. 

    Here is the string: {{potential_json_string}}
    
    """

    prompt = prompt_template.format(potential_json_string=potential_json_string)

    response = client.models.generate_content(
      model="gemini-2.5-flash",
      contents=prompt.format(potential_json_string=potential_json_string),
      config= {
          "response_modalities": ["TEXT"],
          "thinking_config": types.ThinkingConfig(thinking_budget=-1), 
          "system_instruction": self.system_prompt,
          "response_mime_type": "application/json",
          "response_schema": Researcher
      }
    )
    researcher = json.loads(response.text)
    return researcher

  def consolidate(self, ListJSON: List[str | dict]) -> str:
    """Given a list of JSON strings, remove those that contain highly are highly similar to each other."""
    # BUG: Consolidation does not work 

    client = genai.Client()
    prompt_template = """Given a list of JSON strings about a researcher, remove those that are highly similar to each other. Here is the list: {{ListJSON}}"""
    
    prompt = prompt_template.format(ListJSON=ListJSON)
    
    response = client.models.generate_content(
      model="gemini-2.5-flash",
      contents=prompt,
      config= {
          "response_modalities": ["TEXT"],
          "thinking_config": types.ThinkingConfig(thinking_budget=-1), 
          "system_instruction": self.system_prompt,
          "response_mime_type": "application/json",
          "response_schema": list[Researcher]
      }
    )
    researcher = json.loads(response.text)
    return researcher

json_test_input = """
The initial search provided crucial information.

From the University of Alberta directory, it states that David Z. Zhu received his M.Sc. and B.Sc. from Shanghai Jiao Tong University, China, in 1989 and 1986, respectively. It also mentions he was a "Guest Professor, Zhejiang University, China, 2006".

More importantly, a seminar abstract and IAHR profile explicitly state: "Dr. David Zhu is a Professor at Ningbo University in China. He is a Professor Emeritus in the Department of Civil and Environmental Engineering at the University of Alberta, Canada where he was a faculty member for 25 years." This is also confirmed in an IAHR Water Monograph listing. Another source, apise.org, also confirms his affiliation with Ningbo University.

The Wikipedia entry for "David Zhu" refers to a Chinese racing driver, which is clearly not the same person. The entry for "Song-Chun Zhu" is also a different person. "Zhenduo Zhu" from Tsinghua University is also a different researcher.

The surname "Zhu" (朱) is common. Given the information that he is a professor at Ningbo University and was a Guest Professor at Zhejiang University, I should look for his Chinese name associated with these institutions. The middle initial "Z." might correspond to his given name. Some results mentioned "Zhiwei Zhu" as an alternative for David Z. Zhu.

I need to find a Chinese-speaking source that explicitly links "David Z. Zhu" to a Chinese name and his current affiliation (Ningbo University) with a supporting quote.

Let's refine the search to specifically look for "David Z. Zhu" and "Ningbo University" in Chinese, and try to find his full Chinese name. I will also include "Zhiwei Zhu" in Chinese if possible.

Possible Chinese names for "David Z. Zhu":
*   大卫·朱 (Dàwèi Zhū) - Direct transliteration of David Zhu.
*   朱兆安 (Zhū Zhào'ān) - This name appeared in some less relevant search results, but it's a possibility due to "Z."
*   朱志伟 (Zhū Zhìwěi) - Based on "Zhiwei Zhu" from.

Let's search for "朱教授 宁波大学" (Professor Zhu Ningbo University) and "朱志伟 宁波大学" (Zhu Zhiwei Ningbo University), and "大卫·朱 宁波大学" (David Zhu Ningbo University). I'll prioritize searching on Chinese academic or university websites.
The search results confirm the Chinese name and affiliation of Professor David Z. Zhu.

Specifically, multiple Chinese-speaking sources identify "David Z. Zhu" as "朱志伟" (Zhū Zhìwěi).

Here's the evidence:
*   "加拿大工程院院士、加拿大Alberta大学土木与环境工程学院、宁波大学海洋工程研究院院长朱志伟教授在我院水工水力学研究所做了题为《 环境水力学：研究与应用》的学术报告" (Canadian Academy of Engineering Academician, Professor Zhu Zhiwei, Dean of the Ocean Engineering Research Institute of Ningbo University, and from the Department of Civil and Environmental Engineering at the University of Alberta, Canada, gave an academic report titled "Environmental Hydraulics: Research and Application" at our institute's Hydraulic Engineering Research Institute.) This quote explicitly links "David Z. Zhu" (朱志伟) to Ningbo University and the University of Alberta.
*   "半年引进三院士！加拿大工程院院士朱志伟全职加盟宁波大学 继中科院院士赵玉芬、中国工程院院士陈剑平之后，最近，加拿大工程院院士朱志伟正式加盟宁波大学，成为该校海洋工程研究院院长。" (Half a year, three academicians introduced! Canadian Academy of Engineering Academician Zhu Zhiwei fully joined Ningbo University. Following Academician Zhao Yufen of the Chinese Academy of Sciences and Academician Chen Jianping of the Chinese Academy of Engineering, recently, Canadian Academy of Engineering Academician Zhu Zhiwei officially joined Ningbo University and became the Dean of its Ocean Engineering Research Institute.) This further confirms his full-time affiliation and title at Ningbo University.
*   "3月21日，加拿大工程院院士朱志伟走进宁波市海曙外国语学校，为海外学子们带来了一堂生动且深刻的科学讲座。 朱志伟院士现任宁波大学海洋工程研究院院长..." (On March 21st, Canadian Academy of Engineering Academician Zhu Zhiwei visited Ningbo Haishu Foreign Language School and delivered a lively and profound science lecture to overseas students. Academician Zhu Zhiwei is currently the Dean of Ningbo University's Ocean Engineering Research Institute...) This reiterates his current position.
*   "2018年4月，朱志伟院士作为宁波大学全职引进的高层次领军人才，正式入职宁波大学，现任宁波大学海洋工程研究院院长。" (In April 2018, Academician Zhu Zhiwei officially joined Ningbo University as a high-level leading talent introduced full-time by Ningbo University, and is currently the Dean of Ningbo University's Ocean Engineering Research Institute.) This provides the date of his full-time employment.
*   "宁波大学成立海洋工程学院成立揭牌，加拿大工程院院士朱志伟正式加盟宁波大学，担任海洋工程研究院院长。" (The establishment of Ningbo University's College of Ocean Engineering was unveiled, and Canadian Academy of Engineering Academician Zhu Zhiwei officially joined Ningbo University as the Dean of the Ocean Engineering Research Institute.)

Based on these results, Professor David Z. Zhu's Chinese name is 朱志伟 (Zhū Zhìwěi), and he is currently affiliated with Ningbo University.

Therefore, I have sufficient information to construct the JSON output.

{
  "name_in_Chinese": "朱志伟",
  "affiliations": [
    {
      "name_in_Chinese": "宁波大学海洋工程研究院",
      "title": "院长",
      "quote": "加拿大工程院院士朱志伟全职加盟宁波大学，成为该校海洋工程研究院院长。 [5]"
      "email": "zhuzhiwei@nbu.edu.cn",
      "phone": "123-456-7890"
    },
    {
      "name_in_Chinese": "宁波大学",
      "title": "教授",
      "quote": "Dr. David Zhu is a Professor at Ningbo University in China. [3]"
      "email": "zhuzhiwei@nbu.edu.cn",
      "phone": "123-456-7890"
    }
  ]
}

市海曙外国语学校，为海外学子们带来了一堂生动且深刻的科学讲座。 朱志伟院士现任宁波大学海洋工程研究院院长..." (On March 21st, Canadian Academy of Engineering Academician Zhu Zhiwei visited Ningbo Haishu Foreign Language School and delivered a lively and profound science lecture to overseas students. Academician Zhu Zhiwei is currently the Dean of Ningbo University's Ocean Engineering Research Institute...) This reiterates his current position.

"""

# Manual check of JSONWorker; every extract/consolidate call hits the Gemini API.
json_worker = JSONWorker()
# First extraction of the fixture text, pretty-printed for inspection.
extract1 = json.dumps(json_worker.extract(json_test_input), indent=2, ensure_ascii=False)
print (extract1)

# Second extraction of the same input, so consolidate() receives two
# presumably near-identical records to merge.
extract2 = json.dumps(json_worker.extract(json_test_input), indent=2, ensure_ascii=False)
json_list = [extract1, extract2]

consolidated = json.dumps(json_worker.consolidate(json_list), indent=2, ensure_ascii=False)
print (consolidated)



{
  "I_am_sure_that_I_found_the_researcher": true,
  "name_in_Chinese": "朱志伟",
  "affiliations": [
    {
      "name_in_Chinese": "宁波大学海洋工程研究院",
      "quote": "加拿大工程院院士朱志伟全职加盟宁波大学，成为该校海洋工程研究院院长。 [5]",
      "title": "院长",
      "email": "zhuzhiwei@nbu.edu.cn",
      "phone": "123-456-7890"
    },
    {
      "name_in_Chinese": "宁波大学",
      "quote": "Dr. David Zhu is a Professor at Ningbo University in China. [3]",
      "title": "教授",
      "email": "zhuzhiwei@nbu.edu.cn",
      "phone": "123-456-7890"
    }
  ]
}
[
  {
    "I_am_sure_that_I_found_the_researcher": true,
    "name_in_Chinese": "张三",
    "research_keywords": [
      "机器学习",
      "自然语言处理"
    ],
    "affiliations": [
      {
        "name_in_Chinese": "清华大学",
        "quote": "教授",
        "title": "教授",
        "email": "zhangsan@tsinghua.edu.cn"
      }
    ]
  },
  {
    "I_am_sure_that_I_found_the_researcher": false,
    "name_in_Chinese": "张三",
    "research_keywords": [
      "机器学习"
    ],
    "affiliations": [
 

In [None]:
# Main worker

"""Steps
1. Load queries from a CSV file. 
2. Send queries to InfoGatherer (TODO: Add romanization hint to main prompt) and Extract the JSON string from the response. Then check whether the name extracted makes sense. If so, buffer and save. 
3. Run the steps above 3 times for each query. 
"""