In [60]:
import dotenv
import os
dotenv.load_dotenv()
import json

from pydantic import BaseModel
from typing import List, Dict, Tuple, Set

# One institution/title pair for a researcher. The field names match the
# objects inside the "affiliations" array of the JSON example embedded in
# prompt_template.
# NOTE(review): comments are used instead of a class docstring on purpose;
# pydantic may surface a docstring as the schema description -- confirm
# before converting.
class Affiliation(BaseModel):
    # Institution name (the prompt example uses Chinese text, e.g. "清华大学").
    name: str
    # Title held at that institution (e.g. "教授" in the prompt example).
    title: str

# Top-level structured result: the researcher's Chinese name plus a list of
# affiliations. The field names match the JSON example in prompt_template.
# NOTE(review): this model is not referenced anywhere else in the visible
# part of the notebook; the model output is only printed, never parsed into
# this class -- confirm whether later cells use it.
class Response(BaseModel):
    # Researcher's name written in Chinese.
    name_in_Chinese: str
    # Zero or more (institution, title) pairs.
    affiliations: List[Affiliation]

Known limitations:
1. Tool use with JSON output is not supported. The API fails with: `ClientError: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': "Tool use with a response mime type: 'application/json' is unsupported", 'status': 'INVALID_ARGUMENT'}}`

In [None]:
Please return your output in a short bulleted list in the following format:
* Chinese name
* Affiliation in China 1
* Affiliation in China 2
* Affiliation in China 3

## One-prompt approach -- let the agent do the work behind the scenes

In [79]:
# Prompt skeleton for the one-prompt approach. It is filled in with
# str.format() by one_prompt_approach, which supplies two placeholders:
#   {ideas}            -- bulleted search hints
#   {researcher_info}  -- the English-language description of the researcher
# The doubled braces ({{ and }}) around the JSON example are str.format()
# escapes and render as single braces in the final prompt.
# NOTE(review): the text contains typos ("especialy", "contextsl",
# "searcher" where "researcher" is presumably meant). They are left as-is
# here because any edit changes the exact input sent to the model.
prompt_template = """
Below is the name, affiliation, contact info of a researcher in English. The researcher likely has an affiliation in China.
Your job is to find the name, affiliation, and contact info of the same researcher in China.

Please first come up with a strategy. Then, iteratively generate web search queries to find the researcher in the Chinese-speaking world. After each search, you should refine or expand the search queries based on the results, including but not limited to extracting all names and affiliations from returned web pages and adding them to the search queries. You can search in both English and Chinese resources but the evidence to support the Chinese name and affiliation of the searcher must be in Chinese. Stop when you are confident that you have found the researcher in the Chinese-speaking world or no new results can be found in the latest 3 searches.

Note that some researchers may work for different institutions in the English-speaking world (e.g. University of ABC in United States) and the Chinese-speaking world (e.g. University of XYZ in China). Also note many researchers use different names, especialy first/given names, in different contextsl, e.g., a Western name in the English-speaking world and a Chinese name in the Chinese-speaking world.

Output in a JSON format that includes the name in Chinese and a list of affiliations and corresponding titles of the researcher in China like this:
{{
  "name_in_Chinese": "张三",
  "affiliations": [
    {{"name": "清华大学", "title": "教授"}}, 
    {{"name": "北京大学", "title": "副教授"}}
  ]
}}

Do NOT tell me your thoughts, reasoning, or steps. Just return the output.

Here are some ideas for you to get started:
{ideas}

Here is the information of the researcher to research:
{researcher_info}
"""

# Bulleted search heuristics substituted into the {ideas} placeholder of
# prompt_template by one_prompt_approach.
# NOTE(review): the text contains typos ("occurences", "leaverage",
# "A research may publish"). They are left untouched because this string is
# sent to the model verbatim.
ideas = """
* Translate the affiliation to Chinese and search for it. 
* Search using the email address or phone number in Chinese resources especially scientific journals. 
* Searching using the researcher's name is difficult because different Chinese names can be transliterated or Romanized to the same English name. Please try to come up some possible Chinese names, and then search for them. Doing so for the family/last name is easier than the first name. 
* Prioritize your search source to be Chinese-speaking websites.
* One way is to obtain all Chinese names of researchers matching the research topic and the affiliation, and then transliterate them to English to find the one that matches the English info of the researcher. 
* A researcher may move from one affiliation to another. But you can link two researchers if they share co-authors, especially when the co-authors are from the same institutions at the two occurences of the researcher. 
* Try to leaverage the co-author network/circle of the researcher. 
* A research may publish papers under multiple affiliations. Expand your search queries to include other affiliations appear in publications as long as they are related to the research topic. 
* From all pages you have found, extract all names and affiliations and add them to the search queries. 
"""


from google import genai
from google.genai import types
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch

def call_gemini(prompt, model_id: str):
    """Send ``prompt`` to a Gemini model with the Google Search tool attached.

    Args:
        prompt: Text passed as the ``contents`` of the request.
        model_id: Name of the Gemini model to call.

    Returns:
        The response object returned by ``client.models.generate_content``,
        unmodified.
    """
    # A fresh client is created on every call.
    # NOTE(review): presumably credentials come from the environment
    # populated by dotenv.load_dotenv() -- confirm.
    gemini_client = genai.Client()

    search_tool = Tool(google_search=GoogleSearch())

    # Two options are deliberately left out of this config:
    #   * "candidateCount": 3 -- open question in the original code: Gemini
    #     returned only one candidate when this was set.
    #   * "response_mime_type": "application/json" -- Gemini does not
    #     support tool use together with JSON output.
    request_config = {
        "tools": [search_tool],
        "response_modalities": ["TEXT"],
        "thinking_config": types.ThinkingConfig(thinking_budget=-1),
        "system_instruction": "You are a cross-lingual customer discovery agent",
    }

    return gemini_client.models.generate_content(
        model=model_id,
        contents=prompt,
        config=request_config,
    )

def calculate_cost(response, model_id: str):
    """Estimate the USD cost of a single Gemini response.

    Only ``response.usage_metadata`` and
    ``response.candidates[0].grounding_metadata.web_search_queries`` are
    read. Any of these that is missing or ``None`` is treated as zero
    tokens / zero searches instead of raising, so responses without
    thinking tokens, without cached content, or without any web search can
    still be priced.

    Args:
        response: Response object returned by ``generate_content``.
        model_id: Key into the pricing table below.

    Returns:
        Estimated cost in USD as a float.

    Raises:
        KeyError: If ``model_id`` has no entry in the pricing table.
    """
    # "input", "output" and "cache" are USD per 1M tokens.
    # NOTE(review): "web_search" is applied below as rate * queries / 1000,
    # i.e. USD 0.035 per 1000 queries. Published pricing for these models
    # appears to be about USD 35 per 1000 grounded prompts, which would make
    # this term roughly 1000x too small -- confirm before relying on it.
    pricing = {
        "gemini-2.5-flash":
            {"input": 0.30, "output": 2.5, "cache": 0.075, "web_search": 0.035},
        "gemini-2.5-pro":
            {"input": 1.25, "output": 10, "cache": 0.31, "web_search": 0.035},
    }
    rates = pricing[model_id]

    usage = response.usage_metadata

    def _tokens(field):
        # The API leaves counters as None when they do not apply.
        return getattr(usage, field, None) or 0

    prompt_tokens = _tokens("prompt_token_count")
    cached_tokens = _tokens("cached_content_token_count")
    # Thinking tokens are billed at the output rate, same as the answer.
    output_tokens = _tokens("candidates_token_count") + _tokens("thoughts_token_count")

    # prompt_token_count already includes cached tokens, so bill only the
    # uncached remainder at the full input rate to avoid double charging.
    uncached_prompt_tokens = max(prompt_tokens - cached_tokens, 0)

    # Grounding metadata is absent when the model performed no search, and
    # candidates can be empty/None.
    candidates = response.candidates or []
    grounding = getattr(candidates[0], "grounding_metadata", None) if candidates else None
    web_search_queries = getattr(grounding, "web_search_queries", None) or []

    million = 1_000_000
    cost = 0.0
    cost += rates["input"] * uncached_prompt_tokens / million
    cost += rates["output"] * output_tokens / million
    cost += rates["cache"] * cached_tokens / million
    cost += rates["web_search"] * len(web_search_queries) / 1000

    return cost

def one_prompt_approach(prompt_template: str, ideas: str, researcher_info: str, model_id: str):
    """Run the whole researcher lookup as a single prompt and report its cost.

    The template is filled with ``ideas`` and ``researcher_info``, sent to
    Gemini through ``call_gemini``, and every part of every returned
    candidate is printed. The estimated cost for ``model_id`` is printed,
    followed by what the same token usage would have cost on the other
    supported model.

    Args:
        prompt_template: ``str.format`` template with ``{ideas}`` and
            ``{researcher_info}`` placeholders.
        ideas: Search hints inserted into the template.
        researcher_info: Description of the researcher to look up.
        model_id: ``"gemini-2.5-flash"`` or ``"gemini-2.5-pro"``; any other
            value raises ``KeyError`` during cost reporting.

    Returns:
        The response object from ``call_gemini``.
    """
    filled_prompt = prompt_template.format(ideas=ideas, researcher_info=researcher_info)
    response = call_gemini(filled_prompt, model_id)

    for candidate in response.candidates:
        print("One candidate:")
        for part in candidate.content.parts:
            print(part.text)
        print("=================")

    actual_cost = calculate_cost(response, model_id)

    # Each supported model maps to the other, for the what-if comparison.
    counterpart = {
        "gemini-2.5-flash": "gemini-2.5-pro",
        "gemini-2.5-pro": "gemini-2.5-flash",
    }
    the_other_model_id = counterpart[model_id]

    print(f"Cost of using {model_id}: ${actual_cost:.6f}")
    hypothetical_cost = calculate_cost(response, the_other_model_id)
    print(f"The cost would have been ${hypothetical_cost:.6f} if using {the_other_model_id}")

    return response


# Try Gemini Flash: run the one-prompt lookup for a sample researcher.
# This performs a live API call (one_prompt_approach -> call_gemini) and
# prints the candidates and cost estimate; the raw response is kept in the
# notebook-level variable `response`.
researcher_info = """Professor David Z. Zhu, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, AB, T6G 1H9, Canada"""
model_id = "gemini-2.5-flash"
response = one_prompt_approach(prompt_template, ideas, researcher_info, model_id)

One candidate:
The initial searches have been highly successful. Several results directly identify "David Z. Zhu" with a Chinese name and affiliations in China.

Here's what I found:

*   **Chinese Name:** 朱志伟 (Zhū Zhìwěi)
*   **Affiliations in China:**
    *   Ningbo University (宁波大学) - Professor, Dean of Ocean Engineering Research Institute (海洋工程研究院院长)
    *   Zhejiang University (浙江大学) - Professor, Permanent/Guest Professor (永谦讲座教授/Guest Professor), Academic Leader in Urban Water Environment Research Direction (市政工程学科城市水环境研究方向的学术带头人)
    *   Shanghai Jiaotong University (上海交通大学) (BSc and MSc education, also listed as alumni)

It seems Professor David Z. Zhu holds positions at both Ningbo University and Zhejiang University in China, in addition to his Professor Emeritus position at the University of Alberta.

The information is quite consistent across multiple sources. I have found the Chinese name, current affiliations, and titles. I believe I have sufficient information to answer t

In [80]:
# Try Gemini Pro: same sample researcher and prompt as the Flash run, only
# the model id differs. This performs a live API call and overwrites the
# notebook-level `researcher_info`, `model_id` and `response` variables.
researcher_info = """Professor David Z. Zhu, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, AB, T6G 1H9, Canada"""
model_id = "gemini-2.5-pro"
response = one_prompt_approach(prompt_template, ideas, researcher_info, model_id)

One candidate:
{
  "name_in_Chinese": "朱志伟",
  "affiliations": [
    {
      "name": "宁波大学",
      "title": "教授"
    },
    {
      "name": "浙江大学",
      "title": "客座教授"
    }
  ]
}
Cost of using gemini-2.5-pro: $0.009633
Cost of using gemini-2.5-pro: $0.009633
The cost would have been $0.002661 if using gemini-2.5-flash
