# In [None]:
# import required libraries
import requests        # to make HTTP requests to the GeoNames API
import time            # to pause between API requests to avoid rate limiting

# GeoNames account name sent as the "username" parameter of every API request.
# It is the default value of get_coordinates()'s `username` argument, so it is
# captured when that function is defined; replace it with your own account name.
geonames_username = "zainab128"

# get coordinates of a place using GeoNames API
def get_coordinates(place, username=geonames_username, fuzzy=0, timeout=1,
                    request_timeout=10):
  """Look up the coordinates of a place name with the GeoNames search API.

  Args:
    place: place name to search for (sent as the "q" parameter).
    username: GeoNames account name sent with the request.
    fuzzy: value passed through unchanged as the GeoNames "fuzzy" parameter.
    timeout: seconds to sleep BEFORE sending the request (rate limiting).
      Despite its name this is not the HTTP timeout.
    request_timeout: seconds to wait for the server before giving up.

  Returns:
    A dictionary {"latitude": ..., "longitude": ...} holding the "lat" and
    "lng" values of the first match exactly as the API returned them, or
    None when the request failed, the API reported an error, or nothing
    matched.
  """

  # pause before making a request to avoid overloading the API server
  time.sleep(timeout)

  # define the base URL for the GeoNames API
  url = "http://api.geonames.org/searchJSON?"

  # define parameters for the API request
  params = {
      "q": place,
      "username": username,
      "fuzzy": fuzzy,
      "maxRows": 1,
      # requests would serialise the Python bool True as "True"; send the
      # lowercase form used in the GeoNames documentation instead
      "isNameRequired": "true",
  }

  # make the API request and decode the JSON body; a network problem or a
  # non-JSON answer must not abort a whole batch of lookups, so report it
  # and treat the place as not found
  try:
    response = requests.get(url, params=params, timeout=request_timeout)
    results = response.json()
  except (requests.RequestException, ValueError) as error:
    print("Request to the GeoNames API failed for", repr(place), "-", error)
    return None

  print(results)  # print the response for debugging

  # GeoNames reports problems (unknown user, exhausted quota, ...) as a
  # "status" object instead of a "geonames" list; surface that message
  # rather than claiming that nothing matched
  status = results.get("status") if isinstance(results, dict) else None
  if status:
    print("GeoNames API returned an error for your API call",
          response.request.url, status)
    return None

  # try to extract the first result's coordinates
  try:
    result = results["geonames"][0]  # access the first match
    return {"latitude": result["lat"], "longitude": result["lng"]}
  except (IndexError, KeyError, TypeError):  # no match, or unexpected payload shape
    print("No results found for your API call", response.request.url)  # show the failed URL
    return None

# import CSV module to read/write TSV files
import csv

# define the output file where the gazetteer (place names + coordinates) will be saved
filename = "ner_gazetteer.tsv"

# open the output file in write mode with UTF-8 encoding; newline="" leaves
# line endings under the control of the csv writer
with open(filename, mode="w", encoding="utf-8", newline="") as file:

  # write through the csv module so that a place name containing a tab, a
  # quote or a newline is quoted instead of silently breaking the columns
  writer = csv.writer(file, delimiter="\t", lineterminator="\n")

  # write the header row (column names)
  writer.writerow(["place", "latitude", "longitude"])

  # open the input TSV; only its "place" column is used here
  with open("ner_counts.tsv", encoding="utf-8", newline="") as infile:
    # create a CSV reader that uses tab as a delimiter
    reader = csv.DictReader(infile, delimiter='\t')

    # iterate through each row in the input TSV
    for row in reader:
      place_name = row['place']  # extract the place name

      # call the get_coordinates function to get lat/lon (None when not found)
      coordinates = get_coordinates(place_name)

      if coordinates:
        latitude = coordinates["latitude"]
        longitude = coordinates["longitude"]
      else:
        # if coordinates were not found, mark both columns as "NA"
        latitude = longitude = "NA"

      # one output row per input row, found or not
      writer.writerow([place_name, latitude, longitude])
