# Download: Geonames
- Create function "download_geonames()".
- Download the geonames file provided in the API structure document.
- Turn the JSON file into a pandas DataFrame "geonames_df" for future use.

In [None]:
# Import the required libraries
import requests
import pandas as pd
import json



# Function downloading the geonames data from the World Bank Climate Knowledge Portal and turning it into a pandas DataFrame
def download_geonames():
    """Download the World Bank CCKP geonames file and return it as a DataFrame.

    The JSON document is read as a top-level "country" mapping of
    country code -> details, where details["N"] is used as the country name
    and details["S"] is a mapping of subnational code -> subnational name.
    That nested structure is flattened into one row per
    (country, subnational unit) pair.

    Returns:
        pandas.DataFrame: columns "Country Code", "Country Name",
        "Subnational Code" and "Subnational Name".

    Raises:
        RuntimeError: if the server answers with a status code other than 200.
    """
    # Define the URL for the JSON file
    url_geonames = "https://climateknowledgeportal.worldbank.org/themes/cckp/data/geonames.json"

    # Fetch the JSON data using a GET request; the timeout keeps an
    # unresponsive server from blocking the caller forever
    response_geonames = requests.get(url_geonames, timeout=60)

    # Stop here on failure: continuing with no payload would only surface
    # later as an unrelated error while flattening
    if response_geonames.status_code != 200:
        raise RuntimeError(
            f"Failed to fetch data. Status code: {response_geonames.status_code}"
        )

    # Parse the JSON response into a Python dictionary
    geonames_json = response_geonames.json()

    # Flatten the nested structure into a list of dictionaries
    flat_geonames_data_json = [
        {
            "Country Code": country_code,
            "Country Name": details["N"],
            "Subnational Code": subnational_code,
            "Subnational Name": subnational_name,
        }
        for country_code, details in geonames_json["country"].items()
        for subnational_code, subnational_name in details["S"].items()
    ]

    # Convert the list into a pandas DataFrame and return it for further use
    return pd.DataFrame(flat_geonames_data_json)

# Execute the function (this performs the HTTP download) and keep the
# resulting DataFrame in geonames_df for further use
geonames_df = download_geonames()

## Download: Climate Data, CRU Data Set
- Function with a flexible URL: the variable name passed as the function argument is inserted into the URL, so the same function can download different variables.
- Data Specifications on WorldBank Platform: CRU data set > timeseries > monthly > 1901-2022 > mean > model "ts4.07"


In [None]:
# The API Structure:
# https://cckpapi.worldbank.org/cckp/v1/cru-x0.5_timeseries_{variable_name}_timeseries_monthly_1901-2022_mean_historical_cru_ts4.07_mean/all_countries?_format=json




# Function downloading the climate data from the World Bank Climate Knowledge Portal and turning it into a pandas DataFrame
def download_cru_data(cru_variable):
    """Download one CRU variable from the World Bank CCKP API as a DataFrame.

    The variable name is inserted into the API URL. The JSON response is read
    as a top-level "data" mapping of country -> {date: value}, which is
    flattened into one row per (country, date) pair.

    Args:
        cru_variable: Variable name placed into the URL, e.g. "tas".

    Returns:
        pandas.DataFrame: columns "Country", "Date" and "Value".

    Raises:
        RuntimeError: if the server answers with a status code other than 200.
    """
    # Define the URL for the JSON file
    url_climate_data = f"https://cckpapi.worldbank.org/cckp/v1/cru-x0.5_timeseries_{cru_variable}_timeseries_monthly_1901-2022_mean_historical_cru_ts4.07_mean/all_countries?_format=json"

    # Fetch the JSON data using a GET request; the timeout keeps an
    # unresponsive server from blocking the caller forever
    response_climate_data = requests.get(url_climate_data, timeout=60)

    # Stop here on failure: continuing with no payload would only surface
    # later as an unrelated error while flattening
    if response_climate_data.status_code != 200:
        raise RuntimeError(
            f"Failed to fetch data. Status code: {response_climate_data.status_code}"
        )

    # Parse the JSON response and keep only the per-country time series
    climate_data_json_clean = response_climate_data.json()["data"]

    # Flatten the nested structure into a list of dictionaries
    rows = [
        {"Country": country, "Date": date, "Value": value}
        for country, timeseries in climate_data_json_clean.items()
        for date, value in timeseries.items()
    ]

    # Convert the list into a pandas DataFrame and return it for further use
    return pd.DataFrame(rows)

# Execute the function and fetch the climate data for the three variables.
# Each call performs its own HTTP download; the calls run in order, so a
# failure in a later call leaves the earlier DataFrames already assigned.
tas_df = download_cru_data("tas")
tasmax_df = download_cru_data("tasmax")
tasmin_df = download_cru_data("tasmin")


## [To be deleted - Trying out alternative code]

In [None]:
# # I think our assumptions about the structure of the JSON file are inaccurate. Let's print the JSON file to see what it looks like.

# # Let's download one of the json files and print it to see what it looks like.

# # This is the structure of the json file:

# #  {
# #   "metadata": {
# #     "apiVersion": "v1",
# #     "status": "success",
# #     "messages": []
# #   },
# #   "data": {
# #     "ABW": {
# #       "1901-01": 27.2,
# #       "1901-02": 27.1,
# #       "1901-03": 27.6,
# #       "1901-04": 28.6,
# #       "1901-05": 29.2,
# #       "1901-06": 28.7,
# #       "1901-07": 29.1,
# #       "1901-08": 29.7,
# #       "1901-09": 29.5,
# #       "1901-10": 28.5,
# #       "1901-11": 27.1,
# #       "1901-12": 26.3,
# #       "1902-01": 26,

          
# # Define the URL for the JSON file
# url_climate_data = f"https://cckpapi.worldbank.org/cckp/v1/cru-x0.5_timeseries_tas_timeseries_monthly_1901-2022_mean_historical_cru_ts4.07_mean/all_countries?_format=json"
# # Fetch the JSON data using a GET request
# response_climate_data = requests.get(url_climate_data)

# # Parse the JSON response into a Python dictionary, if the request was successful
# climate_data_json = response_climate_data.json()

# # Try 3 !!!!!! IT WORKS !!!!
# climate_data_json_clean = climate_data_json["data"]

# rows = []
# for country, timeseries in climate_data_json_clean.items():
#     for date, value in timeseries.items():
#         rows.append({"Country": country, "Date": date, "Value": value})

# climate_data_df3 = pd.DataFrame(rows)

## [ To be deleted - For comparison: Geonames JSON Files]

In [None]:
# # Define the URL for the JSON file
# url_geonames = "https://climateknowledgeportal.worldbank.org/themes/cckp/data/geonames.json"
# # Fetch the JSON data using a GET request
# response_geonames = requests.get(url_geonames)

# # Parse the JSON response into a Python dictionary, if the request was successful
# if response_geonames.status_code == 200:
#     geonames_json = response_geonames.json()
# else:
#     print(f"Failed to fetch data. Status code: {response.status_code}")
#     geonames_json = None

# # # Flatten the nested structure into a list of dictionaries
# # flat_geonames_data_json = [
# #     {
# #         "Country Code": country_code,
# #         "Country Name": details["N"],
# #         "Subnational Code": subnational_code,
# #         "Subnational Name": subnational_name
# #     }
# #     for country_code, details in geonames_json["country"].items()
# #     for subnational_code, subnational_name in details["S"].items()
# # ]
# #Explain the code above:
# #1. Loop through the country codes and details in the geonames_json dictionary

# # Convert the list into a pandas DataFrame
# # geonames_df = pd.DataFrame(flat_geonames_data_json)