## Set Up: Jupyter Notebook

In [3]:
# Import the required libraries
import requests
import pandas as pd

# Run in Terminal:
pip install ingester3

SyntaxError: invalid syntax (1325460312.py, line 6)

# Download: Geonames File
- Create function "download_geonames()".
- Download the geonames file provided in the API structure document.
- Turn the json file into a Pandas DataFrame "geonames" for future use.

In [None]:
# Function downloading the geonames data from the World Bank Climate Knowledge Portal and turning it into a pandas DataFrame
def download_geonames():
         
    # Define the URL for the JSON file
    url_geonames = "https://climateknowledgeportal.worldbank.org/themes/cckp/data/geonames.json"
    # Fetch the JSON data using a GET request
    response_geonames = requests.get(url_geonames)

    # Parse the JSON response into a Python dictionary, if the request was successful
    if response_geonames.status_code == 200:
        geonames_json = response_geonames.json()
    else:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        geonames_json = None

    # Flatten the nested structure into a list of dictionaries
    flat_geonames_data_json = [
        {
            "Country Code": country_code,
            "Country Name": details["N"],
            "Subnational Code": subnational_code,
            "Subnational Name": subnational_name
        }
        for country_code, details in geonames_json["country"].items()
        for subnational_code, subnational_name in details["S"].items()
    ]
    
    # Convert the list into a pandas DataFrame
    geonames_df = pd.DataFrame(flat_geonames_data_json)

    # Return the DataFrame for further use
    return geonames_df

# Execute the function and fetch the geonames data
geonames_df = download_geonames()

## Download: Climate Data from CRU and ERA5 data sats
- Functions with flexible URL. Function can call different variables. Insert variable name as function argument.
    - Function for CRU data: download_cru_data
    - Function for ERA5 data: download_era5_data
- Data Specifications on WorldBank Climate Data Platform:
    - CRU data set > timeseries > monthly > 1901-2022 > mean > model "ts4.07"
    - ERA5 data set > timeseries > monthly > 1950-2023 > mean > model "era5" > model label "x0.25"

#### Define Function: Download CRU data

In [3]:
# The API Structure:
# https://cckpapi.worldbank.org/cckp/v1/cru-x0.5_timeseries_{variable_name}_timeseries_monthly_1901-2022_mean_historical_cru_ts4.07_mean/all_countries?_format=json

# Function downloading the climate data from the World Bank Climate Knowledge Portal and turning it into a pandas DataFrame
def download_cru_data(cru_variable):
        
        # Define the URL for the JSON file
        url_climate_data = f"https://cckpapi.worldbank.org/cckp/v1/cru-x0.5_timeseries_{cru_variable}_timeseries_monthly_1901-2022_mean_historical_cru_ts4.07_mean/all_countries?_format=json"
        # Fetch the JSON data using a GET request
        response_climate_data = requests.get(url_climate_data)
    
        # Parse the JSON response into a Python dictionary, if the request was successful
        if response_climate_data.status_code == 200:
            climate_data_json = response_climate_data.json()
        else:
            print(f"Failed to fetch data.")
            climate_data_json = None
    
        # Flatten the nested structure into a list of dictionaries
        climate_data_json_only_data = climate_data_json["data"]

        rows = []

        for country, timeseries in climate_data_json_only_data.items():
            for date, value in timeseries.items():
                rows.append({"Country": country, "Date": date, "Value": value})
    
        # Convert the list into a pandas DataFrame
        climate_data_df = pd.DataFrame(rows)
    
        # Return the DataFrame for further use
        return climate_data_df


#### Download: CRU variables that should definitely be adopted.

In [4]:
# tas // Average Mean Surface Air Temperature // G: Only relative temperature variables
tas_cru_df = download_cru_data("tas")

# tasmax // Average Maximum Surface Air Temperature // G: Only relative temperature variables
tasmax_cru_df = download_cru_data("tasmax")

# tasmin // Average Minimum Surface Air Temperature // G: Only relative temperature variables
tasmin_cru_df = download_cru_data("tasmin")


#### Define Function: Download ERA5 data

In [5]:
# The API Structure:
# https://cckpapi.worldbank.org/cckp/v1/era5-x0.25_timeseries_{variable_name}_timeseries_monthly_1950-2023_mean_historical_era5_x0.25_mean/all_countries?_format=json

# Function downloading the climate data from the World Bank Climate Knowledge Portal and turning it into a pandas DataFrame
def download_era5_data(era5_variable):
        
        # Define the URL for the JSON file
        url_climate_data = f"https://cckpapi.worldbank.org/cckp/v1/era5-x0.25_timeseries_{era5_variable}_timeseries_monthly_1950-2023_mean_historical_era5_x0.25_mean/all_countries?_format=json"
        # Fetch the JSON data using a GET request
        response_climate_data = requests.get(url_climate_data)
    
        # Parse the JSON response into a Python dictionary, if the request was successful
        if response_climate_data.status_code == 200:
            climate_data_json = response_climate_data.json()
        else:
            print(f"Failed to fetch data.")
            climate_data_json = None
    
        # Flatten the nested structure into a list of dictionaries
        climate_data_json_only_data = climate_data_json["data"]

        rows = []

        for country, timeseries in climate_data_json_only_data.items():
            for date, value in timeseries.items():
                rows.append({"Country": country, "Date": date, "Value": value})
    
        # Convert the list into a pandas DataFrame
        climate_data_df = pd.DataFrame(rows)
    
        # Return the DataFrame for further use
        return climate_data_df

#### Download: ERA5 variables that should definitely be adopted.

In [6]:
# cdd65 // Cooling Degree Days (ref-65°F)
cdd65_era5_df = download_era5_data("cdd65")

# hd35 // Number of Hot Days (Tmax > 35°C)
hd35_era5_df = download_era5_data("hd35")

# hd40 // Number of Hot Days (Tmax > 40°C)
hd40_era5_df = download_era5_data("hd40")

# hd42 // Number of Hot Days (Tmax > 42°C)
hd42_era5_df = download_era5_data("hd42")

# hdd65 // Heating degree days (ref-65°F)
hdd65_era5_df = download_era5_data("hdd65")

# hi35 // Number of Days with Heat Index > 35°C
hi35_era5_df = download_era5_data("hi35")

# hi37 // Number of Days with Heat Index > 37°C
hi37_era5_df = download_era5_data("hi37")

# hurs // Relative Humidity
hurs_era5_df = download_era5_data("hurs")

# prpercnt // Precipitation Percent Change
prpercnt_era5_df = download_era5_data("prpercnt")

# rx1day // Average Largest 1-Day Precipitation
rx1day_era5_df = download_era5_data("rx1day")

# rx5day // Average Largest 5-Day Cumulative Precipitation
rx5day_era5_df = download_era5_data("rx5day")

# tnn // Minimum of Daily Min-Temperature
tnn_era5_df = download_era5_data("tnn")

# txx // Maximum of Daily Max-Temperature
txx_era5_df = download_era5_data("txx")


#### Download: ERA5 variables that should probably be adopted.

In [8]:
# pr // Precipitation // G: "Total wet day precipitation" // similar, not identical to existing "pr" variable // This variable is average precipitation over a given time // Potentially also includes smaller variation of rain below 1mm
pr_era5_df = download_era5_data("pr")

# hd30 // Number of Hot Days (Tmax > 30°C) //  G: - // Overlaping with existing hd35, hd40, hd42
hd30_era5_df = download_era5_data("hd30")

# hd50 // Number of Hot Days (Tmax > 50°C) // G: - // Only daily maximum temperature per month/year.
hd50_era5_df = download_era5_data("hd50")

# hi39 // Number of Days with Heat Index > 39°C // Did not include variable that was available
hi39_era5_df = download_era5_data("hi39")

# hi41 // Number of Days with Heat Index > 41°C // G: - // Did not include variable that was available
hi41_era5_df = download_era5_data("hi41")

# r50mm // Number of Days with Precipitation >50mm // G: - // Only "Very heavy precipitation days" = days with more than 20mm
r50mm_era5_df = download_era5_data("r50mm")

# r95ptot // Precipitation amount during wettest days //  G: - // Did not include identical variable "Very wet day precipitation"
r95ptot_era5_df = download_era5_data("r95ptot")

# tr23 // Number of Tropical Nights (T-min > 23°C) // [Very similar to "tr"]
tr23_era5_df = download_era5_data("tr23")

# tr26 // Number of Tropical Nights (T-min > 26°C) // [Very similar to "tr"]
tr26_era5_df = download_era5_data("tr26")

# tr29 // Number of Tropical Nights (T-min > 29°C) // [Very similar to "tr"]
tr29_era5_df = download_era5_data("tr29")

# tr32 // Number of Tropical Nights (T-min > 32°C) // [Very similar to "tr"]
tr23_era5_df = download_era5_data("tr32")


#### Download: ERA5 variables that should probably NOT be adopted.

In [9]:
# fd // Number of Frost Days (Tmin < 0°C) // G: Almost identical variable // G: count variable // Here: Average over time, i.e. data period. Smoothed-out, long-term perspective of Frost Days; Less interesting for forecasting.
fd_era5_df = download_era5_data("fd")

# id // Number of Ice Days (Tmax < 0°C) // G: Almost identical variable // G: count variable // Here: Average over time, i.e. data period. Smoothed-out, long-term perspective of Ice Days; Less interesting for forecasting.
id_era5_df = download_era5_data("id")


## Data Wrangling: Bringing data sets with climate variables into suitable VIEWS format