In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point

In [None]:
import warnings

# pandas behaviour: turn off the chained-assignment (SettingWithCopy) check.
pd.set_option('mode.chained_assignment', None)

# pandas display: two-decimal floats, and no cap on the rows/columns rendered.
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Ignore Warnings: silence every FutureWarning and UserWarning for this session.
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', UserWarning)

In [None]:
# World Bank WDI extract; the frames shown further down have one row per
# country/series with one column per year from 1980 to 2020.
wdi = pd.read_csv(
    filepath_or_buffer="/work/data/World_Bank/wdi_1980_to_2020.csv",
)

In [None]:
# Global Coral Bleaching database (v4 parquet). Later cells read its
# Country_Code, Country_Name and Date_Year columns.
gcb = pd.read_parquet(
    path="/work/data/Global_Coral_Bleaching_DB/gcb_v4.parquet",
)

In [None]:
# FAOSTAT "Fertilizers by Nutrient" normalized export. The file is decoded as
# ISO-8859-1, and low_memory=False makes pandas parse it in a single pass
# rather than in chunks.
fert = pd.read_csv(
    "/work/data/United_Nations/FAOSTAT/Fertilizers_by_Nutrients/Inputs_FertilizersNutrient_E_All_Data_(Normalized).csv",
    encoding="ISO-8859-1",
    low_memory=False,
)

In [None]:
# Reference table of country names and codes; its Country_Name and
# Country_Code columns are used below to attach codes to the FAOSTAT rows.
country_code_df = pd.read_csv(
    "/work/data/References/country_name_codes.csv",
    low_memory=False,
)

In [None]:
# WDI series codes to keep. Topics covered: fishing, timber, forest, marine,
# agriculture, fertilizer, tourism, and population totals.
filter_codes = [
    'AG.LND.IRIG.AG.ZS',
    'AG.LND.AGRI.ZS',
    'AG.LND.AGRI.K2',
    'NV.AGR.TOTL.ZS',
    'NV.AGR.TOTL.KD.ZG',
    'NV.AGR.TOTL.CD',
    'ER.FSH.AQUA.MT',
    'ER.FSH.CAPT.MT',
    'AG.CON.FERT.ZS',
    'EN.FSH.THRD.NO',
    'AG.LND.FRST.ZS',
    'AG.LND.FRST.K2',
    'ST.INT.ARVL',
    'ER.MRN.PTMR.ZS',
    'EN.POP.DNST',
    'SP.POP.TOTL',
    'ER.PTD.TOTL.ZS',
    'ER.FSH.PROD.MT',
]

# Keep only the WDI rows whose Series_Code is one of the codes above.
df = wdi.loc[wdi['Series_Code'].isin(filter_codes)]

In [None]:
# Year columns are stored under string labels '1980' ... '2020'.
columns_years = list(map(str, range(1980, 2021)))

# Boolean mask with one entry per row of df (i.e. per country/series pair, not
# per country): True where every year column is NaN.
missing_all_data_countries = df.loc[:, columns_years].isnull().all(axis='columns')

# Drop the rows that carry no observations at all.
df_with_some_data = df.loc[~missing_all_data_countries]

In [None]:
# Inspect the country/series rows that have no value in any year.
df.loc[missing_all_data_countries]

Unnamed: 0,Country_Name,Country_Code,Series_Name,Series_Code,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
661,Afghanistan,AFG,"International tourism, number of arrivals",ST.INT.ARVL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
748,Afghanistan,AFG,Marine protected areas (% of territorial waters),ER.MRN.PTMR.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4517,American Samoa,ASM,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4528,American Samoa,ASM,"Agriculture, forestry, and fishing, value adde...",NV.AGR.TOTL.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4529,American Samoa,ASM,"Agriculture, forestry, and fishing, value adde...",NV.AGR.TOTL.KD.ZG,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4533,American Samoa,ASM,"Agriculture, forestry, and fishing, value adde...",NV.AGR.TOTL.CD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6003,Andorra,AND,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6034,Andorra,AND,Aquaculture production (metric tons),ER.FSH.AQUA.MT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6374,Andorra,AND,Fertilizer consumption (kilograms per hectare ...,AG.CON.FERT.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6692,Andorra,AND,Marine protected areas (% of territorial waters),ER.MRN.PTMR.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
# Rows with no value in any year, limited to countries whose code also appears
# in the coral bleaching data.
df.loc[
    df[columns_years].isna().all(axis=1)
    & df['Country_Code'].isin(gcb['Country_Code'].unique())
]

Unnamed: 0,Country_Name,Country_Code,Series_Name,Series_Code,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
8975,Antigua and Barbuda,ATG,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19377,"Bahamas, The",BHS,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
23835,Barbados,BRB,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
28293,Belize,BLZ,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
31265,Bermuda,BMU,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
31296,Bermuda,BMU,Aquaculture production (metric tons),ER.FSH.AQUA.MT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
32592,Bermuda,BMU,Total fisheries production (metric tons),ER.FSH.PROD.MT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
49097,Cambodia,KHM,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
53555,Cayman Islands,CYM,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
60985,China,CHN,Agricultural irrigated land (% of total agricu...,AG.LND.IRIG.AG.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
# Number of distinct country codes in the coral bleaching data. A missing code,
# if present, is counted as one value, exactly as len(unique()) would count it.
gcb['Country_Code'].nunique(dropna=False)

85

In [None]:
# Mean of the AG.LND.IRIG.AG.ZS column for each (country, year) pair in gcb.
(
    gcb
    .groupby(by=['Country_Name', 'Date_Year'])
    ['AG.LND.IRIG.AG.ZS']
    .mean()
)

Country_Name                                  Date_Year
Antigua and Barbuda                           2003.00       NaN
                                              2004.00       NaN
Australia                                     1992.00       NaN
                                              1997.00       NaN
                                              1998.00       NaN
                                              1999.00       NaN
                                              2002.00      0.57
                                              2003.00      0.54
                                              2004.00      0.61
                                              2005.00      0.59
                                              2006.00      0.62
                                              2007.00      0.48
                                              2008.00      0.48
                                              2009.00      0.46
                                              20

In [None]:
# Row-wise mean across the year columns of Malaysia's AG.LND.IRIG.AG.ZS series.
df.loc[
    (df['Country_Name'] == 'Malaysia') & (df['Series_Code'] == 'AG.LND.IRIG.AG.ZS'),
    columns_years,
].mean(axis=1)

178379   5.24
dtype: float64

In [None]:
# Hand-entered fill-in values keyed by three-letter country code, labelled by
# the original author as "Missing AG.CON.FERT.PT.ZS values".
# NOTE(review): the leading keys (ATG, BHS, BRB, BLZ, BMU, KHM, CYM, CHN) are
# the same countries, in the same order, that the earlier all-years-missing
# check lists for AG.LND.IRIG.AG.ZS -- confirm which indicator these numbers
# belong to before joining them anywhere.
# TODO: record where the numbers come from; no source is given in this notebook.
map_fert_pct = {
    'ATG': 7.69, 'BHS': 8.33, 'BRB': 39.00, 'BLZ': 3.48, 'BMU': 0,
    'KHM': 9.17, 'CYM': 7.69, 'CHN': 51.48, 'COM': 0.13, 'CRI': 18.14,
    'CUB': 15.68, 'DJI': 100, 'DMA': 0.87, 'EGY': 100, 'ERI': 5.49,
    'FJI': 1.59, 'PYF': 3.88, 'IDN': 41.54, 'KEN': 2.34, 'KIR': 0,
    'KWT': 67.6, 'MDV': 0, 'MHL': 0, 'FSM': 0, 'NCL': 0,
    'NIC': 10.89, 'MNP': 0, 'PLW': 0, 'PNG': 0, 'WSM': 0,
    'STP': 9.7, 'SGP': 0, 'SLB': 0.04, 'LKA': 29.23, 'KNA': 0.49,
    'LCA': 32.26, 'VCT': 5.98, 'TZA': 2.32, 'THA': 33.76, 'TON': 0,
    'TCA': 0, 'TUV': 0, 'VUT': 0, 'VNM': 48.67, 'YEM': 41.8,
}

In [None]:
# Peek at 10 random FAOSTAT rows. The seed is fixed so the preview shows the
# same rows after every Restart & Run All.
fert.sample(10, random_state=42)

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
131940,174,'620,Portugal,3103,Nutrient phosphate P2O5 (total),5157,Agricultural Use,1988,1988,t,89100.0,A,Official data from questionnaire
180137,251,'894,Zambia,3104,Nutrient potash K2O (total),5172,Use per capita,2020,2020,kg/cap,2.83,E,
84086,106,'380,Italy,3102,Nutrient nitrogen N (total),5173,Use per value of agricultural production,1980,1980,kg/1000 Int.$,21.49,E,
178735,248,'890,Yugoslav SFR,3102,Nutrient nitrogen N (total),5172,Use per capita,1982,1982,kg/cap,22.97,E,
168157,225,'784,United Arab Emirates,3102,Nutrient nitrogen N (total),5610,Import Quantity,2007,2007,t,12979.87,E,
213677,5504,'061,Polynesia,3104,Nutrient potash K2O (total),5173,Use per value of agricultural production,1995,1995,kg/1000 Int.$,1.52,E,
119809,159,'566,Nigeria,3104,Nutrient potash K2O (total),5173,Use per value of agricultural production,2017,2017,kg/1000 Int.$,2.31,E,
5053,9,'032,Argentina,3102,Nutrient nitrogen N (total),5159,Use per area of cropland,1969,1969,kg/ha,1.41,E,
163749,222,'788,Tunisia,3103,Nutrient phosphate P2O5 (total),5910,Export Quantity,1996,1996,t,705000.0,X,
36259,44,'170,Colombia,3102,Nutrient nitrogen N (total),5173,Use per value of agricultural production,2009,2009,kg/1000 Int.$,19.35,E,


In [None]:
# Attach Country_Code to every FAOSTAT row by matching its Area name against
# Country_Name in the reference table. This is a left join, so rows whose Area
# has no match are kept with NaN in Country_Name / Country_Code.
fert_with_codes = fert.merge(
    country_code_df[['Country_Name', 'Country_Code']],
    how="left",
    left_on="Area",
    right_on="Country_Name",
)

In [None]:
# Preview up to 10 FAOSTAT rows that got no Country_Code from the merge.
# The seed keeps the preview stable across re-runs, and the sample size is
# capped at the number of unmatched rows so the cell does not raise when fewer
# than 10 (or none) are unmatched.
fert_with_codes.loc[fert_with_codes['Country_Code'].isna()].pipe(
    lambda unmatched: unmatched.sample(n=min(10, len(unmatched)), random_state=42)
)

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note,Country_Name,Country_Code
218288,5803,'722,Small Island Developing States,3104,Nutrient potash K2O (total),5910,Export Quantity,2010,2010,t,46124.01,E,,,
181397,5000,'001,World,3102,Nutrient nitrogen N (total),5610,Import Quantity,1981,1981,t,12258392.0,E,,,
200244,5304,'035,South-eastern Asia,3102,Nutrient nitrogen N (total),5610,Import Quantity,1982,1982,t,1064454.0,E,,,
187748,5104,'018,Southern Africa,3103,Nutrient phosphate P2O5 (total),5510,Production,2019,2019,t,186200.0,E,,,
165321,223,'792,Türkiye,3104,Nutrient potash K2O (total),5157,Agricultural Use,1996,1996,t,73500.0,X,,,
198828,5302,'030,Eastern Asia,3104,Nutrient potash K2O (total),5159,Use per area of cropland,1971,1971,kg/ha,6.25,E,,,
196951,5300,'142,Asia,3104,Nutrient potash K2O (total),5159,Use per area of cropland,1964,1964,kg/ha,1.65,E,,,
207928,5404,'155,Western Europe,3102,Nutrient nitrogen N (total),5157,Agricultural Use,1965,1965,t,2739119.0,A,,,
193800,5206,'029,Caribbean,3102,Nutrient nitrogen N (total),5173,Use per value of agricultural production,1988,1988,kg/1000 Int.$,29.78,E,,,
195431,5207,'005,South America,3104,Nutrient potash K2O (total),5510,Production,1969,1969,t,17424.0,A,,,


In [None]:
 # FAOSTAT areas that report "Agricultural Use" from 1980 onwards and whose
 # name matches a Country_Name in the coral bleaching data exactly.
 fert.loc[
     (fert['Element'] == 'Agricultural Use')
     & (fert['Year'] >= 1980)
     & fert['Area'].isin(gcb['Country_Name'].unique()),
     'Area',
 ].unique()

array(['Antigua and Barbuda', 'Australia', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belize', 'Bermuda', 'Brazil',
       'Cambodia', 'Cayman Islands', 'Chile', 'China', 'Colombia',
       'Costa Rica', 'Cuba', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'Eritrea', 'Fiji', 'France',
       'French Polynesia', 'Grenada', 'Guatemala', 'Haiti', 'Honduras',
       'India', 'Indonesia', 'Israel', 'Jamaica', 'Japan', 'Jordan',
       'Kenya', 'Kiribati', 'Kuwait', 'Madagascar', 'Malaysia',
       'Maldives', 'Mauritius', 'Mexico', 'Mozambique', 'Myanmar',
       'New Caledonia', 'Nicaragua', 'Oman', 'Palau', 'Panama',
       'Papua New Guinea', 'Philippines', 'Saint Kitts and Nevis',
       'Saint Lucia', 'Saint Vincent and the Grenadines', 'Samoa',
       'Saudi Arabia', 'Seychelles', 'Singapore', 'Solomon Islands',
       'South Africa', 'Sri Lanka', 'Sudan', 'Thailand', 'Tonga',
       'Trinidad and Tobago', 'Tuvalu', 'United Arab Emirates', 'Van

In [None]:
# Alphabetical list of the distinct country names in the coral bleaching data,
# for comparison against the FAOSTAT Area names above.
pd.unique(gcb['Country_Name'].sort_values())

array(['Antigua and Barbuda', 'Australia', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belize', 'Bermuda', 'Brazil', 'Brunei',
       'Cambodia', 'Cayman Islands', 'Chile', 'China', 'Colombia',
       'Commonwealth of the Northern Mariana Islands', 'Comoro Islands',
       'Costa Rica', 'Cuba', 'Djibouti', 'Dominica', 'Dominican Republic',
       'East Timor', 'Ecuador', 'Egypt', 'Eritrea',
       'Federated States of Micronesia', 'Fiji', 'France',
       'French Polynesia', 'Grenada', 'Guatemala', 'Haiti', 'Honduras',
       'India', 'Indonesia', 'Iran', 'Israel', 'Jamaica', 'Japan',
       'Jordan', 'Kenya', 'Kiribati', 'Kuwait', 'Madagascar', 'Malaysia',
       'Maldives', 'Marshall Islands', 'Mauritius', 'Mexico',
       'Mozambique', 'Myanmar', 'Netherlands Antilles', 'New Caledonia',
       'Nicaragua', 'Oman', 'Palau', 'Panama', 'Papua New Guinea',
       'Philippines', 'Saint Kitts and Nevis', 'Saint Lucia',
       'Saint Vincent and the Grenadines', 'Samoa', 'Sao T

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6a5ab9c4-f125-4a61-a0b4-af0fc5325c87' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>