# GFW climate biomass widgets

In [None]:
!pip install progressbar2

In [1]:
import geopandas as gpd
import numpy as np
from tqdm import tqdm
import requests
import json
from pprint import pprint
import progressbar
%matplotlib inline

In [None]:
#import pandas as pd
#import h5py

## Table with biomass density and total biomass

**GADM 3.6 admin 2**

In [3]:
df = gpd.read_file('/Users/Ben/Downloads/gadm36_shp/gadm36.shp')

In [4]:
df.head()

Unnamed: 0,UID,GID_0,ID_0,NAME_0,GID_1,ID_1,NAME_1,VARNAME_1,NL_NAME_1,HASC_1,...,GID_5,ID_5,NAME_5,CC_5,TYPE_5,ENGTYPE_5,REGION,VARREGION,zone,geometry
0,183022,AFG,1,Afghanistan,AFG.1_1,1,Badakhshan,Badahšan,,AF.BD,...,,0,,,,,,,4,"POLYGON ((71.18168640136724 36.49195861816412,..."
1,182550,AFG,1,Afghanistan,AFG.1_1,1,Badakhshan,Badahšan,,AF.BD,...,,0,,,,,,,4,"POLYGON ((71.33762359619141 38.11840820312511,..."
2,182994,AFG,1,Afghanistan,AFG.1_1,1,Badakhshan,Badahšan,,AF.BD,...,,0,,,,,,,4,"POLYGON ((70.09976196289062 37.00258255004883,..."
3,183088,AFG,1,Afghanistan,AFG.1_1,1,Badakhshan,Badahšan,,AF.BD,...,,0,,,,,,,4,"POLYGON ((71.3193359375 37.24847793579107, 71...."
4,183159,AFG,1,Afghanistan,AFG.1_1,1,Badakhshan,Badahšan,,AF.BD,...,,0,,,,,,,4,"POLYGON ((71.18168640136724 36.49195861816412,..."


In [None]:
#gadm_ids = df[['GID_0', 'ID_0', 'NAME_0', 'ID_1', 'NAME_1', 'ID_2', 'NAME_2','GID_1','GID_2']]

In [None]:
#gadm_ids[gadm_ids['GID_2'] == 'AFG.2.1_1']

In [None]:
#tmp = gadm_ids[gadm_ids['GID_0']=='BRA']

In [None]:
#tmp[tmp['GID_1'] == 'BRA.2_1'].head()

In [5]:
missing_df = df[df['GID_2'] == '']

In [6]:
f'{len(missing_df)/len(df) * 100:3.2f}% of rows are missing admin-2 id codes.'

'0.24% of rows are missing admin-2 id codes.'

In [7]:
def process_gid_2(gid_2):
    """Return dict of iso (string), and admin_1 and admin_2 (ints) from gid_2 entry."""
    try:
        iso, admin_1, tmp_admin_2 = gid_2.split('.')
        admin_2 = tmp_admin_2.split('_')[0]
        return {'iso':iso, 'admin_1':int(admin_1), 'admin_2':int(admin_2)}
    except:
        return None

In [112]:
all_areas = []
for x in df['GID_2'].values:
    tmp = process_gid_2(x)
    if tmp:
        all_areas.append(tmp)

In [113]:
all_areas[0:10]

[{'iso': 'AFG', 'admin_1': 1, 'admin_2': 1},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 2},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 3},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 4},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 5},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 6},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 7},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 8},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 9},
 {'iso': 'AFG', 'admin_1': 1, 'admin_2': 10}]

In [None]:
# now we have all the codes for all areas I am going to de-allocate the memory of the df to save RAM
df = 0

In [None]:
#df['NAME_2'].replace('', np.nan, inplace=True)

In [None]:
#df.dropna(subset=['NAME_2'], inplace=True)

In [None]:
#df.drop_duplicates(subset=['NAME_0', 'NAME_1', 'NAME_2'], keep='first', inplace=True)

The API contains an endpoint for `whrc-biomass` to compute the total biomass and biomass density of a given municipality.

In [114]:
def make_query(area):
    try:
        r = requests.get(f"https://production-api.globalforestwatch.org/v1/whrc-biomass/admin/{area['iso']}/{area['admin_1']}/{area['admin_2']}")
        if r.status_code == 200:
            return r.json().get('data').get('attributes')
        else:
            return None
    except:
        return None

In [115]:
with open("./tmp_whrc_data.json","a+") as f:
    with progressbar.ProgressBar(max_value=len(all_areas)) as bar:
        for n, area in enumerate(all_areas[0:5]):
            bar.update(n)
            # If no data has yet been sucsessfully retrieved for this area then attempt to get it
            if not area.get('data', None) or not area.get('biomassDensity', None):
                tmp_data = make_query(area)
                if tmp_data:
                    tmp_d = {**area, **tmp_data}
                    f.write(json.dumps(tmp_d) +'\n')
                    all_areas[n]['data'] = tmp_data

100% (338307 of 338307) |################| Elapsed Time: 0:00:25 Time:  0:00:25


In [119]:
# If you need to load/restore the data from a tmp file (due to failure etc) you can do the following...
with open("./tmp_whrc_data.json", 'r') as f:
    text = f.readlines()
        #d = json.loads(f)

In [123]:
reload_data = []
for row in text:
    reload_data.append(json.loads(row))
reload_data[0:3]

[{'iso': 'AFG',
  'admin_1': 1,
  'admin_2': 1,
  'areaHa': 300946.516215708,
  'biomassDensity': 0.0004634254670893975,
  'totalBiomass': 139.4662798461914},
 {'iso': 'AFG',
  'admin_1': 1,
  'admin_2': 2,
  'areaHa': 292525.2285638284,
  'biomassDensity': 0.01798740487590836,
  'totalBiomass': 5261.769722595215},
 {'iso': 'AFG',
  'admin_1': 1,
  'admin_2': 3,
  'areaHa': 294512.4936914639,
  'biomassDensity': 0.0008910649319352455,
  'totalBiomass': 262.4297551452637}]

In [145]:
print("🐸 Work needed here...💥")
# Missing some logic where you would loop back over the array and re-try failed entries
# You could compare the tmp file to see if there is an entry for it to the original series. If no data exists try it again...

🐸 Work needed here...💥


In [125]:
# Final table needs row names of 'biomassdensity','gid_0','id_1','id_2','totalbiomass','areaHa'. Use rename function below

In [127]:
import pandas as pd

In [133]:
output_df = pd.DataFrame(reload_data)#,columns=['id_1','id_2','areaHa','biomassdensity','iso','totalbiomass'])

In [135]:
output_df.keys()

Index(['admin_1', 'admin_2', 'areaHa', 'biomassDensity', 'iso',
       'totalBiomass'],
      dtype='object')

In [138]:
output_df = output_df.rename(index=str, columns={'admin_1':'id_1','admin_2':'id_2','biomassDensity':'biomassdensity','totalBiomass':'totalbiomass'})

In [139]:
output_df.head()

Unnamed: 0,id_1,id_2,areaHa,biomassdensity,iso,totalbiomass
0,1,1,300946.516216,0.000463,AFG,139.46628
1,1,2,292525.228564,0.017987,AFG,5261.769723
2,1,3,294512.493691,0.000891,AFG,262.429755
3,1,4,157239.687241,0.000204,AFG,32.149948
4,1,5,351695.388169,0.00085,AFG,299.115355


In [140]:
# Finally, save the file
output_df.to_csv('./whrc_biomass.csv')

In [None]:
# url = "https://production-api.globalforestwatch.org/v1/"

# nValues = len(df)

# with h5py.File('./data/biomass.hdf5', 'w') as f:
#     density = f.create_dataset("density", shape=(nValues,), dtype=np.float32)
#     total = f.create_dataset("total", shape=(nValues,), dtype=np.float32)
    
#     for i in tqdm(range(nValues)):
#         for attempt in range(4):
#             try:
#                 r = requests.get(url + f"whrc-biomass/admin/{df['GID_0'].iloc[i+325]}/{df['ID_1'].iloc[i+325]}/{df['ID_2'].iloc[i+325]}")
#                 density[i] = r.json().get('data').get('attributes').get('biomassDensity')
#                 total[i] = r.json().get('data').get('attributes').get('totalBiomass') 
#             except:
#                 if r.status_code != 404:
#                     continue
#             else:
#                 break

In [None]:
# with h5py.File('./data/biomass.hdf5', 'r') as f:
#     density = f['density'][:]
#     total = f['total'][:]
    
#     print(len(total))

In [None]:
#df['biomassDensity'] = density
#df['totalBiomass'] = total

Save table

In [None]:
#df.to_csv("./data/whrc-biomass.csv")