In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Download the drivers-of-change table and normalise every header to
# lower snake_case; the ISO column gets the short name `iso`.
df = (
    pd.read_csv('https://storage.googleapis.com/mangrove_atlas/widget_data/Drivers_of_change_Mangroves.csv')
    .rename(columns=lambda col: col.lower().replace(' ', '_'))
    .rename(columns={'iso-3_code': 'iso'})
)
# The ISO codes in the source file are wrongly assigned: they are used here
# only to discard rows without a code, and the column is dropped right after.
df = df.loc[
    df['iso'].notna(),
    ['country', 'erosion_pct', 'episodic_disturbances_pct', 'commodities_pct',
     'npc_pct', 'settlement_pct', 'primary_driver'],
]
df.head()

Unnamed: 0,country,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver
0,Angola,0.864627,1.525797,0.0,71.370431,26.239145,NPC
1,Australia,20.055568,70.644338,0.013919,9.205866,0.080309,Episodic Disturbances
2,Bangladesh,79.299352,5.609774,4.654516,9.465341,0.971016,Erosion
3,Belize,6.969702,20.694848,20.0074,25.872602,26.455449,Settlement
4,Benin,9.016235,0.0,13.114575,54.096062,23.773128,NPC


In [3]:
# Count how many rows (one per country) fall under each primary driver.
df['primary_driver'].value_counts()

NPC                      26
Erosion                  22
Commodities              14
Episodic Disturbances     7
Settlement                2
Name: primary_driver, dtype: int64

In [4]:
# Sanity check: list every row whose primary driver is missing.
df.loc[df['primary_driver'].isnull()]

Unnamed: 0,country,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver


In [5]:
# Read the location boundaries and keep only the country-level features.
locations_file = 'https://storage.googleapis.com/mangrove_atlas/boundaries/processed/location_final/locations_v3_not_merged_with_old.gpkg'
locations = gpd.read_file(locations_file).loc[lambda gdf: gdf['type'] == 'country']
locations.head()

Unnamed: 0,name,iso,type,area_m2,wdpaid,globalid,perimeter_m,location_idn,coast_length_m,geometry
82,Qatar,QAT,country,3.880224,,{AF97ABE2-6405-4438-A7ED-1494A43DA379},8.392644,06d2e6f9-bc89-59bf-a0e2-ab804e5db9fd,1345769.96,"MULTIPOLYGON (((50.73769 24.93464, 50.73779 24..."
89,Mayotte,MYT,country,5.611808,,{57E86B5B-7EF0-4754-A8D4-A9DC3212D421},10.086238,0750953f-4af9-549b-aeea-329663249a56,291036.71,"POLYGON ((46.63483 -12.96039, 46.63197 -12.969..."
118,Vietnam,VNM,country,90.156489,,{B2A84FBB-34CD-4A51-9463-B9DB2DB62A10},81.714911,09a1ab14-11ad-56ec-8acb-a149e5697abd,9005760.08,"MULTIPOLYGON (((104.31952 10.36051, 104.31975 ..."
132,Grenada,GRD,country,2.154728,,{F8753179-5FFA-4D9E-8AD9-083F31C48528},6.743601,0b0ecb56-bb8e-5ef1-b8ee-3cdad67fed0e,260664.47,"MULTIPOLYGON (((-61.91525 11.37330, -61.91813 ..."
149,India,IND,country,473.029671,,{A4A6CE4D-8D03-4246-9A2F-BD9811232115},211.564078,0c07ca53-7b17-5650-a2c6-0cc27249a4bd,16917891.22,"MULTIPOLYGON (((79.52922 9.38411, 79.52921 9.3..."


In [6]:
# Number of country-level locations.
locations.shape[0]

122

In [7]:
# Compare how many countries carry drivers data with how many of those rows
# can be matched by name against the locations table.
n_countries = len(df['country'].unique())
n_matched = int(df['country'].isin(locations['name']).sum())
print(f'Number of countries with drivers data: {n_countries}')
print(f'Countries with drivers data in the locations file: {n_matched}')

Number of countries with drivers data: 71
Countries with drivers data in the locations file: 67


In [8]:
# Rows whose country name has no exact match in the locations table.
df.loc[~df['country'].isin(locations['name'])]

Unnamed: 0,country,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver
20,The Gambia,76.387253,2.886656,0.0,18.889797,1.836295,Erosion
31,Cote d'Ivoire,0.0,0.0,0.0,100.0,0.0,NPC
38,Mexico,29.896673,9.008041,16.125975,40.800024,4.169287,NPC
64,The Bahamas,63.078377,22.583613,0.0,14.304883,0.033127,Erosion


In [9]:
# Look up how a country is spelled in the locations table by name fragment.
pattern = 'Baha'
locations.loc[locations['name'].str.contains(pattern), 'name']

2000    Bahamas
Name: name, dtype: object

In [10]:
# Align the country spellings in the drivers table with the names used by
# the locations table so that both can be joined on name.
df['country'] = df['country'].replace({
    'The Gambia': 'Gambia',
    "Cote d'Ivoire": "Côte d'Ivoire",
    'Mexico': 'México',
    'The Bahamas': 'Bahamas',
})

In [11]:
# Re-run the name check after the fixes; no rows should remain unmatched.
df.loc[~df['country'].isin(locations['name'])]

Unnamed: 0,country,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver


## Map layer preparation  
Skip this section when you are only updating or fixing the widget data; it is needed only to rebuild the map layer.

Merge the loss drivers data with the location polygons and export the result as GeoJSON.

In [12]:
# Attach the drivers columns to every country polygon (left join keeps
# countries without drivers data) and keep only the map-layer attributes.
loss_drivers_gdf = (
    locations
    .merge(df, how='left', left_on='name', right_on='country')
    [['name', 'iso', 'erosion_pct', 'episodic_disturbances_pct', 'commodities_pct',
      'npc_pct', 'settlement_pct', 'primary_driver', 'geometry']]
)
loss_drivers_gdf.head(3)

Unnamed: 0,name,iso,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver,geometry
0,Qatar,QAT,0.0,0.0,0.0,100.0,0.0,NPC,"MULTIPOLYGON (((50.73769 24.93464, 50.73779 24..."
1,Mayotte,MYT,,,,,,,"POLYGON ((46.63483 -12.96039, 46.63197 -12.969..."
2,Vietnam,VNM,32.471499,0.088626,61.753005,2.811297,2.875573,Commodities,"MULTIPOLYGON (((104.31952 10.36051, 104.31975 ..."


In [13]:
# Verify that every country of the drivers table is present in the joined layer.
df.loc[~df['country'].isin(loss_drivers_gdf['name'])]

Unnamed: 0,country,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver


In [14]:
# Inspect the coordinate reference system of the merged layer before exporting it.
loss_drivers_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [15]:
# Write the merged layer to disk as GeoJSON (path is relative to the notebook).
loss_drivers_gdf.to_file('../../../../data/main_loss_drivers.geojson', driver='GeoJSON')

### Create MBTILES

In [2]:
# Convert the exported GeoJSON into an MBTiles tileset with the tippecanoe CLI.
in_file = '../../../../data/main_loss_drivers.geojson'
out_file = '../../../../data/main_loss_drivers.mbtiles'
# -zg lets tippecanoe choose the maximum zoom itself (see "Choosing a maxzoom" in the log).
# NOTE(review): --force presumably overwrites an existing output file — confirm in the tippecanoe docs.
!tippecanoe -zg -o $out_file --drop-densest-as-needed --force $in_file
# NOTE(review): this message is printed even if the shell command above failed;
# check the tippecanoe log before trusting it.
print('ALL DONE!')

For layer 0, using name "main_loss_drivers"
122 features, 8679746 bytes of geometry, 3467 bytes of separate metadata, 6209 bytes of string pool
Choosing a maxzoom of -z0 for features about 3191542 feet (972782 meters) apart
Choosing a maxzoom of -z9 for resolution of about 749 feet (228 meters) within features
  99.9%  9/324/211  
ALL DONE!


## Data preparation

In [16]:
# Preview the cleaned drivers table that the widget data is built from.
df.head()

Unnamed: 0,country,erosion_pct,episodic_disturbances_pct,commodities_pct,npc_pct,settlement_pct,primary_driver
0,Angola,0.864627,1.525797,0.0,71.370431,26.239145,NPC
1,Australia,20.055568,70.644338,0.013919,9.205866,0.080309,Episodic Disturbances
2,Bangladesh,79.299352,5.609774,4.654516,9.465341,0.971016,Erosion
3,Belize,6.969702,20.694848,20.0074,25.872602,26.455449,Settlement
4,Benin,9.016235,0.0,13.114575,54.096062,23.773128,NPC


In [17]:
# Reshape from wide to long: one row per (country, driver percentage column).
df_long = df.melt(
    id_vars=['country', 'primary_driver'],
    value_vars=['erosion_pct', 'episodic_disturbances_pct', 'commodities_pct',
                'npc_pct', 'settlement_pct'],
)
df_long

Unnamed: 0,country,primary_driver,variable,value
0,Angola,NPC,erosion_pct,0.864627
1,Australia,Episodic Disturbances,erosion_pct,20.055568
2,Bangladesh,Erosion,erosion_pct,79.299352
3,Belize,Settlement,erosion_pct,6.969702
4,Benin,NPC,erosion_pct,9.016235
...,...,...,...,...
350,Trinidad and Tobago,NPC,settlement_pct,14.340400
351,United Arab Emirates,NPC,settlement_pct,1.975474
352,United States,Erosion,settlement_pct,9.489101
353,Venezuela,Erosion,settlement_pct,0.479327


In [18]:
# Keep only the name -> location_idn lookup needed for the joins below.
# NOTE: this overwrites `locations` without its `iso` and `geometry` columns, so
# the "Map layer preparation" cells cannot be re-run afterwards unless the
# locations file is loaded again.
locations = locations[['name', 'location_idn']]
locations.head()

Unnamed: 0,name,location_idn
82,Qatar,06d2e6f9-bc89-59bf-a0e2-ab804e5db9fd
89,Mayotte,0750953f-4af9-549b-aeea-329663249a56
118,Vietnam,09a1ab14-11ad-56ec-8acb-a149e5697abd
132,Grenada,0b0ecb56-bb8e-5ef1-b8ee-3cdad67fed0e
149,India,0c07ca53-7b17-5650-a2c6-0cc27249a4bd


In [19]:
# Load the API location table and expose its `location_id` column as
# `location_idn`, the key name used by the locations table.
api_locs = (
    pd.read_csv('https://storage.googleapis.com/mangrove_atlas/widget_data/locations_staging.csv')
    .rename(columns={'location_id': 'location_idn'})
)
api_locs.head()

Unnamed: 0,id,location_idn
0,1563,000bd204-c0fd-510b-a1ad-132a7ef7470d
1,1564,00250a0f-f66d-54a0-b7a3-d80035881cbf
2,1565,0041637b-f6a2-5b89-87ce-850f5c5431b3
3,1566,005b49ef-6b7f-575a-85b3-ff19261a0755
4,1567,00921349-70fb-5a7e-8207-b3157aecc349


In [20]:
# Resolve every country to its API location id in two steps:
# country name -> location_idn (locations table) -> id (API table).
loss_drivers_data = (
    df_long
    .merge(locations, left_on='country', right_on='name', how='left')
    .merge(api_locs, on='location_idn', how='left')
)
# Left joins fail silently: a duplicated key multiplies rows and a missing key
# leaves a NaN id. Both would corrupt the exported widget data, so fail loudly.
assert len(loss_drivers_data) == len(df_long), 'joins duplicated rows: lookup keys are not unique'
assert loss_drivers_data['id'].notna().all(), 'some countries could not be resolved to an API location id'
loss_drivers_data.head()

Unnamed: 0,country,primary_driver,variable,value,name,location_idn,id
0,Angola,NPC,erosion_pct,0.864627,Angola,27ceab8c-946e-5286-a06f-8bd98ec81f77,2029
1,Australia,Episodic Disturbances,erosion_pct,20.055568,Australia,48287653-09c8-5cfd-95b8-6a5b66b600bb,2441
2,Bangladesh,Erosion,erosion_pct,79.299352,Bangladesh,f58d8175-a251-50e8-b1e1-756d60d70d96,4587
3,Belize,Settlement,erosion_pct,6.969702,Belize,b686e902-4ce4-5b7a-b84a-e76021dd3f03,3831
4,Benin,NPC,erosion_pct,9.016235,Benin,f0b5314c-bab2-5a15-8bbf-2698f5140830,4530


In [21]:
# Keep only the columns needed downstream and give them their final names.
loss_drivers_data = (
    loss_drivers_data
    .loc[:, ['id', 'variable', 'value', 'primary_driver']]
    .rename(columns={'id': 'location_id', 'variable': 'indicator'})
)
loss_drivers_data.head()

Unnamed: 0,location_id,indicator,value,primary_driver
0,2029,erosion_pct,0.864627,NPC
1,2441,erosion_pct,20.055568,Episodic Disturbances
2,4587,erosion_pct,79.299352,Erosion
3,3831,erosion_pct,6.969702,Settlement
4,4530,erosion_pct,9.016235,NPC


In [22]:
# Spot-check five random rows (no seed is set, so the rows differ between runs).
loss_drivers_data.sample(5)

Unnamed: 0,location_id,indicator,value,primary_driver
253,4616,npc_pct,3.34813,Commodities
157,1970,commodities_pct,7.319974,NPC
126,4306,episodic_disturbances_pct,0.0,NPC
244,3532,npc_pct,100.0,NPC
180,4422,commodities_pct,16.125975,NPC


In [23]:
# List the distinct indicator names present in the long table.
loss_drivers_data['indicator'].unique()

array(['erosion_pct', 'episodic_disturbances_pct', 'commodities_pct',
       'npc_pct', 'settlement_pct'], dtype=object)

In [24]:
# List the distinct primary driver labels present in the long table.
loss_drivers_data['primary_driver'].unique()

array(['NPC', 'Episodic Disturbances', 'Erosion', 'Settlement',
       'Commodities'], dtype=object)

In [25]:
# Sanity check: the driver percentages of each location should add up to 100.
# The column is selected explicitly; `.sum('value')` would pass 'value' as the
# positional `numeric_only` flag instead of choosing the column to aggregate.
pct_check = loss_drivers_data.groupby('location_id', as_index=False)['value'].sum()
pct_check

Unnamed: 0,location_id,value
0,1645,100.0
1,1681,100.0
2,1712,100.0
3,1736,100.0
4,1879,100.0
...,...,...
66,4530,100.0
67,4543,100.0
68,4587,100.0
69,4616,100.0


In [26]:
# Check global values (but this data is not used in the widget at worldwide level).
# Unweighted mean of each indicator across locations; the column is selected
# explicitly because `.mean('value')` would pass 'value' as the positional
# `numeric_only` flag rather than naming the column to average.
df_total = loss_drivers_data.groupby('indicator', as_index=False)['value'].mean()
display(df_total)
print(f'Total average: {df_total["value"].sum()}')

Unnamed: 0,indicator,value
0,commodities_pct,17.515014
1,episodic_disturbances_pct,13.778124
2,erosion_pct,28.100153
3,npc_pct,35.109311
4,settlement_pct,5.497398


Total average: 99.99999999999858


In [27]:
# Final look at the long table before it is exported.
loss_drivers_data

Unnamed: 0,location_id,indicator,value,primary_driver
0,2029,erosion_pct,0.864627,NPC
1,2441,erosion_pct,20.055568,Episodic Disturbances
2,4587,erosion_pct,79.299352,Erosion
3,3831,erosion_pct,6.969702,Settlement
4,4530,erosion_pct,9.016235,NPC
...,...,...,...,...
350,2691,settlement_pct,14.340400,NPC
351,3123,settlement_pct,1.975474,NPC
352,3733,settlement_pct,9.489101,Erosion
353,3582,settlement_pct,0.479327,Erosion


In [30]:
# Export the widget data; the output file uses `variable` as the column name.
# The rename is applied to a temporary copy so `loss_drivers_data` keeps its
# `indicator` column and the checks in earlier cells can still be re-run.
(
    loss_drivers_data
    .rename(columns={'indicator': 'variable'})
    .to_csv('../../../../data/loss_drivers_data.csv', index=False)
)