# Coastal protection widget  
Data exploration and preparation for the coastal protection data.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import product

Load data from bucket

In [2]:
df = pd.read_csv('https://storage.googleapis.com/mangrove_atlas/widget_data/Mangrove_Coastal_Protection_Stats_20230426.csv', skiprows=1)
df.head()

Unnamed: 0,Name,Area,Population,Stock,Area.1,Population.1,Stock.1,Area.2,Population.2,Stock.2
0,American Samoa,,,,,,,,,
1,Angola,,3128.645656,2381327.0,0.0,7241.891811,3939056.0,0.0,14039.01065,12073501.76
2,Anguilla,0.615599,0.0,5042979.0,0.922914,0.0,2926016.0,2.781378,0.0,7497182.3
3,Antigua and Barbuda,2.375643,312.024456,15536550.0,2.835863,1208.306314,30533020.0,9.206714,1144.118241,70476111.4
4,Aruba,,,,,,,,,


Generate standard names for the columns

In [3]:
period = ['annual', '25_year', '100_year']
impact = ['area', 'population', 'stock']
cols_names = []

for p, i in product(period, impact):
    cols_names.append(f'{p}_{i}')
cols_names

['annual_area',
 'annual_population',
 'annual_stock',
 '25_year_area',
 '25_year_population',
 '25_year_stock',
 '100_year_area',
 '100_year_population',
 '100_year_stock']

In [4]:
df.columns = ['country'] + cols_names
df.head(1)

Unnamed: 0,country,annual_area,annual_population,annual_stock,25_year_area,25_year_population,25_year_stock,100_year_area,100_year_population,100_year_stock
0,American Samoa,,,,,,,,,


Check number of NAs per row, eliminate rows with all NAs (sum = 9)

In [5]:
df['number_na'] = df.isna().sum(axis=1)
df['number_na'].value_counts()

0    95
9    27
1     1
Name: number_na, dtype: int64

In [6]:
df = df[df['number_na'] < 9]
print(f'Rows with data: {len(df)}')

Rows with data: 96


In [7]:
df_long = pd.melt(df, id_vars=['country'], value_vars=cols_names)
df_long.rename(columns={'variable':'indicator'}, inplace=True)
df_long['period'] = df_long['indicator'].str.split('_').str[0].replace({'25':'25_year', '100':'100_year'})
df_long['indicator'] = df_long['indicator'].str.split('_').str[-1]
df_long

Unnamed: 0,country,indicator,value,period
0,Angola,area,,annual
1,Anguilla,area,6.155990e-01,annual
2,Antigua and Barbuda,area,2.375643e+00,annual
3,Australia,area,5.879710e+01,annual
4,Bahamas,area,2.470507e+03,annual
...,...,...,...,...
859,Vietnam,stock,7.473397e+09,100_year
860,British Virgin Islands,stock,2.444817e+07,100_year
861,"Virgin Islands, U.S.",stock,1.503067e+07,100_year
862,Yemen,stock,1.357952e+07,100_year


In [8]:
units_df = pd.DataFrame({'indicator': impact, 'unit': ['km2', 'people', 'usd']})
units_df

Unnamed: 0,indicator,unit
0,area,km2
1,population,people
2,stock,USD


In [9]:
df_long = df_long.merge(units_df, on='indicator')
df_long

Unnamed: 0,country,indicator,value,period,unit
0,Angola,area,,annual,km2
1,Anguilla,area,6.155990e-01,annual,km2
2,Antigua and Barbuda,area,2.375643e+00,annual,km2
3,Australia,area,5.879710e+01,annual,km2
4,Bahamas,area,2.470507e+03,annual,km2
...,...,...,...,...,...
859,Vietnam,stock,7.473397e+09,100_year,USD
860,British Virgin Islands,stock,2.444817e+07,100_year,USD
861,"Virgin Islands, U.S.",stock,1.503067e+07,100_year,USD
862,Yemen,stock,1.357952e+07,100_year,USD


In [10]:
#Tableau data for prototype and exploration for design
#df_long.to_csv('../../../../data/coastal_protection_tableau.csv', index=False)

## Add locations

In [10]:
locations_file = 'https://storage.googleapis.com/mangrove_atlas/boundaries/processed/location_final/locations_v3_not_merged_with_old.gpkg'
locations = gpd.read_file(locations_file)
locations = locations[locations['type'] == 'country']
locations.head()

Unnamed: 0,name,iso,type,area_m2,wdpaid,globalid,perimeter_m,location_idn,coast_length_m,geometry
82,Qatar,QAT,country,3.880224,,{AF97ABE2-6405-4438-A7ED-1494A43DA379},8.392644,06d2e6f9-bc89-59bf-a0e2-ab804e5db9fd,1345769.96,"MULTIPOLYGON (((50.73769 24.93464, 50.73779 24..."
89,Mayotte,MYT,country,5.611808,,{57E86B5B-7EF0-4754-A8D4-A9DC3212D421},10.086238,0750953f-4af9-549b-aeea-329663249a56,291036.71,"POLYGON ((46.63483 -12.96039, 46.63197 -12.969..."
118,Vietnam,VNM,country,90.156489,,{B2A84FBB-34CD-4A51-9463-B9DB2DB62A10},81.714911,09a1ab14-11ad-56ec-8acb-a149e5697abd,9005760.08,"MULTIPOLYGON (((104.31952 10.36051, 104.31975 ..."
132,Grenada,GRD,country,2.154728,,{F8753179-5FFA-4D9E-8AD9-083F31C48528},6.743601,0b0ecb56-bb8e-5ef1-b8ee-3cdad67fed0e,260664.47,"MULTIPOLYGON (((-61.91525 11.37330, -61.91813 ..."
149,India,IND,country,473.029671,,{A4A6CE4D-8D03-4246-9A2F-BD9811232115},211.564078,0c07ca53-7b17-5650-a2c6-0cc27249a4bd,16917891.22,"MULTIPOLYGON (((79.52922 9.38411, 79.52921 9.3..."


In [11]:
api_locs = pd.read_csv('https://storage.googleapis.com/mangrove_atlas/widget_data/locations_staging.csv')
api_locs.rename(columns={'location_id': 'location_idn'}, inplace=True)
api_locs.head()

Unnamed: 0,id,location_idn
0,1563,000bd204-c0fd-510b-a1ad-132a7ef7470d
1,1564,00250a0f-f66d-54a0-b7a3-d80035881cbf
2,1565,0041637b-f6a2-5b89-87ce-850f5c5431b3
3,1566,005b49ef-6b7f-575a-85b3-ff19261a0755
4,1567,00921349-70fb-5a7e-8207-b3157aecc349


In [12]:
df_long[df_long['country'].isin(locations['name']) == False]['country'].unique()

array(['Congo', "Cote d'Ivoire", 'Mexico', 'Worldwide'], dtype=object)

In [13]:
df_long.loc[df_long['country'] == 'Congo', 'country'] = 'Republic of the Congo'
df_long.loc[df_long['country'] == "Cote d'Ivoire", 'country'] = "Côte d'Ivoire"
df_long.loc[df_long['country'] == 'Mexico', 'country'] = 'México'


In [15]:
df_locs = df_long.merge(locations[['name', 'location_idn']], left_on='country', right_on='name', how='left')
df_locs = df_locs.merge(api_locs, on='location_idn', how='left')
df_locs.head()

Unnamed: 0,country,indicator,value,period,unit,name,location_idn,id
0,Angola,area,,annual,km2,Angola,27ceab8c-946e-5286-a06f-8bd98ec81f77,2029.0
1,Anguilla,area,0.615599,annual,km2,Anguilla,1ce4c2e5-8456-5db8-8e34-8bfe86083790,1915.0
2,Antigua and Barbuda,area,2.375643,annual,km2,Antigua and Barbuda,7c8d9de5-4c1a-5ed4-838c-05906eaed3f7,3095.0
3,Australia,area,58.797103,annual,km2,Australia,48287653-09c8-5cfd-95b8-6a5b66b600bb,2441.0
4,Bahamas,area,2470.507071,annual,km2,Bahamas,a0d0a60d-1c43-5709-9d80-4b7376421c1d,3563.0


In [27]:
coastal_protection_final = df_locs[['id', 'indicator', 'period', 'value', 'unit']].copy()
coastal_protection_final.rename(columns={'id': 'location_id'}, inplace=True)
coastal_protection_final = coastal_protection_final[~coastal_protection_final['location_id'].isna()]
coastal_protection_final = coastal_protection_final[~coastal_protection_final['value'].isna()]
coastal_protection_final.unit = coastal_protection_final.unit.str.lower()
coastal_protection_final.head()

Unnamed: 0,location_id,indicator,period,value,unit
1,1915.0,area,annual,0.615599,km2
2,3095.0,area,annual,2.375643,km2
3,2441.0,area,annual,58.797103,km2
4,3563.0,area,annual,2470.507071,km2
5,4559.0,area,annual,3.108358,km2


In [28]:
coastal_protection_final[~coastal_protection_final['value'].isna()].to_csv('../../../../data/coastal_protection_data.csv', index=False)