# Coastal protection widget  
Data exploration and preparation for the coastal protection data.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import product

Load the coastal protection statistics from the Google Cloud Storage bucket.

In [2]:
# Location of the coastal protection statistics CSV in the storage bucket.
stats_url = 'https://storage.googleapis.com/mangrove_atlas/widget_data/Mangrove_Coastal_Protection_Stats_20230426.csv'

# skiprows=1 discards the first line of the file, so the second line is
# used as the header row.
# NOTE(review): the skipped line presumably labels the return-period groups
# above the repeated Area/Population/Stock headers -- confirm against the file.
df = pd.read_csv(stats_url, skiprows=1)
df.head()

Unnamed: 0,Name,Area,Population,Stock,Area.1,Population.1,Stock.1,Area.2,Population.2,Stock.2
0,American Samoa,,,,,,,,,
1,Angola,,3128.645656,2381327.0,0.0,7241.891811,3939056.0,0.0,14039.01065,12073501.76
2,Anguilla,0.615599,0.0,5042979.0,0.922914,0.0,2926016.0,2.781378,0.0,7497182.3
3,Antigua and Barbuda,2.375643,312.024456,15536550.0,2.835863,1208.306314,30533020.0,9.206714,1144.118241,70476111.4
4,Aruba,,,,,,,,,


Generate standard names for the columns

In [3]:
# Return periods and impact indicators used to build the column names.
period = ['annual', '25_year', '100_year']
impact = ['area', 'population', 'stock']

# One '<period>_<impact>' name per combination; product() varies the impact
# fastest, so the three impacts of a period are listed together.
# NOTE(review): assumes the CSV columns follow this same order -- confirm.
cols_names = [
    f'{return_period}_{indicator}'
    for return_period, indicator in product(period, impact)
]
cols_names

['annual_area',
 'annual_population',
 'annual_stock',
 '25_year_area',
 '25_year_population',
 '25_year_stock',
 '100_year_area',
 '100_year_population',
 '100_year_stock']

In [4]:
# Rename the columns positionally: the first column becomes 'country' and
# the remaining ones take the generated '<period>_<impact>' names.
# pandas raises ValueError if the number of names differs from the number
# of columns.
df.columns = ['country', *cols_names]
df.head(1)

Unnamed: 0,country,annual_area,annual_population,annual_stock,25_year_area,25_year_population,25_year_stock,100_year_area,100_year_population,100_year_stock
0,American Samoa,,,,,,,,,


Count the number of missing values (NAs) in each row, then drop the rows in which all 9 indicator columns are missing (NA count = 9).

In [5]:
# Count missing values per row across the indicator columns only.
# Restricting the count to cols_names keeps it independent of 'country' and
# of any helper column already present on the frame, so a row with no data
# at all always scores exactly len(cols_names).
df['number_na'] = df[cols_names].isna().sum(axis=1)

# Distribution of the per-row NA counts.
df['number_na'].value_counts()

0    95
9    27
1     1
Name: number_na, dtype: int64

In [6]:
# Keep only the rows that have at least one indicator value.
# The threshold is derived from cols_names instead of being hard-coded, so
# it stays correct if the list of periods or impacts changes.
# .copy() makes the result an independent frame, so assigning columns to it
# later cannot trigger SettingWithCopyWarning.
df = df[df['number_na'] < len(cols_names)].copy()
print(f'Rows with data: {len(df)}')

Rows with data: 96


In [7]:
# Reshape from wide (one column per '<period>_<impact>') to long format:
# one row per country and indicator column. var_name names the melted
# column directly, avoiding a separate in-place rename.
df_long = df.melt(id_vars=['country'], value_vars=cols_names, var_name='indicator')

# Split each column name on its LAST underscore only, so the period keeps
# any internal underscores ('25_year_area' -> '25_year', 'area') and no
# hard-coded mapping of period prefixes is needed.
name_parts = df_long['indicator'].str.rsplit('_', n=1)
df_long['period'] = name_parts.str[0]
df_long['indicator'] = name_parts.str[-1]
df_long

Unnamed: 0,country,indicator,value,period
0,Angola,area,,annual
1,Anguilla,area,6.155990e-01,annual
2,Antigua and Barbuda,area,2.375643e+00,annual
3,Australia,area,5.879710e+01,annual
4,Bahamas,area,2.470507e+03,annual
...,...,...,...,...
859,Vietnam,stock,7.473397e+09,100_year
860,British Virgin Islands,stock,2.444817e+07,100_year
861,"Virgin Islands, U.S.",stock,1.503067e+07,100_year
862,Yemen,stock,1.357952e+07,100_year


In [8]:
# Lookup table giving the unit assigned to each impact indicator.
# The units are listed in the same order as `impact`.
indicator_units = ['km2', 'people', 'USD']
units_df = pd.DataFrame({'indicator': impact}).assign(unit=indicator_units)
units_df

Unnamed: 0,indicator,unit
0,area,km2
1,population,people
2,stock,USD


In [9]:
# Attach the unit of each indicator with a lookup instead of an inner merge.
# The lookup keeps every row of df_long and can be re-run safely: it
# overwrites 'unit' rather than producing 'unit_x' / 'unit_y' columns.
unit_by_indicator = units_df.set_index('indicator')['unit']
df_long = df_long.assign(unit=df_long['indicator'].map(unit_by_indicator))

# Fail loudly if an indicator has no unit defined, instead of silently
# losing or blanking those rows.
assert df_long['unit'].notna().all(), 'indicator without a unit in units_df'
df_long

Unnamed: 0,country,indicator,value,period,unit
0,Angola,area,,annual,km2
1,Anguilla,area,6.155990e-01,annual,km2
2,Antigua and Barbuda,area,2.375643e+00,annual,km2
3,Australia,area,5.879710e+01,annual,km2
4,Bahamas,area,2.470507e+03,annual,km2
...,...,...,...,...,...
859,Vietnam,stock,7.473397e+09,100_year,USD
860,British Virgin Islands,stock,2.444817e+07,100_year,USD
861,"Virgin Islands, U.S.",stock,1.503067e+07,100_year,USD
862,Yemen,stock,1.357952e+07,100_year,USD


In [10]:
# Write the long-format table to disk without the index column.
# The path is relative, so it resolves against the current working
# directory of the notebook kernel.
output_csv = '../../../../data/coastal_protection_tableau.csv'
df_long.to_csv(output_csv, index=False)