# IUCN Ecoregions map layer preparation  
To build the ecoregions map and populate it with data, we need to combine the geometries in the shapefile with the data in the Excel file (both stored in the GCS bucket).  


In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

## 1) Get data

In [2]:
# Base URL of the bucket folder that holds both the ecoregions shapefile and the Excel workbook
gcs_url = 'https://storage.googleapis.com/mangrove_atlas/widget_data'

**Map data**

In [3]:
# Load the ecoregion polygons, reproject them to EPSG:4326 and keep only the
# columns that are needed for the map layer.
ecoregions_raw = gpd.read_file(f'{gcs_url}/Mangrove_EcoregionsL4_v2023/Mangrove_EcoregionsL4_v2023.shp')
ecoregions_4326 = ecoregions_raw.to_crs('EPSG:4326')
map_layer = ecoregions_4326[['unit_name', 'Region', 'geometry']]
map_layer.head(2)

Unnamed: 0,unit_name,Region,geometry
0,Mangroves of Warm Temperate Northwest Atlantic,"The Caribbean, Latin and Noth America","POLYGON Z ((-77.50101 30.24087 0.00000, -77.57..."
1,Mangroves of Warm Temperate Northwest Pacific,Eastern and South-Eastern Asia,"POLYGON Z ((133.58409 28.34363 0.00000, 130.70..."


**Ecoregions status data**

In [4]:
# Open the workbook once; the reader functions below pull individual sheets from this handle
excel_file = pd.ExcelFile(f'{gcs_url}/Ecoregion.RedList.Data.xlsx')
# One sheet per assessed ecoregion; the sheet name is the ecoregion name without the 'Mangroves of ' prefix
sheet_names = excel_file.sheet_names
sheet_names

['Sunda Shelf',
 'Western Coral Triangle',
 'Andaman',
 'South China Sea',
 'Bay of Bengal']

In [10]:
# The shapefile's `unit_name` values carry this prefix, the workbook sheet names do not
prefix = 'Mangroves of '
sheet_names_long = [f'{prefix}{name}' for name in sheet_names]
sheet_names_long

['Mangroves of Sunda Shelf',
 'Mangroves of Western Coral Triangle',
 'Mangroves of Andaman',
 'Mangroves of South China Sea',
 'Mangroves of Bay of Bengal']

In [13]:
# Show the shapefile units whose name matches one of the (prefixed) workbook sheets
has_sheet = map_layer['unit_name'].isin(sheet_names_long)
map_layer.loc[has_sheet, 'unit_name']

15             Mangroves of Bay of Bengal
16                   Mangroves of Andaman
17           Mangroves of South China Sea
18               Mangroves of Sunda Shelf
22    Mangroves of Western Coral Triangle
Name: unit_name, dtype: object

In [64]:
# Read the per-category assessment table of one ecoregion from the Excel workbook
def get_ecoregion_data(sheet_name, skip_initial_rows=1, total_rows=6):
    """Load one sheet of the module-level `excel_file` and normalise its labels.

    Parameters
    ----------
    sheet_name : str
        Name of the workbook sheet to read.
    skip_initial_rows : int, default 1
        Forwarded to `pd.read_excel` as `skiprows`.
    total_rows : int, default 6
        Forwarded to `pd.read_excel` as `nrows`.

    Returns
    -------
    pandas.DataFrame
        Frame whose columns are renamed to 'cat', '1', '2', '3'. The 'cat'
        values have a leading "<capital letter>." marker removed, are
        lower-cased, stripped, and have spaces replaced by underscores.
    """
    table = pd.read_excel(
        excel_file,
        sheet_name=sheet_name,
        skiprows=skip_initial_rows,
        nrows=total_rows,
    )
    # The sheet is expected to hold exactly four columns; renaming fails otherwise
    table.columns = ['cat', '1', '2', '3']

    labels = table['cat'].str.replace(r'^[A-Z]\.\s*', '', regex=True)
    labels = labels.str.lower().str.strip()
    table['cat'] = labels.str.replace(' ', '_')
    return table

In [75]:
# Preview the table of the first sheet. The function defaults are used here because
# `skip_initial_rows` / `total_rows` are only assigned further down the notebook,
# so passing them would raise a NameError on a fresh kernel.
sheet_name = sheet_names[0]
df = get_ecoregion_data(sheet_name)
df


Unnamed: 0,cat,1,2,3
0,reduction_in_geographic_distribution,VU,LC,DD
1,restricted_geographic_distribution,LC,LC,LC
2,environmental_degradation,DD,VU,DD
3,distribution_of_biotic_processes,DD,LC,DD
4,quantitative_risk_analysis,NE,,


In [73]:
# Read the overall conservation status of one ecoregion from the Excel workbook
def get_ecoregion_status(sheet_name):
    """Return the value stored in the first row, second column of a sheet.

    Only the first row of `sheet_name` is read from the module-level
    `excel_file`, without treating it as a header. The notebook uses the
    returned value as the ecoregion's overall assessment.
    """
    first_row = pd.read_excel(excel_file, sheet_name=sheet_name, nrows=1, header=None)
    return first_row.iloc[0, 1]

In [74]:
# Quick check of the status reader on the first sheet
get_ecoregion_status(sheet_names[0])

'VU'

In [62]:
# Rearrange the data of one ecoregion into the final single-row (wide) format
def arrange_ecoregion_data(df):
    """Flatten a long category table into one row with `<cat>_<n>` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a `cat` column in first position followed by the value
        columns (three of them when produced by `get_ecoregion_data`, but any
        number is accepted).

    Returns
    -------
    pandas.DataFrame
        A single-row frame (index 0, object dtype). For every input row, one
        column per value column is emitted, named `<cat>_1`, `<cat>_2`, ...
        in input order. An input without rows yields an empty DataFrame.
    """
    if len(df) == 0:
        return pd.DataFrame()

    wide_columns = []
    wide_values = []
    categories = df['cat'].tolist()
    # Values are taken by position (everything after the first column) so the
    # numbering does not depend on how the value columns are named.
    value_rows = df.iloc[:, 1:].itertuples(index=False, name=None)
    for cat, row_values in zip(categories, value_rows):
        for position, value in enumerate(row_values, start=1):
            wide_columns.append(f'{cat}_{position}')
            wide_values.append(value)

    # Build the row in one go instead of concatenating frames inside the loop;
    # object dtype keeps missing cells from turning a column into float.
    return pd.DataFrame([wide_values], columns=wide_columns, dtype=object)

In [63]:
# Preview the wide layout for the first sheet. The table is read explicitly here so
# the cell does not rely on a `df` left behind by some other cell.
arrange_ecoregion_data(get_ecoregion_data(sheet_names[0]))

Unnamed: 0,reduction_in_geographic_distribution_1,reduction_in_geographic_distribution_2,reduction_in_geographic_distribution_3,restricted_geographic_distribution_1,restricted_geographic_distribution_2,restricted_geographic_distribution_3,environmental_degradation_1,environmental_degradation_2,environmental_degradation_3,distribution_of_biotic_processes_1,distribution_of_biotic_processes_2,distribution_of_biotic_processes_3,quantitative_risk_analysis_1,quantitative_risk_analysis_2,quantitative_risk_analysis_3
0,VU,LC,DD,LC,LC,LC,DD,VU,DD,DD,LC,DD,NE,,


In [77]:
# Read and combine all the data

skip_initial_rows = 1
total_rows = 6

# Collect one single-row frame per sheet and concatenate once at the end,
# instead of growing the result with a concat + reset_index on every iteration.
region_frames = []

for sheet_name in sheet_names:
    #Read region data
    df = get_ecoregion_data(sheet_name, skip_initial_rows, total_rows)
    #Rearrange data
    df_region = arrange_ecoregion_data(df)
    #Get assessment status
    status = get_ecoregion_status(sheet_name)

    # Use the same prefix as the name check above so the merge key matches `unit_name`
    df_region['unit_name'] = f'{prefix}{sheet_name}'
    df_region['overall_assessment'] = status
    region_frames.append(df_region)

ecoregions_df = pd.concat(region_frames, axis=0, ignore_index=True)
# Only the first quantitative_risk_analysis value is kept in the final layer
ecoregions_df = ecoregions_df.drop(columns=['quantitative_risk_analysis_2', 'quantitative_risk_analysis_3'])
ecoregions_df


Unnamed: 0,reduction_in_geographic_distribution_1,reduction_in_geographic_distribution_2,reduction_in_geographic_distribution_3,restricted_geographic_distribution_1,restricted_geographic_distribution_2,restricted_geographic_distribution_3,environmental_degradation_1,environmental_degradation_2,environmental_degradation_3,distribution_of_biotic_processes_1,distribution_of_biotic_processes_2,distribution_of_biotic_processes_3,quantitative_risk_analysis_1,unit_name,overall_assessment
0,VU,LC,DD,LC,LC,LC,DD,VU,DD,DD,LC,DD,NE,Mangroves of Sunda Shelf,VU
1,VU,LC,DD,LC,LC,LC,DD,VU,DD,DD,LC,DD,NE,Mangroves of Western Coral Triangle,VU
2,LC,LC,DD,LC,LC,LC,DD,LC,DD,DD,LC,DD,NE,Mangroves of Andaman,LC
3,VU,LC,DD,LC,LC,LC,DD,EN,DD,DD,LC,DD,NE,Mangroves of South China Sea,EN
4,LC,LC,DD,LC,LC,LC,DD,LC,DD,DD,LC,DD,NE,Mangroves of Bay of Bengal,LC


## 2) Combine data and geometries

In [79]:
# Left join keeps every ecoregion polygon; those without a workbook sheet get NaN assessments
map_layer_combined = map_layer.merge(ecoregions_df, how='left', on='unit_name')
# Display only the polygons that actually received assessment data
has_assessment = map_layer_combined['overall_assessment'].notna()
map_layer_combined[has_assessment]

Unnamed: 0,unit_name,Region,geometry,reduction_in_geographic_distribution_1,reduction_in_geographic_distribution_2,reduction_in_geographic_distribution_3,restricted_geographic_distribution_1,restricted_geographic_distribution_2,restricted_geographic_distribution_3,environmental_degradation_1,environmental_degradation_2,environmental_degradation_3,distribution_of_biotic_processes_1,distribution_of_biotic_processes_2,distribution_of_biotic_processes_3,quantitative_risk_analysis_1,overall_assessment
15,Mangroves of Bay of Bengal,Eastern and South-Eastern Asia,"POLYGON Z ((99.55866 23.09577 0.00000, 101.487...",LC,LC,DD,LC,LC,LC,DD,LC,DD,DD,LC,DD,NE,LC
16,Mangroves of Andaman,Eastern and South-Eastern Asia,"POLYGON Z ((99.34435 14.60911 0.00000, 99.3872...",LC,LC,DD,LC,LC,LC,DD,LC,DD,DD,LC,DD,NE,LC
17,Mangroves of South China Sea,Eastern and South-Eastern Asia,"POLYGON Z ((120.44116 22.72976 0.00000, 120.14...",VU,LC,DD,LC,LC,LC,DD,EN,DD,DD,LC,DD,NE,EN
18,Mangroves of Sunda Shelf,Eastern and South-Eastern Asia,"POLYGON Z ((112.10483 5.40675 0.00000, 112.805...",VU,LC,DD,LC,LC,LC,DD,VU,DD,DD,LC,DD,NE,VU
22,Mangroves of Western Coral Triangle,Eastern and South-Eastern Asia,"POLYGON Z ((125.66776 18.15363 0.00000, 125.80...",VU,LC,DD,LC,LC,LC,DD,VU,DD,DD,LC,DD,NE,VU


## 3) Save data as geoJSON and prepare mbtiles

In [83]:
# Write the merged layer (geometries plus assessment columns) to GeoJSON;
# this file is the input of the tippecanoe call in the next cell.
path_geojson = '../../../../data/ecoregions_data.geojson'
map_layer_combined.to_file(path_geojson, driver='GeoJSON')

In [84]:
path_mbtiles = '../../../../data/ecoregions_data.mbtiles'
!tippecanoe -zg -f -P -o {path_mbtiles} --extend-zooms-if-still-dropping {path_geojson}

For layer 0, using name "ecoregions_data"
../../../../data/ecoregions_data.geojson:2: ignoring dimensions beyond two
In JSON object [125.667762285032794,18.153625720791197,0.0]
In JSON object {"type":"Feature","properties":{"unit_name":"Mangroves of Western Coral Triangle","Region":"Eastern and South-Eastern Asia","reduction_in_geographic_distribution_1":"VU","reduction_in_geographic_distribution_2":"LC","reduction_in_geographic_distribution_3":"DD","restricted_geographic_distribution_1":"LC","restricted_geographic_distribution_2":"LC","restricted_geographic_distribution_3":"LC","environmental_degradation_1":"DD","environmental_degradation_2":"VU","environmental_degradation_3":"DD...
../../../../data/ecoregions_data.geojson:5: Found ] at top level
../../../../data/ecoregions_data.geojson:10: Reached EOF without all containers being closed
In JSON object {"type":"FeatureCollection","crs":{"type":"name","properties":{"name":"urn:ogc:def:crs:OGC:1.3:CRS84"}},"features":[]}
40 features, 75

In [86]:
# Count ecoregions per overall assessment category (rows without an assessment are NaN and are not counted)
map_layer_combined['overall_assessment'].value_counts()

overall_assessment
LC    2
VU    2
EN    1
Name: count, dtype: int64