# Preprocessing

In [398]:
import os
import json
import random
import textwrap

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import folium

In [288]:
# Directory holding the ecoregion shapefile and `eco_descriptions.txt`.
# Set the ECOREGIONS_DATA_DIR environment variable to run the notebook on another
# machine; the original local path is kept as the default.
data_dir = os.environ.get(
    'ECOREGIONS_DATA_DIR',
    '/mnt/c/Users/HP/Documents/projects/ecoregions-map/us_eco_l3_state_boundaries',
)

Read in the shapefile as a `GeoDataFrame` and reproject it to a USA Albers equal-area coordinate reference system (CRS), `EPSG:5070` (NAD83 / Conus Albers), which is the CRS used for the rest of the notebook.

In [418]:
# Ecoregion descriptions: a headerless, '!'-delimited text file whose three
# columns are given the names below.
descrip_path = os.path.join(data_dir, 'eco_descriptions.txt')
descrip_columns = {0: 'id', 1: 'US_L3NAME', 2: 'description'}

df_descrip = pd.read_csv(descrip_path, sep='!', header=None)
df_descrip = df_descrip.rename(columns=descrip_columns)
df_descrip.head()

Unnamed: 0,id,US_L3NAME,description
0,1,COAST RANGE,The low mountains of the Coast Range of wester...
1,2,PUGET LOWLANDS,This broad rolling lowland is characterized by...
2,3,WILLAMETTE VALLEY,The Willamette Valley ecoregion contains terra...
3,4,CASCADES,This mountainous ecoregion stretches from the ...
4,5,SIERRA NEVADA,"The Sierra Nevada is a mountainous, deeply dis..."


In [419]:
# Load the ecoregion polygons and reproject them to EPSG:5070 (NAD83 / Conus Albers).
shapefile_path = os.path.join(data_dir, 'us_eco_l3_state_boundaries.shx')
df = gpd.read_file(shapefile_path).to_crs('EPSG:5070')

# Simplification tolerance in CRS units (metres for EPSG:5070).
simplify_tolerance = 500

# drop unneeded columns and clean up some columns
df['geometry'] = df['geometry'].simplify(simplify_tolerance)
# `.str.upper()` is vectorised and leaves a missing name as NaN, where
# `apply(lambda x: x.upper())` would raise on it.
df['US_L3NAME'] = df['US_L3NAME'].str.upper()
df['EPA_REGION'] = df['EPA_REGION'].astype('str')
df = df.drop(columns=['NA_L3CODE', 'NA_L3NAME', 'NA_L2CODE', 'NA_L1CODE', 'L3_KEY', 'L2_KEY', 'L1_KEY'])

In [420]:
# this adds the description of each L3 ecoregion
# Descriptions are looked up by their `id` value rather than by row position, so
# the right text is attached even if the description file is unsorted or skips an id.
descrip_by_id = {
    int(descrip_id): text
    for descrip_id, text in zip(df_descrip['id'], df_descrip['description'])
}
# Wrapped HTML per ecoregion id; many polygons share one id, so wrap each text once.
descrip_html_by_id = {}

s = json.loads(df.to_json())
for elt in s['features']:
    elt_id = int(elt['properties']['US_L3CODE'])
    if elt_id not in descrip_html_by_id:
        # Break the text into short lines, each terminated by an HTML line break.
        wrapped_lines = textwrap.wrap(descrip_by_id[elt_id])
        descrip_html_by_id[elt_id] = ''.join(f'{line}<br>' for line in wrapped_lines)
    elt['properties']['description'] = descrip_html_by_id[elt_id]

In [421]:
# Rebuild the GeoDataFrame from the annotated features, declaring the CRS
# explicitly (the coordinates are still in EPSG:5070).
df = gpd.GeoDataFrame.from_features(s['features'], crs='EPSG:5070')
df.head()

Unnamed: 0,geometry,US_L3CODE,US_L3NAME,NA_L2NAME,NA_L1NAME,STATE_NAME,EPA_REGION,description
0,"POLYGON ((-2284145.646 1942003.860, -2272506.9...",1,COAST RANGE,MARINE WEST COAST FOREST,MARINE WEST COAST FOREST,California,9,The low mountains of the Coast Range of wester...
1,"POLYGON ((-2326859.904 1961336.254, -2327000.9...",1,COAST RANGE,MARINE WEST COAST FOREST,MARINE WEST COAST FOREST,California,9,The low mountains of the Coast Range of wester...
2,"POLYGON ((-2326706.425 1962413.547, -2326659.7...",1,COAST RANGE,MARINE WEST COAST FOREST,MARINE WEST COAST FOREST,California,9,The low mountains of the Coast Range of wester...
3,"POLYGON ((-2328276.727 1965731.514, -2328248.3...",1,COAST RANGE,MARINE WEST COAST FOREST,MARINE WEST COAST FOREST,California,9,The low mountains of the Coast Range of wester...
4,"POLYGON ((-2332682.748 1971271.442, -2332657.2...",1,COAST RANGE,MARINE WEST COAST FOREST,MARINE WEST COAST FOREST,California,9,The low mountains of the Coast Range of wester...


# Interactive Map

In [414]:
# Column used to colour the map; one of 'NA_L1NAME', 'NA_L2NAME' or 'US_L3NAME'.
col_of_interest = 'US_L3NAME'

# Seed for the palette shuffle, fixed so the map colours are the same on every run.
color_seed = 0


def _palette(*cmap_names):
    """Return the colours of the named listed colormaps, concatenated in order."""
    # `plt.get_cmap` is used because `plt.cm.get_cmap` was removed from newer
    # matplotlib releases.
    return [color for name in cmap_names for color in plt.get_cmap(name).colors]


if col_of_interest == 'NA_L1NAME':
    # Hand-picked ordering of the Set3 colours, with one entry overridden.
    idx_list = [6, 1, 4, 2, 5, 0, 7, 3, 8, 9]
    set3_colors = _palette('Set3')
    color_list = [set3_colors[i] for i in idx_list]
    color_list[1] = '#d8b365'
elif col_of_interest == 'NA_L2NAME':
    # Dark2 + Set3 plus the last tab10 colour.
    color_list = _palette('Dark2', 'Set3') + _palette('tab10')[-1:]
    random.Random(color_seed).shuffle(color_list)
elif col_of_interest == 'US_L3NAME':
    # Several qualitative palettes are chained to get enough distinct colours.
    color_list = _palette('Dark2', 'Set3', 'tab20', 'Pastel2', 'tab20b', 'Set1', 'Paired')
    random.Random(color_seed).shuffle(color_list)
else:
    raise NotImplementedError('invalid column')

colors = matplotlib.colors.ListedColormap(color_list)

In [422]:
# Build the interactive map, coloured by the chosen column with the custom palette.
fig = df.explore(
    column=col_of_interest,
    cmap=colors,
    legend=False,
)

In [423]:
# Display the interactive map inline as the cell output.
fig

In [424]:
# Save the map as a standalone HTML file named after the column it is coloured by.
fig.save(f'{col_of_interest}.html')
# Scratch alternative with a fixed output name:
# fig.save('test.html')