### Setup

In [1]:
# Uncomment and run this cell if the required libraries listed below are not already installed

# ! pip install --upgrade pip

# ! pip install numpy

# ! pip install pandas

# ! pip install ipywidgets

# ! pip install altair

# ! pip install folium

# see additional instructions for geopandas library > https://geopandas.org/getting_started/install.html


### Import Libraries

In [2]:
from random import randint
from urllib.parse import quote

from IPython.display import display
import ipywidgets as widgets

import numpy as np 
import pandas as pd
import altair as alt

import folium
import geopandas
from folium.plugins import MarkerCluster


In [3]:
# enable extension for widgets
# (shell command executed through IPython's `!` escape, not Python code)
# NOTE(review): `jupyter nbextension` is a classic-Notebook command - confirm it exists in the target environment
! jupyter nbextension enable --py widgetsnbextension

# disable the default 5000 limit on number of rows
# (the EEZ data loaded later in this notebook is far larger than that limit)
alt.data_transformers.disable_max_rows()

# enable renderer
# last expression of the cell, so its return value is echoed as the cell output
alt.renderers.enable('default')


Enabling notebook extension jupyter-js-widgets/extension...


RendererRegistry.enable('default')

      - Validating: ok


### CloudFront domain

In [4]:
# define CloudFront domain name (to access S3)
# every data, reference-table and shape-file URL in this notebook is built as '<cloudfront>/<key>'

cloudfront = 'https://d1b4l80qig6dum.cloudfront.net'
# bare expression: echo the configured domain as the cell output
cloudfront


'https://d1b4l80qig6dum.cloudfront.net'

### Fishing entity selection

In [5]:
# Load the fishing entity reference table (fishing_entity_id -> fishing_entity).
# The file lives in the 'reference_tables' subfolder of the S3 bucket and is
# fetched through the CloudFront domain defined earlier.

fishing_entity_key = 'reference_tables/fishing_entity.csv'
fishing_entity_data_location = f'{cloudfront}/{fishing_entity_key}'

# read the fishing entity lookup into a DataFrame
fishing_entity_lookup = pd.read_csv(fishing_entity_data_location)

# report (rows, columns), then preview the first rows as the cell output
print(fishing_entity_lookup.shape)
fishing_entity_lookup.head()


(199, 2)


Unnamed: 0,fishing_entity_id,fishing_entity
0,1,Albania
1,2,Algeria
2,3,American Samoa
3,4,Angola
4,5,Antigua & Barbuda


In [6]:
# save the fishing entity names in a list for user selection purpose
# (Series.to_list() already returns a new list, so no empty-list initialisation is needed)

fishing_entities_list = fishing_entity_lookup['fishing_entity'].to_list()

# display first five names from the list
fishing_entities_list[:5]


['Albania', 'Algeria', 'American Samoa', 'Angola', 'Antigua & Barbuda']

__Fishing entity selection widget__

In [7]:
# Dropdown offering every fishing entity name; 'Canada' is pre-selected.
# The selected value drives which EEZ data file is loaded further down.

fishing_entity_dropdown = widgets.Dropdown(
    description='Fishing entity :',
    options=fishing_entities_list,
    value='Canada',
    disabled=False,
)

# render the widget so the user can pick an entity
display(fishing_entity_dropdown)


Dropdown(description='Fishing entity :', index=25, options=('Albania', 'Algeria', 'American Samoa', 'Angola', …

### Load EEZ data for the fishing entity selection

In [9]:
# build the name of the EEZ data file from the entity currently selected in the dropdown

filename_eez = f'{fishing_entity_dropdown.value}_fishing_entity_eez_data.csv'
filename_eez


'Canada_fishing_entity_eez_data.csv'

In [10]:
# data is in a subfolder 'fishing_entity_data' on the S3 bucket
# define CloudFront access location format for EEZ data
#
# The file name is derived from the selected fishing entity, and entity names can
# contain spaces or '&' (e.g. 'American Samoa', 'Antigua & Barbuda'). Those
# characters are not valid in a raw URL path, so the key is percent-encoded
# before it is appended to the CloudFront domain ('/' is left untouched).

eez_data_key = 'fishing_entity_data/' + filename_eez
eez_data_location = '{}/{}'.format(cloudfront, quote(eez_data_key))

# load the EEZ data using Pandas
df_eez = pd.read_csv(eez_data_location)

# print the number of records & columns, and also display the sample EEZ data
print(df_eez.shape)
df_eez.head(3)


(11222726, 12)


Unnamed: 0,fishing_entity_id,cell_id,lon,lat,eez_id,taxon_key,reporting_status_id,sector_type_id,catch_type_id,gear_type_id,year,catch_sum
0,26,16770,-75.25,78.25,924,100039,1,1,1,1,2006,0.904239
1,26,16770,-75.25,78.25,924,100039,1,1,1,1,2007,0.956645
2,26,16770,-75.25,78.25,924,100039,1,1,1,1,2008,0.835748


### Load Reference tables

__EEZ NAME lookup by ID__

In [11]:
# EEZ name reference table (eez_id -> eez), stored under 'reference_tables'
# on the S3 bucket and served through CloudFront

eez_lookup_key = 'reference_tables/eez.csv'
eez_lookup_data_location = f'{cloudfront}/{eez_lookup_key}'

# read the EEZ NAME lookup into a DataFrame
eez_lookup = pd.read_csv(eez_lookup_data_location)

# show (rows, columns) followed by a preview of the table
print(eez_lookup.shape)
eez_lookup.head()


(283, 2)


Unnamed: 0,eez_id,eez
0,312,Guadeloupe (France)
1,12,Algeria
2,16,American Samoa
3,28,Antigua & Barbuda
4,32,Argentina


__CATCH TYPE lookup by ID__

In [12]:
# CATCH TYPE reference table, stored under 'reference_tables' on the S3 bucket
# and served through CloudFront

catch_lookup_key = 'reference_tables/catch_type.csv'
catch_lookup_data_location = f'{cloudfront}/{catch_lookup_key}'

# read the CATCH TYPE lookup and rename its 'name' column to 'catch_type'
catch_lookup = pd.read_csv(catch_lookup_data_location).rename(
    columns={"name": "catch_type"}
)

# show (rows, columns) followed by a preview of the table
print(catch_lookup.shape)
catch_lookup.head()


(2, 3)


Unnamed: 0,catch_type_id,catch_type,abbreviation
0,1,Landings,R
1,2,Discards,D


__SECTOR TYPE lookup by ID__

In [13]:
# SECTOR TYPE reference table, stored under 'reference_tables' on the S3 bucket
# and served through CloudFront

sector_lookup_key = 'reference_tables/sector_type.csv'
sector_lookup_data_location = f'{cloudfront}/{sector_lookup_key}'

# read the SECTOR TYPE lookup and rename its 'name' column to 'sector_type'
sector_lookup = pd.read_csv(sector_lookup_data_location).rename(
    columns={"name": "sector_type"}
)

# show (rows, columns) followed by a preview of the table
print(sector_lookup.shape)
sector_lookup.head()


(4, 2)


Unnamed: 0,sector_type_id,sector_type
0,1,Industrial
1,2,Subsistence
2,3,Artisanal
3,4,Recreational


__GEAR TYPE lookup by ID__

In [14]:
# GEAR TYPE reference table, stored under 'reference_tables' on the S3 bucket
# and served through CloudFront

gear_lookup_key = 'reference_tables/gear.csv'
gear_lookup_data_location = f'{cloudfront}/{gear_lookup_key}'

# read the GEAR TYPE lookup and rename its 'gear' column to 'gear_type'
gear_lookup = pd.read_csv(gear_lookup_data_location).rename(
    columns={"gear": "gear_type"}
)

# show (rows, columns) followed by a preview of the table
print(gear_lookup.shape)
gear_lookup.head()


(43, 2)


Unnamed: 0,gear_type_id,gear_type
0,1,bottom trawl
1,1,bottom trawl
2,1,bottom trawl
3,1,bottom trawl
4,2,pelagic trawl


In [15]:
# the gear reference table repeats identical rows; keep one copy of each

gear_lookup = gear_lookup.drop_duplicates()

# show (rows, columns) followed by a preview of the de-duplicated table
print(gear_lookup.shape)
gear_lookup.head()


(8, 2)


Unnamed: 0,gear_type_id,gear_type
0,1,bottom trawl
4,2,pelagic trawl
5,3,longline
9,4,purse seine
12,5,gillnets


__END USE TYPE lookup by ID__

In [16]:
# END USE TYPE reference table, stored under 'reference_tables' on the S3 bucket
# and served through CloudFront

end_use_lookup_key = 'reference_tables/end_use_type.csv'
end_use_lookup_data_location = f'{cloudfront}/{end_use_lookup_key}'

# read the END USE TYPE lookup into a DataFrame
end_use_lookup = pd.read_csv(end_use_lookup_data_location)

# show (rows, columns) followed by a preview of the table
print(end_use_lookup.shape)
end_use_lookup.head()


(4, 2)


Unnamed: 0,end_use_type_id,end_use_name
0,1,Direct human consumption
1,2,Fishmeal and fish oil
2,3,Other
3,4,Discards


__REPORTING STATUS lookup by ID__

In [17]:
# REPORTING STATUS reference table, stored under 'reference_tables' on the S3 bucket
# and served through CloudFront

reporting_status_lookup_key = 'reference_tables/reporting_status.csv'
reporting_status_lookup_data_location = f'{cloudfront}/{reporting_status_lookup_key}'

# read the REPORTING STATUS lookup into a DataFrame
reporting_status_lookup = pd.read_csv(reporting_status_lookup_data_location)

# show (rows, columns) followed by a preview of the table
print(reporting_status_lookup.shape)
reporting_status_lookup.head()


(2, 3)


Unnamed: 0,reporting_status_id,name,abbreviation
0,1,Reported,R
1,2,Unreported,U


__TAXON NAMES lookup by ID__

In [18]:
# TAXON NAMES reference table, stored under 'reference_tables' on the S3 bucket
# and served through CloudFront

taxon_lookup_key = 'reference_tables/taxon.csv'
taxon_lookup_data_location = f'{cloudfront}/{taxon_lookup_key}'

# read the TAXON NAMES lookup into a DataFrame
taxon_lookup = pd.read_csv(taxon_lookup_data_location)

# show (rows, columns) followed by a preview of the table
print(taxon_lookup.shape)
taxon_lookup.head()


(3261, 6)


Unnamed: 0,taxon_key,scientific_name,common_name,species,genus,family
0,601456,Etrumeus whiteheadi,Whitehead's round herring,whiteheadi,Etrumeus,Dussumieriidae
1,605123,Exocoetus monocirrhus,Barbel flyingfish,monocirrhus,Exocoetus,Exocoetidae
2,200538,Chondrichthyes,"Sharks, rays, chimaeras",,,
3,400313,Echeneidae,Remoras,,,Echeneidae
4,400176,Percopsidae,Trout-perches,,,Percopsidae


### Merge IDs in EEZ data with NAMES from Reference tables

In [19]:
# display sample data from EEZ **before** Merging
# (at this point the frame still holds the *_id columns rather than names)

df_eez.head(3)


Unnamed: 0,fishing_entity_id,cell_id,lon,lat,eez_id,taxon_key,reporting_status_id,sector_type_id,catch_type_id,gear_type_id,year,catch_sum
0,26,16770,-75.25,78.25,924,100039,1,1,1,1,2006,0.904239
1,26,16770,-75.25,78.25,924,100039,1,1,1,1,2007,0.956645
2,26,16770,-75.25,78.25,924,100039,1,1,1,1,2008,0.835748


In [20]:
# Attach the sector type, catch type, gear type and EEZ names to the catch
# records by joining each reference table on its ID column. All joins are
# inner joins, so a record whose ID is absent from a reference table is dropped.
#
# NOTE: this cell overwrites df_eez and removes the ID columns it joins on, so
# it cannot be re-run without first reloading df_eez.

df_eez = (
    df_eez
    .merge(sector_lookup, on='sector_type_id', how='inner')
    .merge(catch_lookup, on='catch_type_id', how='inner')
    .merge(gear_lookup, on='gear_type_id', how='inner')
    .merge(eez_lookup, on='eez_id', how='inner')
)

# the ID columns and the catch type 'abbreviation' are not needed for the maps
df_eez = df_eez.drop(
    columns=[
        'fishing_entity_id',
        'reporting_status_id',
        'sector_type_id',
        'catch_type_id',
        'gear_type_id',
        'abbreviation',
    ]
)

# display sample data from EEZ **after** Merging
df_eez.head(3)


Unnamed: 0,cell_id,lon,lat,eez_id,taxon_key,year,catch_sum,sector_type,catch_type,gear_type,eez
0,16770,-75.25,78.25,924,100039,2006,0.904239,Industrial,Landings,bottom trawl,Canada (Arctic)
1,16770,-75.25,78.25,924,100039,2007,0.956645,Industrial,Landings,bottom trawl,Canada (Arctic)
2,16770,-75.25,78.25,924,100039,2008,0.835748,Industrial,Landings,bottom trawl,Canada (Arctic)


<div class = "alert alert-block alert-warning">
<b>Real Value:</b> <br>
<br>Create Real value metric based on dummy data.
<br>Remove this after Real value is added in the original data.<br>
</div>

In [21]:
# Placeholder 'real_value' metric: catch_sum scaled by ONE random integer
# multiplier drawn from [500, 1000] (both ends included); the same multiplier
# is applied to every row.
# The generator is seeded so the dummy values are identical on every run.
# Remove this cell once the real value is available in the source data.
REAL_VALUE_SEED = 42
real_value_multiplier = int(
    np.random.default_rng(REAL_VALUE_SEED).integers(500, 1000, endpoint=True)
)
df_eez['real_value'] = df_eez['catch_sum'] * real_value_multiplier

df_eez.head(3)


Unnamed: 0,cell_id,lon,lat,eez_id,taxon_key,year,catch_sum,sector_type,catch_type,gear_type,eez,real_value
0,16770,-75.25,78.25,924,100039,2006,0.904239,Industrial,Landings,bottom trawl,Canada (Arctic),562.436861
1,16770,-75.25,78.25,924,100039,2007,0.956645,Industrial,Landings,bottom trawl,Canada (Arctic),595.033293
2,16770,-75.25,78.25,924,100039,2008,0.835748,Industrial,Landings,bottom trawl,Canada (Arctic),519.83504


In [22]:
# display info of the data: Number of records, Names & data types of columns.
# (info() prints its report directly; it also shows the frame's memory usage)

df_eez.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 11222726 entries, 0 to 11222725
Data columns (total 12 columns):
 #   Column       Dtype  
---  ------       -----  
 0   cell_id      int64  
 1   lon          float64
 2   lat          float64
 3   eez_id       int64  
 4   taxon_key    int64  
 5   year         int64  
 6   catch_sum    float64
 7   sector_type  object 
 8   catch_type   object 
 9   gear_type    object 
 10  eez          object 
 11  real_value   float64
dtypes: float64(4), int64(4), object(4)
memory usage: 1.1+ GB


### Data Analysis (to be updated)

In [None]:
# Visualizations to be updated next to maps

In [None]:
# TODO: add the widget selections and helper functions for this analysis
# (see working notes)

### Shape files for Cells

In [23]:
# Cell shape files, stored as a zip archive under 'maps_shape_files' on the
# S3 bucket and served through CloudFront

cell_shape_data_key = 'maps_shape_files/fixed_geom_world_ocean_cells.zip'
cell_shape_data_location = f'{cloudfront}/{cell_shape_data_key}'

# read the shape files with GeoPandas; the result is a GeoDataFrame
cell_gdf = geopandas.read_file(cell_shape_data_location)

# show (rows, columns) followed by a preview of the data
print(cell_gdf.shape)
cell_gdf.head(3)


(153533, 6)


Unnamed: 0,ID,CENTROID,WATER_AREA,SEQ,NEW_SEQ,geometry
0,1,"47.75,-36.25",2493.295817,181896.0,164053.0,"POLYGON ((47.50000 -36.00000, 48.00000 -36.000..."
1,2,"47.75,-34.25",2554.452574,179016.0,164049.0,"POLYGON ((47.50000 -34.00000, 48.00000 -34.000..."
2,3,"47.75,-39.75",2378.926818,186936.0,164060.0,"POLYGON ((47.50000 -39.50000, 48.00000 -39.500..."


In [24]:
# Keep only the cell sequence number and its geometry, exposing the sequence
# number as an integer 'cell_id' column.
# update this as needed - in case NEW_SEQ has to be used in future instead of SEQ
cell_gdf = (
    cell_gdf
    .drop(columns=['ID', 'WATER_AREA', 'NEW_SEQ', 'CENTROID'])
    .rename(columns={"SEQ": "cell_id"})
)

# SEQ is read from the shape file as a float; store it as 'int'
cell_gdf['cell_id'] = cell_gdf['cell_id'].astype(int)

# show (rows, columns) followed by a preview of the data
print(cell_gdf.shape)
cell_gdf.head(3)


(153533, 2)


Unnamed: 0,cell_id,geometry
0,181896,"POLYGON ((47.50000 -36.00000, 48.00000 -36.000..."
1,179016,"POLYGON ((47.50000 -34.00000, 48.00000 -34.000..."
2,186936,"POLYGON ((47.50000 -39.50000, 48.00000 -39.500..."


### Shape files for EEZ

In [25]:
# EEZ shape files, stored as a zip archive under 'maps_shape_files' on the
# S3 bucket and served through CloudFront

eez_shape_data_key = 'maps_shape_files/fixed_geom_eez.zip'
eez_shape_data_location = f'{cloudfront}/{eez_shape_data_key}'

# read the shape files with GeoPandas; the result is a GeoDataFrame
eez_gdf = geopandas.read_file(eez_shape_data_location)

# show (rows, columns) followed by a preview of the data
print(eez_gdf.shape)
eez_gdf.head(3)


(281, 9)


Unnamed: 0,ID,FID,CAT,EEZ_ID,EEZ_NAME,AREA_KM2,LAYER,PATH,geometry
0,45,141.0,45,174,Comoros Isl.,231636.310536,SAU_EEZ_v9_Mar2018,C:\Users\spopov\Documents\GIS\EEZ\SAU EEZ v9 2...,"POLYGON ((45.73564 -14.31894, 45.39066 -14.430..."
1,47,167.0,47,178,"Congo, R. of",34012.666059,SAU_EEZ_v9_Mar2018,C:\Users\spopov\Documents\GIS\EEZ\SAU EEZ v9 2...,"POLYGON ((9.12111 -6.73441, 8.91135 -6.43164, ..."
2,48,168.0,48,180,Congo (ex-Zaire),13155.472627,SAU_EEZ_v9_Mar2018,C:\Users\spopov\Documents\GIS\EEZ\SAU EEZ v9 2...,"POLYGON ((9.11375 -7.06505, 9.00732 -6.76987, ..."


In [26]:
# Keep only the EEZ id, EEZ name and geometry, and rename the id/name columns
# to 'eez_id' / 'eez' - the names used in the spatial catch data.
# update this as needed

eez_gdf = (
    eez_gdf
    .drop(columns=['ID', 'FID', 'CAT', 'AREA_KM2', 'LAYER', 'PATH'])
    .rename(columns={"EEZ_NAME": "eez", "EEZ_ID": "eez_id"})
)

# show (rows, columns) followed by a preview of the data
print(eez_gdf.shape)
eez_gdf.head(3)


(281, 3)


Unnamed: 0,eez_id,eez,geometry
0,174,Comoros Isl.,"POLYGON ((45.73564 -14.31894, 45.39066 -14.430..."
1,178,"Congo, R. of","POLYGON ((9.12111 -6.73441, 8.91135 -6.43164, ..."
2,180,Congo (ex-Zaire),"POLYGON ((9.11375 -7.06505, 9.00732 -6.76987, ..."


<div class = "alert alert-block alert-danger">
<b>Data for Widgets:</b> <br>
<br>This section can be updated only if necessary, with caution.<br>
</div>

In [28]:
### Data for Start Year & End Year widgets

# distinct years present in the catch data, in ascending order
years = sorted(df_eez['year'].unique())

# preview the first few entries
years[:5]


[1950, 1951, 1952, 1953, 1954]

In [29]:
### Data for Sector type widgets

# distinct Sector types present in the catch data, in alphabetical order
sector_types = sorted(df_eez['sector_type'].unique())

# preview the first few entries
sector_types[:5]


['Artisanal', 'Industrial', 'Recreational', 'Subsistence']

In [30]:
### Data for Catch type widgets

# distinct Catch types present in the catch data, in alphabetical order
catch_types = sorted(df_eez['catch_type'].unique())

# preview the first few entries
catch_types[:5]


['Discards', 'Landings']

In [31]:
### Data for Gear type widgets

# distinct Gear types present in the catch data, in alphabetical order
gear_types = sorted(df_eez['gear_type'].unique())

# preview the first few entries
gear_types[:5]


['bottom trawl', 'gillnets', 'longline', 'other', 'pelagic trawl']

In [32]:
### sorted list of (scientific_name, taxon_key) pairs from the taxon reference table

# pair each scientific name with its key, de-duplicate the pairs, then sort them
taxon_pairs = zip(taxon_lookup['scientific_name'], taxon_lookup['taxon_key'])
taxon_list = sorted(set(taxon_pairs))

# preview the first few entries
taxon_list[:5]


[('Abalistes stellaris', 600009),
 ('Ablennes hians', 600972),
 ('Abramis brama', 600268),
 ('Abudefduf', 509206),
 ('Abudefduf luridus', 611820)]