### Setup

In [1]:
# Uncomment and run this cell if Pandas library is not already installed

# ! pip install --upgrade pip

# ! pip install numpy

# ! pip install pandas

# ! pip install ipywidgets

# ! pip install altair

# ! pip install folium

# see additional instructions for geopandas library > https://geopandas.org/getting_started/install.html


### Import Libraries

In [2]:
from random import randint

from IPython.display import display
import ipywidgets as widgets

import numpy as np 
import pandas as pd
import altair as alt

import folium
import geopandas
from folium.plugins import MarkerCluster


In [3]:
# enable extension for widgets
! jupyter nbextension enable --py widgetsnbextension

# disable the default 5000 limit on number of rows
alt.data_transformers.disable_max_rows()

# enable renderer
alt.renderers.enable('default')


Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok


RendererRegistry.enable('default')

### CloudFront domain

In [4]:
# define CloudFront domain name (to access S3)

cloudfront = 'https://d1b4l80qig6dum.cloudfront.net'
cloudfront


'https://d1b4l80qig6dum.cloudfront.net'

### Fishing entity selection

In [5]:
# Get the fishing entity names from reference table

# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for FISHING ENTITY lookup data

fishing_entity_key = 'reference_tables/fishing_entity.csv'
fishing_entity_data_location = '{}/{}'.format(cloudfront, fishing_entity_key)

# load the EEZ data using Pandas
fishing_entity_lookup = pd.read_csv(fishing_entity_data_location)

# print the number of records & columns, and also display the sample data
print(fishing_entity_lookup.shape)
fishing_entity_lookup.head()


(199, 2)


Unnamed: 0,fishing_entity_id,fishing_entity
0,1,Albania
1,2,Algeria
2,3,American Samoa
3,4,Angola
4,5,Antigua & Barbuda


In [6]:
# save the fishing entity names in a list for user selection purpose

fishing_entities_list = list()
fishing_entities_list = fishing_entity_lookup['fishing_entity'].to_list()

# display first five names from the list
fishing_entities_list[:5]


['Albania', 'Algeria', 'American Samoa', 'Angola', 'Antigua & Barbuda']

__Fishing entity selection widget__

In [7]:
# define fishing entity dropdown using above list

fishing_entity_dropdown = widgets.Dropdown(
    options = fishing_entities_list,
    value = 'Canada',
    description = 'Fishing entity :',
    disabled = False,
)

# display the fishing entity dropdown for user selection
display(fishing_entity_dropdown)


Dropdown(description='Fishing entity :', index=25, options=('Albania', 'Algeria', 'American Samoa', 'Angola', …

### Load LME data for the fishing entity selection

In [8]:
# create the LME filename to be loaded based on fishing entity selected

filename_lme = str(fishing_entity_dropdown.value) + '_fishing_entity_lme_data.csv'
filename_lme


'Canada_fishing_entity_lme_data.csv'

In [9]:
# data is in a subfolder 'fishing_entity_data' on the S3 bucket
# define CloudFront access location format for LME data

lme_data_key = 'fishing_entity_data/' + filename_lme
lme_data_location = '{}/{}'.format(cloudfront, lme_data_key)

# load the LME data using Pandas
df_lme = pd.read_csv(lme_data_location)

# print the number of records & columns, and also display the sample LME data
print(df_lme.shape)
df_lme.head(3)


(10398662, 12)


Unnamed: 0,fishing_entity_id,cell_id,lon,lat,lme_id,taxon_key,reporting_status_id,sector_type_id,catch_type_id,gear_type_id,year,catch_sum
0,26,16773,-73.75,78.25,18,100039,1,1,1,1,1994,0.001701
1,26,16773,-73.75,78.25,18,100039,1,1,1,1,2017,0.007632
2,26,16773,-73.75,78.25,66,100039,1,1,1,1,1994,0.001701


### Load Reference tables

__LME NAME lookup by ID__

In [10]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for LME NAME lookup data

lme_lookup_key = 'reference_tables/lme.csv'
lme_lookup_data_location = '{}/{}'.format(cloudfront, lme_lookup_key)

# load the LME NAME lookup data using Pandas
lme_lookup = pd.read_csv(lme_lookup_data_location)

# print the number of records & columns, and also display the sample data
print(lme_lookup.shape)
lme_lookup.head()


(66, 2)


Unnamed: 0,lme_id,lme
0,1,East Bering Sea
1,2,Gulf of Alaska
2,3,California Current
3,4,Gulf of California
4,5,Gulf of Mexico


__CATCH TYPE lookup by ID__

In [11]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for CATCH TYPE lookup data

catch_lookup_key = 'reference_tables/catch_type.csv'
catch_lookup_data_location = '{}/{}'.format(cloudfront, catch_lookup_key)

# load the CATCH TYPE lookup data using Pandas
catch_lookup = pd.read_csv(catch_lookup_data_location)

# rename column gear to catch_type
catch_lookup.rename(columns = {"name": "catch_type"}, inplace = True)

# print the number of records & columns, and also display the sample data
print(catch_lookup.shape)
catch_lookup.head()


(2, 3)


Unnamed: 0,catch_type_id,catch_type,abbreviation
0,1,Landings,R
1,2,Discards,D


__SECTOR TYPE lookup by ID__

In [12]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for SECTOR TYPE lookup data

sector_lookup_key = 'reference_tables/sector_type.csv'
sector_lookup_data_location = '{}/{}'.format(cloudfront, sector_lookup_key)

# load the SECTOR TYPE lookup data using Pandas
sector_lookup = pd.read_csv(sector_lookup_data_location)

# rename column name to sector_type
sector_lookup.rename(columns = {"name": "sector_type"}, inplace = True)

# print the number of records & columns, and also display the sample data
print(sector_lookup.shape)
sector_lookup.head()


(4, 2)


Unnamed: 0,sector_type_id,sector_type
0,1,Industrial
1,2,Subsistence
2,3,Artisanal
3,4,Recreational


__GEAR TYPE lookup by ID__

In [13]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for GEAR TYPE lookup data

gear_lookup_key = 'reference_tables/gear.csv'
gear_lookup_data_location = '{}/{}'.format(cloudfront, gear_lookup_key)

# load the GEAR TYPE lookup data using Pandas
gear_lookup = pd.read_csv(gear_lookup_data_location)

# rename column gear to gear_type
gear_lookup.rename(columns = {"gear": "gear_type"}, inplace = True)

# print the number of records & columns, and also display the sample data
print(gear_lookup.shape)
gear_lookup.head()


(43, 2)


Unnamed: 0,gear_type_id,gear_type
0,1,bottom trawl
1,1,bottom trawl
2,1,bottom trawl
3,1,bottom trawl
4,2,pelagic trawl


In [14]:
# remove duplicate rows from the gear type reference table

gear_lookup.drop_duplicates(inplace = True)

# print the number of records & columns, and also display the sample data
print(gear_lookup.shape)
gear_lookup.head()


(8, 2)


Unnamed: 0,gear_type_id,gear_type
0,1,bottom trawl
4,2,pelagic trawl
5,3,longline
9,4,purse seine
12,5,gillnets


__END USE TYPE lookup by ID__

In [15]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for END USE TYPE lookup data

end_use_lookup_key = 'reference_tables/end_use_type.csv'
end_use_lookup_data_location = '{}/{}'.format(cloudfront, end_use_lookup_key)

# load the END USE TYPE lookup data using Pandas
end_use_lookup = pd.read_csv(end_use_lookup_data_location)

# print the number of records & columns, and also display the sample data
print(end_use_lookup.shape)
end_use_lookup.head()


(4, 2)


Unnamed: 0,end_use_type_id,end_use_name
0,1,Direct human consumption
1,2,Fishmeal and fish oil
2,3,Other
3,4,Discards


__REPORTING STATUS lookup by ID__

In [16]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for REPORTING STATUS lookup data

reporting_status_lookup_key = 'reference_tables/reporting_status.csv'
reporting_status_lookup_data_location = '{}/{}'.format(cloudfront, reporting_status_lookup_key)

# load the REPORTING STATUS lookup data using Pandas
reporting_status_lookup = pd.read_csv(reporting_status_lookup_data_location)

# print the number of records & columns, and also display the sample data
print(reporting_status_lookup.shape)
reporting_status_lookup.head()


(2, 3)


Unnamed: 0,reporting_status_id,name,abbreviation
0,1,Reported,R
1,2,Unreported,U


__TAXON NAMES lookup by ID__

In [17]:
# data is in a subfolder 'reference_tables' on the S3 bucket
# define CloudFront access location format for TAXON NAMES lookup data

taxon_lookup_key = 'reference_tables/taxon.csv'
taxon_lookup_data_location = '{}/{}'.format(cloudfront, taxon_lookup_key)

# load the TAXON NAMES lookup data using Pandas
taxon_lookup = pd.read_csv(taxon_lookup_data_location)

# print the number of records & columns, and also display the sample data
print(taxon_lookup.shape)
taxon_lookup.head()


(3261, 6)


Unnamed: 0,taxon_key,scientific_name,common_name,species,genus,family
0,601456,Etrumeus whiteheadi,Whitehead's round herring,whiteheadi,Etrumeus,Dussumieriidae
1,605123,Exocoetus monocirrhus,Barbel flyingfish,monocirrhus,Exocoetus,Exocoetidae
2,200538,Chondrichthyes,"Sharks, rays, chimaeras",,,
3,400313,Echeneidae,Remoras,,,Echeneidae
4,400176,Percopsidae,Trout-perches,,,Percopsidae


### Merge IDs in LME data with NAMES from Reference tables

In [18]:
# display sample data from LME **before** Merging

df_lme.head(3)


Unnamed: 0,fishing_entity_id,cell_id,lon,lat,lme_id,taxon_key,reporting_status_id,sector_type_id,catch_type_id,gear_type_id,year,catch_sum
0,26,16773,-73.75,78.25,18,100039,1,1,1,1,1994,0.001701
1,26,16773,-73.75,78.25,18,100039,1,1,1,1,2017,0.007632
2,26,16773,-73.75,78.25,66,100039,1,1,1,1,1994,0.001701


In [19]:
# Merge LME, Sector type, Catch type, Gear type names from reference tables based on their respective IDs

df_lme = pd.merge(df_lme, sector_lookup, on = 'sector_type_id', how = 'inner')
df_lme = pd.merge(df_lme, catch_lookup, on = 'catch_type_id', how = 'inner')
df_lme = pd.merge(df_lme, gear_lookup, on = 'gear_type_id', how = 'inner')
df_lme = pd.merge(df_lme, lme_lookup, on = 'lme_id', how = 'inner')

# Drop columns that are not necessary for maps (after merging)
df_lme.drop(['fishing_entity_id', 
             'reporting_status_id', 
             'sector_type_id', 
             'catch_type_id',
             'gear_type_id',
             'abbreviation'            
            ], axis = 1, inplace = True)

# display sample data from LME **after** Merging
df_lme.head(3)


Unnamed: 0,cell_id,lon,lat,lme_id,taxon_key,year,catch_sum,sector_type,catch_type,gear_type,lme
0,16773,-73.75,78.25,18,100039,1994,0.001701,Industrial,Landings,bottom trawl,Canadian Eastern Arctic - West Greenland
1,16773,-73.75,78.25,18,100039,2017,0.007632,Industrial,Landings,bottom trawl,Canadian Eastern Arctic - West Greenland
2,16774,-73.25,78.25,18,100039,1994,0.002202,Industrial,Landings,bottom trawl,Canadian Eastern Arctic - West Greenland


<div class = "alert alert-block alert-warning">
<b>Real Value:</b> <br>
<br>Create Real value metric based on dummy data.
<br>Remove this after Real value is added in the original data.<br>
</div>

In [20]:
df_lme['real_value'] = df_lme['catch_sum'] * randint(500, 1000)

df_lme.head(3)


Unnamed: 0,cell_id,lon,lat,lme_id,taxon_key,year,catch_sum,sector_type,catch_type,gear_type,lme,real_value
0,16773,-73.75,78.25,18,100039,1994,0.001701,Industrial,Landings,bottom trawl,Canadian Eastern Arctic - West Greenland,1.356004
1,16773,-73.75,78.25,18,100039,2017,0.007632,Industrial,Landings,bottom trawl,Canadian Eastern Arctic - West Greenland,6.082392
2,16774,-73.25,78.25,18,100039,1994,0.002202,Industrial,Landings,bottom trawl,Canadian Eastern Arctic - West Greenland,1.754603


In [21]:
# display info of the data: Number of records, Names & data types of columns.

df_lme.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 10398662 entries, 0 to 10398661
Data columns (total 12 columns):
 #   Column       Dtype  
---  ------       -----  
 0   cell_id      int64  
 1   lon          float64
 2   lat          float64
 3   lme_id       int64  
 4   taxon_key    int64  
 5   year         int64  
 6   catch_sum    float64
 7   sector_type  object 
 8   catch_type   object 
 9   gear_type    object 
 10  lme          object 
 11  real_value   float64
dtypes: float64(4), int64(4), object(4)
memory usage: 1.0+ GB


### Data Analysis (to be updated)

In [None]:
# Visualizations to be updated next to maps

In [None]:
# selections,
# functions
# check notepad 

### Shape files for Cells

In [22]:
# data is in a subfolder 'maps_shape_files' on the S3 bucket
# define CloudFront access location format for Cell shape files data

cell_shape_data_key = 'maps_shape_files/fixed_geom_world_ocean_cells.zip'
cell_shape_data_location = '{}/{}'.format(cloudfront, cell_shape_data_key)

# load the Cell shape files data using GeoPandas
# this returns a GeoDataFrame 
cell_gdf = geopandas.read_file(cell_shape_data_location)

# print the number of records & columns, and also display the sample data
print(cell_gdf.shape)
cell_gdf.head(3)


(153533, 6)


Unnamed: 0,ID,CENTROID,WATER_AREA,SEQ,NEW_SEQ,geometry
0,1,"47.75,-36.25",2493.295817,181896.0,164053.0,"POLYGON ((47.50000 -36.00000, 48.00000 -36.000..."
1,2,"47.75,-34.25",2554.452574,179016.0,164049.0,"POLYGON ((47.50000 -34.00000, 48.00000 -34.000..."
2,3,"47.75,-39.75",2378.926818,186936.0,164060.0,"POLYGON ((47.50000 -39.50000, 48.00000 -39.500..."


In [23]:
# drop columns not necessary for maps
# update this as needed - in case NEW_SEQ has to be used in future instead of SEQ
cell_gdf.drop(['ID', 'WATER_AREA', 'NEW_SEQ', 'CENTROID'], axis = 1, inplace = True)

# rename the SEQ column as cell_id and also update the column type to 'int'
# update this as needed - in case NEW_SEQ has to be used in future instead of SEQ
cell_gdf['SEQ'] = cell_gdf['SEQ'].astype(int)
cell_gdf.rename(columns = {"SEQ": "cell_id"}, inplace = True)

# print the number of records & columns, and also display the sample data
print(cell_gdf.shape)
cell_gdf.head(3)


(153533, 2)


Unnamed: 0,cell_id,geometry
0,181896,"POLYGON ((47.50000 -36.00000, 48.00000 -36.000..."
1,179016,"POLYGON ((47.50000 -34.00000, 48.00000 -34.000..."
2,186936,"POLYGON ((47.50000 -39.50000, 48.00000 -39.500..."


### Shape files for LME

In [24]:
# data is in a subfolder 'maps_shape_files' on the S3 bucket
# define CloudFront access location format for LME shape files data

lme_shape_data_key = 'maps_shape_files/fixed_geom_lme.zip'
lme_shape_data_location = '{}/{}'.format(cloudfront, lme_shape_data_key)

# load the LME shape files data using GeoPandas
# this returns a GeoDataFrame
lme_gdf = geopandas.read_file(lme_shape_data_location)

# print the number of records & columns, and also display the sample data
print(lme_gdf.shape)
lme_gdf.head(3)


(66, 7)


Unnamed: 0,OBJECT_ID,LME_NUMBER,LME_NAME,SHAPE_LENG,SHAPE_AREA,PROFILE_UR,geometry
0,1,23,Baltic Sea,219.326402,61.950021,http://lme.edc.uri.edu/index.php?option=com_co...,"POLYGON ((10.97944 54.38055, 10.96944 54.38055..."
1,2,52,Sea of Okhotsk,145.932989,213.796584,http://lme.edc.uri.edu/index.php?option=com_co...,"POLYGON ((156.70705 51.18858, 156.62592 51.160..."
2,3,22,North Sea,209.486403,103.965247,http://lme.edc.uri.edu/index.php?option=com_co...,"POLYGON ((5.27889 61.98027, 5.24917 61.97388, ..."


In [25]:
# drop columns not necessary for maps
# update this as needed

lme_gdf.drop(['OBJECT_ID', 'SHAPE_LENG', 'SHAPE_AREA', 'PROFILE_UR'], axis = 1, inplace = True)

# rename the LME columns to merge with spatial catch data 
# update this as needed
lme_gdf.rename(columns = {"LME_NAME": "lme"}, inplace = True)
lme_gdf.rename(columns = {"LME_NUMBER": "lme_id"}, inplace = True)

# print the number of records & columns, and also display the sample data
print(lme_gdf.shape)
lme_gdf.head(3)


(66, 3)


Unnamed: 0,lme_id,lme,geometry
0,23,Baltic Sea,"POLYGON ((10.97944 54.38055, 10.96944 54.38055..."
1,52,Sea of Okhotsk,"POLYGON ((156.70705 51.18858, 156.62592 51.160..."
2,22,North Sea,"POLYGON ((5.27889 61.98027, 5.24917 61.97388, ..."


<div class = "alert alert-block alert-danger">
<b>Data for Widgets:</b> <br>
<br>This section can be updated only if necessary, with caution.<br>
</div>

In [26]:
### Data for Start Year & End Year widgets

# get unique list of years from the dataframe
years = sorted(list(df_lme['year'].unique()))

# display first few records in the list
years[:5]


[1950, 1951, 1952, 1953, 1954]

In [27]:
### Data for Sector type widgets

# get unique list of Sector types from the dataframe
sector_types = sorted(list(df_lme['sector_type'].unique()))

# display first few records in the list
sector_types[:5]


['Artisanal', 'Industrial', 'Recreational', 'Subsistence']

In [28]:
### Data for Catch type widgets

# get unique list of Catch types from the dataframe
catch_types = sorted(list(df_lme['catch_type'].unique()))

# display first few records in the list
catch_types[:5]


['Discards', 'Landings']

In [29]:
### Data for Gear type widgets

# get unique list of Gear types from the dataframe
gear_types = sorted(list(df_lme['gear_type'].unique()))

# display first few records in the list
gear_types[:5]


['bottom trawl', 'gillnets', 'longline', 'other', 'pelagic trawl']

In [30]:
### dictionary with taxon id and their respective names

taxon_list = list(set(zip(taxon_lookup['scientific_name'], taxon_lookup['taxon_key'])))
taxon_list.sort()

# display first few records in the list
taxon_list[:5]


[('Abalistes stellaris', 600009),
 ('Ablennes hians', 600972),
 ('Abramis brama', 600268),
 ('Abudefduf', 509206),
 ('Abudefduf luridus', 611820)]