### Setup

In [1]:
# Optional one-time setup: uncomment the lines below if the required libraries
# are not already installed.
# NOTE(review): `%pip` is used instead of `! pip` so that the install targets
# the environment of the running kernel.

# %pip install --upgrade pip

# %pip install altair

# %pip install pandas


### Import Libraries

In [2]:
import pandas as pd

import altair as alt


In [3]:
# Lift Altair's default 5000-row cap on the data a chart may carry.
alt.data_transformers.disable_max_rows()

# Select the 'default' renderer for displaying charts.
alt.renderers.enable("default")


RendererRegistry.enable('default')

### CloudFront domain

In [4]:
# CloudFront domain through which the S3-hosted data files are accessed;
# the data locations built below are all relative to this base URL.
cloudfront = "https://d1b4l80qig6dum.cloudfront.net"
cloudfront


'https://d1b4l80qig6dum.cloudfront.net'

### Fishing entity selection

In [5]:
# Fishing entity to analyse. It is hard-coded here; in the consolidated final
# notebook this selection will be based on user input.
fishing_entity_selected = "Canada"
fishing_entity_selected


'Canada'

### Load EEZ data from S3 based on the fishing entity selection

In [6]:
# Name of the EEZ data file for the selected fishing entity:
# "<fishing entity>_fishing_entity_eez_data.csv".
filename_eez = "".join((str(fishing_entity_selected), "_fishing_entity_eez_data.csv"))
filename_eez


'Canada_fishing_entity_eez_data.csv'

In [7]:
# EEZ files sit in the 'fishing_entity_data' subfolder of the S3 bucket;
# prefix the key with the CloudFront domain to get the full location.
eez_data_key = "fishing_entity_data/" + filename_eez
eez_data_location = "{}/{}".format(cloudfront, eez_data_key)

# Read the EEZ CSV from that location into a DataFrame.
df_eez = pd.read_csv(eez_data_location)

# Report (rows, columns), then preview the first three records.
print(df_eez.shape)
df_eez.head(n=3)


(11222726, 12)


Unnamed: 0,fishing_entity_id,cell_id,lon,lat,eez_id,taxon_key,reporting_status_id,sector_type_id,catch_type_id,gear_type_id,year,catch_sum
0,26,16770,-75.25,78.25,924,100039,1,1,1,1,2006,0.904239
1,26,16770,-75.25,78.25,924,100039,1,1,1,1,2007,0.956645
2,26,16770,-75.25,78.25,924,100039,1,1,1,1,2008,0.835748


### Sector type lookup by ID

In [8]:
# Reference tables sit in the 'reference_tables' subfolder of the S3 bucket;
# prefix the key with the CloudFront domain to get the full location.
sector_lookup_key = "reference_tables/sector_type.csv"
sector_lookup_data_location = "{}/{}".format(cloudfront, sector_lookup_key)

# Load the SECTOR TYPE lookup and expose its 'name' column as 'sector_type'
# in a single expression, so the frame is not modified after it is bound.
sector_lookup = pd.read_csv(sector_lookup_data_location).rename(
    columns={"name": "sector_type"}
)

# Report (rows, columns), then preview the lookup table.
print(sector_lookup.shape)
sector_lookup.head()


(4, 2)


Unnamed: 0,sector_type_id,sector_type
0,1,Industrial
1,2,Subsistence
2,3,Artisanal
3,4,Recreational


### Aggregate catch by sector type ID and merge with lookup to get Sector type name

In [9]:
# Total catch per (sector type id, year): catch_sum is summed over every
# other dimension of the EEZ data.
df_sector_catch = (
    df_eez
    .groupby(by=["sector_type_id", "year"], as_index=False)[["catch_sum"]]
    .sum()
    .copy()
)
df_sector_catch.head(n=3)


Unnamed: 0,sector_type_id,year,catch_sum
0,1,1950,652905.375452
1,1,1951,677837.133408
2,1,1952,635820.514578


In [10]:
# Merge the aggregated catch with the lookup by id to get the sector type name.
#
# This cell rebinds df_sector_catch from itself, so it must be safe to re-run:
# any 'sector_type' column left over from a previous run is dropped before the
# merge. Without that, a second run would merge the lookup again and pandas
# would produce 'sector_type_x' / 'sector_type_y' instead of the single
# 'sector_type' column that the cells below rely on.
df_sector_catch = pd.merge(
    df_sector_catch.drop(columns=['sector_type'], errors='ignore'),
    sector_lookup,
    on='sector_type_id',
    how='inner',  # rows whose sector_type_id has no match in the lookup are dropped
)

# print the number of records & columns, and also display a sample of the
# merged sector catch data
print(df_sector_catch.shape)
df_sector_catch.head()


(276, 4)


Unnamed: 0,sector_type_id,year,catch_sum,sector_type
0,1,1950,652905.375452,Industrial
1,1,1951,677837.133408,Industrial
2,1,1952,635820.514578,Industrial
3,1,1953,707227.293922,Industrial
4,1,1954,939536.957707,Industrial


In [11]:
# Sanity check after the merge: count the records carrying each sector type
# name, to confirm that every sector type was picked up.
df_sector_catch.loc[:, "sector_type"].value_counts()


Industrial      69
Subsistence     69
Artisanal       69
Recreational    69
Name: sector_type, dtype: int64

### Altair visualization

In [14]:
# Faceted area chart: one row per sector type, each showing that sector's
# total catch by year. Sector type is also mapped to colour (with a legend).
alt.Chart(df_sector_catch).mark_area().encode(
            x = alt.X('year:N',
                      title = 'Year',
                      axis = alt.Axis(titleFontSize = 14,
                                      labelFontSize = 14,
                                      # Ask for one tick per distinct year. The frame holds one
                                      # row per (sector type, year), so its row count is not
                                      # the number of years.
                                      # NOTE(review): Vega-Lite documents tickCount for
                                      # quantitative scales, so it may have no visible effect
                                      # on this nominal axis - confirm.
                                      tickCount = df_sector_catch['year'].nunique(),
                                      grid = False,
                                      # blank out the label for odd years, keep it for even years
                                      labelExpr = "datum.value % 2 ? null : datum.label")),
            y = alt.Y('sum(catch_sum)',
                      title = 'Total Catch',
                      axis = alt.Axis(titleFontSize = 14,
                                      labelFontSize = 14)),
            color = alt.Color('sector_type:N',
                              legend = alt.Legend(title = 'Sector type',
                                                  titleFontSize = 16,
                                                  labelFontSize = 16,
                                                  labelColor = 'steelblue',
                                                  titleColor = 'steelblue')),
            row = "sector_type:N",
            tooltip = ['year', 'sector_type', 'catch_sum']
        ).properties(
            width = 600,
            height = 150,
            title = 'Total Catch by Sector type trend'
        ).resolve_scale(y = 'independent'  # each sector row gets its own y scale
        ).configure_legend(orient = 'right'
        ).configure_title(fontSize = 18, color = 'teal'
        ).configure_axis(titleColor = 'steelblue')
