This notebook is used to explore the metadata of the Global Fishing Watch dataset published as an Earth Engine snippet (`GFW/GFF/V1/fishing_hours`).

Metadata is data about data. In the case of images, metadata describes the image itself and how it was produced. Some metadata is generated automatically by the capturing device.



In [4]:
# Import packages
import ee
import geemap
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Authenticate Google Earth Engine (currently disabled; uncomment the line below to run it)
#ee.Authenticate()

In [5]:
# Initialize the Google Earth Engine client; every ee.* call below relies on this
ee.Initialize()

In [6]:
# Load the fishing-hours image collection from Google Earth Engine
dataset = ee.ImageCollection('GFW/GFF/V1/fishing_hours')
# Load the total vessel-hours image collection from Google Earth Engine
dataset2 = ee.ImageCollection('GFW/GFF/V1/vessel_hours')

In [7]:
# Display the fishing-hours collection object
dataset

<ee.imagecollection.ImageCollection at 0x7f9be015ded0>

In [8]:
# Display the vessel-hours collection object
dataset2

<ee.imagecollection.ImageCollection at 0x7f9be015e1d0>

In [9]:
# Restrict the fishing-hours collection to the date range 2015-01-01 to 2016-01-01
date_range = ee.Filter.date('2015-01-01', '2016-01-01')
elnino_data = dataset.filter(date_range)
elnino_data

<ee.imagecollection.ImageCollection at 0x7f9be015dfc0>

In [10]:
# Longitude/latitude of the point used to filter images by location
u_lon = -80
u_lat = -10
x = ee.Geometry.Point([u_lon, u_lat])

In [11]:
# Take the first fishing-hours image that passes the bounds filter on point `x`
# and the date filter 2015-01-01 to 2015-01-10.
fishing_image = (
    ee.ImageCollection('GFW/GFF/V1/fishing_hours')
    .filterBounds(x)
    .filter(ee.Filter.date('2015-01-01', '2015-01-10'))
    .first()
)

In [12]:
# Collect the image's properties (names and values) with geemap and display them
fishing_props = geemap.image_props(fishing_image)
fishing_props.getInfo()


{'IMAGE_DATE': '2015-01-01',
 'NOMINAL_SCALE': 1113.1949079327358,
 'country': 'ARG',
 'system:asset_size': '0.015705 MB',
 'system:band_names': ['drifting_longlines',
  'fixed_gear',
  'other_fishing',
  'purse_seines',
  'squid_jigger',
  'trawlers'],
 'system:id': 'GFW/GFF/V1/fishing_hours/ARG-20150101',
 'system:index': 'ARG-20150101',
 'system:time_end': '2015-01-02 00:00:00',
 'system:time_start': '2015-01-01 00:00:00',
 'system:version': 1518618589881092}

In [39]:
# Look up the 'country' entry in the properties dictionary and print its value
country = fishing_props.get('country')
country_value = country.getInfo()
print('Country:', country_value)

Country: ARG


In [14]:
# List the names of the metadata properties attached to the image
properties = fishing_image.propertyNames()
property_names = properties.getInfo()
print('Metadata properties: ', property_names)

Metadata properties:  ['system:time_start', 'country', 'system:footprint', 'system:time_end', 'system:version', 'system:id', 'system:asset_size', 'system:index', 'system:bands', 'system:band_names']


In [15]:
def flag_totals(gee_snipet, lon, lat, flag, start_date, end_date):
    """Sum every image of an Earth Engine collection for one flag and date range.

    Args:
        gee_snipet: Earth Engine ImageCollection asset ID,
            e.g. 'GFW/GFF/V1/fishing_hours'.
        lon: Longitude of the point used for the bounds filter.
        lat: Latitude of the point used for the bounds filter.
        flag: Value the images' 'country' property must equal, e.g. "ARG".
        start_date: Start of the date range passed to ``filterDate``.
        end_date: End of the date range passed to ``filterDate``.

    Returns:
        ee.Image: the result of ``.sum()`` over the filtered collection.
    """
    point = ee.Geometry.Point([lon, lat])
    # filterMetadata() is deprecated in the Earth Engine API; the supported
    # equivalent is filter() combined with ee.Filter.eq().
    matching_images = (
        ee.ImageCollection(gee_snipet)
        .filter(ee.Filter.eq('country', flag))
        .filterDate(start_date, end_date)
        .filter(ee.Filter.bounds(point))
    )
    # .sum() reduces the collection to one image, hence the ee.Image wrapper.
    return ee.Image(matching_images.sum())

# Example call: flag "ARG", 2016-01-01 to 2017-01-01, point at lon -80.5 / lat -10.5
flag_totals('GFW/GFF/V1/fishing_hours',-80.5,-10.5, "ARG",'2016-01-01','2017-01-01')

<ee.image.Image at 0x7f9be0203c10>

In [38]:
# Build the summed image for flag "ARG" over 2016-01-01 to 2017-01-01,
# then apply a sum reducer to that image.
arg = flag_totals(
    'GFW/GFF/V1/fishing_hours',
    -80.5,
    -10.5,
    "ARG",
    '2016-01-01',
    '2017-01-01',
)
print(type(arg))

arg_all = arg.reduce(ee.Reducer.sum())
print(type(arg_all))

<class 'ee.image.Image'>
<class 'ee.image.Image'>


In [35]:
# Select only the 'drifting_longlines' band from the summed image
arg_drift_lines = arg.select('drifting_longlines')
type(arg_drift_lines)

#print(arg_drift_lines)

ee.image.Image

In [31]:
# Load the vessel table from a CSV file in the working directory
raw = pd.read_csv('fishing-vessels-v1.csv')
# A bare `raw.shape` in the middle of a cell is evaluated but never shown;
# print it so the dimensions appear alongside the table preview.
print(raw.shape)

raw

Unnamed: 0,mmsi,flag,geartype,length,tonnage,engine_power,active_2012,active_2013,active_2014,active_2015,active_2016
0,603100157,AGO,trawlers,32.808468,299.003814,733.826977,False,False,False,True,True
1,603100137,AGO,trawlers,34.568782,395.683171,864.960188,False,False,False,True,True
2,603100161,AGO,trawlers,28.822140,263.849149,651.809642,False,False,False,True,True
3,603100174,AGO,trawlers,30.721429,299.700916,703.796086,False,False,False,True,True
4,603100164,AGO,trawlers,37.479248,405.967747,850.976640,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...
73004,601764000,ZAF,drifting_longlines,17.032258,52.550920,207.818629,False,True,True,True,True
73005,601089100,ZAF,drifting_longlines,22.915622,180.506028,527.507463,False,False,False,True,True
73006,601849000,ZAF,drifting_longlines,19.692080,86.485661,362.553279,False,False,True,True,True
73007,601183700,ZAF,drifting_longlines,31.872860,307.446773,977.330342,False,False,True,True,True


In [20]:
# Draw a random sample of 70,000 rows without replacement (random_state=42 fixes the draw).
# The MMSI (Maritime Mobile Service Identity) is a nine-digit number used for vessel identification.
raw_sample = raw.sample(n=70000, replace=False, random_state=42)
raw_sample


# Description of the table:
# - It includes every MMSI that appears in the fishing effort data, with exactly one row per MMSI.
# - It includes all vessels that were identified as fishing vessels by the neural network and that
#   were not identified as non-fishing vessels by registries and manual review.
# - If an MMSI was matched to a fishing vessel on a registry but the neural net did not classify it
#   as a fishing vessel, it is not included in this list.

Unnamed: 0,mmsi,flag,geartype,length,tonnage,engine_power,active_2012,active_2013,active_2014,active_2015,active_2016
62254,440306340,KOR,drifting_longlines,21.182532,65.050847,327.646697,False,True,True,True,True
24333,800037033,CHN,trawlers,18.682916,61.637528,288.314884,False,False,True,True,True
1230,316007519,CAN,purse_seines,21.687323,43.000000,250.203429,True,True,True,True,True
18576,965000123,CHN,trawlers,24.244807,97.601110,312.918558,False,False,False,False,True
31642,412414087,CHN,fixed_gear,22.012319,53.212939,211.590235,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...
7659,872415232,CHN,trawlers,26.534121,118.072952,238.748890,True,True,True,True,True
46169,414156183,CHN,purse_seines,19.188585,54.696560,246.444619,False,False,False,False,True
38458,1537,CHN,fixed_gear,9.699666,10.147057,117.551010,False,True,False,True,True
49429,412699450,CHN,drifting_longlines,29.100000,134.345449,339.000000,False,False,False,True,True


In [21]:
# Inspect the data type of each column in the sample
raw_sample.dtypes

mmsi              int64
flag             object
geartype         object
length          float64
tonnage         float64
engine_power    float64
active_2012        bool
active_2013        bool
active_2014        bool
active_2015        bool
active_2016        bool
dtype: object

In [22]:
# Count the null values in each column of the sample
raw_sample.isnull().sum()

mmsi            0
flag            0
geartype        2
length          1
tonnage         1
engine_power    9
active_2012     0
active_2013     0
active_2014     0
active_2015     0
active_2016     0
dtype: int64

In [23]:
# Rectangular region of interest, built from its bounding longitudes and latitudes
roi_west, roi_east = -84.997101, -76.295929
roi_south, roi_north = -12.93982, -3.79797
roi = ee.Geometry.Polygon([
    [roi_west, roi_south],
    [roi_west, roi_north],
    [roi_east, roi_north],
    [roi_east, roi_south],
    [roi_west, roi_south],  # repeat the first vertex to close the ring
])

In [33]:
# Count vessels per flag to check how evenly the flags are represented,
# listed from the most to the fewest vessels.
flag_counts = raw['flag'].value_counts()
flag_counts.sort_values(ascending=False)

CHN    49039
NOR     2229
USA     2180
ESP     1953
ITA     1588
       ...  
TCD        1
CUB        1
GLP        1
TON        1
STP        1
Name: flag, Length: 128, dtype: int64