# Overview

This notebook prepares landings records from the Government of Greenland, Fisheries Department, Fisheries License Control Authority, for the purposes of reproducing the results of the sea ice fishing study.

## Load Packages

In [1]:
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.covariance import EllipticEnvelope
from tqdm.notebook import tqdm
import json
from pathlib import Path



## Load and Clean Landings Records

In [2]:
# Raw landings export. The first CSV column becomes the index and the file is
# decoded as ISO-8859-1; low_memory=False makes pandas parse the file in one
# pass instead of in chunks.
landings = pd.read_csv(
    '../data/landings_raw.csv',
    index_col=0,
    encoding="ISO-8859-1",
    low_memory=False,
)

### Create Dictionary of Vessel Types

In [3]:
# Lookup from the raw vessel label to the surface the vessel operates on.
# Labels grouped under the last entry map to None (no surface assigned).
vessels = {
    **dict.fromkeys(('Dinghy', 'Larger Inshore Vessel'), 'water'),
    **dict.fromkeys(('Sled', 'Snowmobile'), 'ice'),
    **dict.fromkeys(('UJOLLE', 'USLAEDE', 'ATV'), None),
}

# Keep the raw label in 'vessel', then overwrite 'vessel_type' with the mapped
# surface. Labels that are not keys of `vessels` become NaN.
# NOTE: running this cell twice maps the already-mapped values ('water'/'ice'),
# which are not keys of `vessels`, and therefore blanks both columns.
landings['vessel'] = landings['vessel_type']
landings['vessel_type'] = landings['vessel'].map(vessels)

### Correct Erroneous Locality Values

In [4]:
# Some localities have the wrong name; these corrections are applied to the
# seller's locality only, and run before the merges below.
seller_name_fixes = {
    'Tasiusaq, Upernavik': 'Upernavik',
    'Kuummiit': 'Kuummiut',
    'Tiileqilaaq': 'Tiniteqilaaq',
}

# Merge localities: the same table is applied to sellers and buyers.
merged_localities = {
    'Aappilattoq Nanortalimmi': 'Nanortalik',
    'Aappilattoq Upernavimmi': 'Upernavik',
    'Nuussuaq, Nuuk': 'Nuuk',
    'Nuussuaq, Upernavik': 'Upernavik',
    'Tasiusaq Nanortalimmi': 'Nanortalik',
    'Tasiusaq, Upernavik': 'Tasiusaq',
    'Kuummiit': 'Kuummiut',
}

# NOTE(review): because the seller fixes run first, 'Tasiusaq, Upernavik' ends
# up as 'Upernavik' for sellers but as 'Tasiusaq' for buyers, and
# 'Tiileqilaaq' is only corrected for sellers. Confirm that this asymmetry is
# intended before relying on locality-level comparisons.
landings['sellers_locality'] = (
    landings['sellers_locality']
    .replace(seller_name_fixes)
    .replace(merged_localities)
)
landings['buyers_locality'] = landings['buyers_locality'].replace(merged_localities)

print(f'There are {landings.shape[0]} entries in the landings dataset.')

There are 1662470 entries in the landings dataset.


### Drop Erroneous Records

In [5]:
# Keep only records that have both a landing date and a tool count, and whose
# tool count is strictly positive.
landings = landings.dropna(subset=['landing_date', 'num_tools'])
landings = landings.loc[landings['num_tools'].gt(0)]

In [6]:
# (rows, columns) remaining at this point in the cleaning.
landings.shape

(1630798, 24)

In [7]:
# Discard longline records that report fewer than 30 tools; records for every
# other gear are kept whatever their tool count.
short_longline = landings['gear_en'].eq('Longlines') & landings['num_tools'].lt(30)
landings = landings.loc[~short_longline]

In [8]:
# Keep records with a strictly positive fishing time; rows where it is missing
# fail the comparison and are dropped as well.
landings = landings.loc[landings['fishing_time'].gt(0)]

### Correct Spatial Data Values

In [9]:
# The raw latitude/longitude text columns are not cleaned up here: both
# columns are overwritten in full by the field-code lookup below, so any
# per-character repair of the original strings would be discarded.

# Some missing data are okay, but a missing seller is an immediate
# disqualifier for further analysis of that row.
landings = landings.dropna(subset=['seller_id'])

# Reference table keyed by field code; its 'lat' and 'lon' columns supply the
# coordinates assigned to each landing.
field_code_table = pd.read_csv('../data/fieldcodes.csv', index_col='fieldcode')
field_codes = field_code_table.to_dict(orient='index')


def _normalise_field_code(field_code):
    """Return `field_code` without hyphens and with a zero-padded numeric part.

    Mirrors the hyphen removal and three-digit zero padding that the notebook
    applies to `landings.field_code` further down, so that the coordinate
    lookup sees the same spelling as the later validation step. Non-string
    values (e.g. NaN) are returned unchanged; strings whose tail is not an
    integer are returned with hyphens removed only.
    """
    if not isinstance(field_code, str):
        return field_code
    stripped = field_code.replace('-', '')
    try:
        return stripped[:2] + f'{int(stripped[2:]):03d}'
    except ValueError:
        return stripped


def lookup_lat_lon(row):
    """ Find latitude and longitude based on field code

    Row-wise helper kept for callers that process a single record. `row` must
    support `.get` and item assignment (a pandas Series or a dict). The field
    code is normalised before the lookup; when it is missing or unknown both
    coordinates are set to None. A reference table without 'lat'/'lon'
    columns raises KeyError instead of silently blanking every row.
    """
    info = field_codes.get(_normalise_field_code(row.get('field_code')))
    row['latitude'] = info['lat'] if info is not None else None
    row['longitude'] = info['lon'] if info is not None else None
    return row


# Vectorised equivalent of applying `lookup_lat_lon` to every row: one map per
# coordinate instead of a Python-level call per record. Codes that are absent
# from the reference table yield NaN.
lookup_codes = landings['field_code'].map(_normalise_field_code)
landings = landings.assign(
    latitude=lookup_codes.map(field_code_table['lat']),
    longitude=lookup_codes.map(field_code_table['lon']),
)

## Load Fieldcodes

In [10]:
# Field-code reference table as a dict of dicts: {fieldcode: {column: value}}.
fieldcode_frame = pd.read_csv('../data/fieldcodes.csv', index_col='fieldcode')
field_code_info = fieldcode_frame.to_dict(orient='index')


In [11]:
# Remove hyphens so that field codes share a single spelling before they are
# padded and validated.
landings['field_code'] = landings['field_code'].str.replace('-', '')

In [12]:
def insert_zero_padding(field_code):
    """Zero-pad the numeric tail of a field code to at least three digits.

    The first two characters are treated as the prefix and the remainder as
    an integer, e.g. 'LD21' -> 'LD021'. Values that cannot be sliced, or whose
    remainder is not an integer (including an empty remainder), are returned
    unchanged.
    """
    try:
        prefix, tail = field_code[:2], field_code[2:]
        return prefix + format(int(tail), '03d')
    except (ValueError, TypeError):
        return field_code
    
# Pad every field code element by element; values the helper cannot parse are
# passed through untouched.
landings['field_code'] = landings['field_code'].map(insert_zero_padding)

In [13]:
# Every distinct field code (missing values included) that has no entry in the
# field-code reference table.
bad_field_codes = [
    code
    for code in landings['field_code'].unique()
    if code not in field_code_info
]

bad_field_codes

[nan,
 'I/O',
 'JO027',
 'BL021',
 'LMD017',
 'MI029',
 'MI019',
 'AG024',
 'LN287',
 'I/(O',
 'JO025',
 'I/000',
 'LC025',
 'LO020',
 'LD021LD021',
 'J0028',
 'A6048',
 'GK057',
 'UJ021',
 'ZF019',
 'LE',
 'CP020',
 'LHJ02',
 'LLJ02',
 'A6047',
 'LGG02',
 'KS',
 'LG',
 'AA016']

In [14]:
# Remove every landing whose field code was flagged as unknown.
flagged = landings['field_code'].isin(bad_field_codes)
landings = landings.loc[~flagged]

## Create Derived Variables

### Create Seasonal Year

In [15]:
# A seasonal year starts in August: landings made in January-July are assigned
# to the previous calendar year, landings from August onwards to the current one.
SEASON_START_MONTH = 8
# Seasonal years retained for the analysis (both bounds inclusive).
FIRST_SEASONAL_YEAR, LAST_SEASONAL_YEAR = 2012, 2022

landing_dates = pd.to_datetime(landings['landing_date'])
calendar_year = landing_dates.dt.year

# Vectorised replacement for a per-row lambda: keep the calendar year from the
# season's first month onwards, otherwise step back one year.
seasonal_year = calendar_year.where(
    landing_dates.dt.month >= SEASON_START_MONTH,
    calendar_year - 1,
)

# `assign` builds a new frame, which avoids writing columns into a frame that
# was produced by an earlier boolean filter.
landings = landings.assign(landing_date=landing_dates, seasonal_year=seasonal_year)
landings = landings.loc[
    landings['seasonal_year'].between(FIRST_SEASONAL_YEAR, LAST_SEASONAL_YEAR)
]


### Calculate Catch Per Unit of Effort (CPUE)

In [16]:
def calculate_cpue(landing):
    """Set the catch per unit of effort (CPUE) on a single landing record.

    Longlines: kg per 100 tools per unit of fishing time.
    Gill nets: kg per tool per unit of fishing time.
    Records with any other gear are returned untouched.

    Parameters
    ----------
    landing : pandas.Series or dict-like
        Must provide 'gear_en', 'amount_in_kg', 'num_tools' and
        'fishing_time'.

    Returns
    -------
    The same `landing` object, with 'cpue' set for the two supported gears.
    """
    gear = landing['gear_en']
    if gear == "Longlines":
        effort_units = landing['num_tools'] / 100
    elif gear == "Gill nets":
        effort_units = landing['num_tools']
    else:
        return landing
    # Item assignment always writes into the record's data. Attribute
    # assignment on a Series only does so when a 'cpue' label already exists;
    # otherwise it sets a plain Python attribute and the value is lost.
    landing['cpue'] = landing['amount_in_kg'] / effort_units / landing['fishing_time']
    return landing
# Vectorised CPUE using the same arithmetic as `calculate_cpue`, without a
# Python-level call per record:
#   Longlines -> amount_in_kg / (num_tools / 100) / fishing_time
#   Gill nets -> amount_in_kg / num_tools / fishing_time
# Every other gear gets NaN effort and therefore a NaN CPUE.
gear = landings['gear_en']
effort_units = np.select(
    [gear.eq('Longlines'), gear.eq('Gill nets')],
    [landings['num_tools'] / 100, landings['num_tools']],
    default=np.nan,
)
landings = landings.assign(
    cpue=landings['amount_in_kg'] / effort_units / landings['fishing_time']
)
landings.shape

(1520208, 26)

### Calculate Price Per Kilo (DKK/kg)

In [17]:
# Price per kilo: landed value divided by landed weight.
landings['ppk'] = landings['value'].div(landings['amount_in_kg'])

# Infinite values anywhere in the frame become NaN, after which records
# without a usable CPUE or price per kilo are discarded.
landings = (
    landings
    .replace([np.inf, -np.inf], np.nan)
    .dropna(subset=['cpue', 'ppk'])
)

## Create Derived Dataframes

### Calculate Price Per Kilo According to Locality

In [24]:
# Mean price per kilo for every (seller locality, seasonal year) pair, with the
# grouping keys returned as ordinary columns.
locality_year_groups = landings.groupby(['sellers_locality', 'seasonal_year'])
ppk_locality = locality_year_groups['ppk'].mean().reset_index()

# Not the last expression of the cell, so nothing is rendered here.
ppk_locality

# Written to the current working directory; the integer index is included as
# the first CSV column.
ppk_locality.to_csv('ppk_locality.csv')

