# Overview

This notebook prepares landings records from the Government of Greenland, Fisheries Department, Fisheries License Control Authority, for the purposes of reproducing the results of the sea ice fishing study.


## Load Packages


In [188]:
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.covariance import EllipticEnvelope
from tqdm.notebook import tqdm
import json
from pathlib import Path

## Load and Clean Landings Records


In [189]:
# Load the raw landings export. The first CSV column becomes the index and
# the literal string "I/O" is parsed as a missing value.
raw_landings_csv = "../data/raw/landings_raw.csv"
landings = pd.read_csv(
    raw_landings_csv,
    na_values=["I/O"],
    encoding="ISO-8859-1",
    index_col=0,
    low_memory=False,
)

### Create New Variable for the Vessel Type

TODO: Explain why needed and why some are discarded (mapped to `None`)


In [190]:
# Raw vessel label -> category used downstream ("water" or "ice").
# Labels mapped to None, and labels absent from this dict, end up as NaN.
vessels = {
    "Dinghy": "water",
    "Larger Inshore Vessel": "water",
    "Sled": "ice",
    "Snowmobile": "ice",
    "UJOLLE": None,
    "USLAEDE": None,
    "ATV": None,
}

# Keep the raw label under "vessel", then overwrite "vessel_type" with the
# mapped category.
# NOTE: this cell is not idempotent; running it twice maps the already-mapped
# categories (which are not keys of `vessels`) to NaN.
landings["vessel"] = landings["vessel_type"]
landings["vessel_type"] = landings["vessel"].map(vessels)

### Correct Erroneous Locality Values


In [191]:
# Locality spellings found in the records -> the name they are replaced with.
wrong_localities = {
    "Aappilattoq Nanortalimmi": "Nanortalik",
    "Aappilattoq Upernavimmi": "Upernavik",
    "Kuummiit": "Kuummiut",
    "Nuussuaq, Nuuk": "Nuuk",
    "Nuussuaq, Upernavik": "Upernavik",
    "Tasiusaq Nanortalimmi": "Nanortalik",
    "Tasiusaq, Nanortalimmi": "Nanortalik",
    "Tasiusaq, Upernavik": "Upernavik",
    "Tiileqilaaq": "Tiniteqilaaq",
}

# Apply the same corrections to the seller and the buyer locality columns.
for locality_column in ("sellers_locality", "buyers_locality"):
    landings[locality_column] = landings[locality_column].replace(wrong_localities)

### Correct Gears


In [192]:
# Gear labels to harmonise; labels not listed here are left unchanged.
wrong_gears = {
    "Set gillnets": "Gill nets",
    "Gill nets": "Gill nets",
    "Longlines (not specified)": "Longlines",
}

# Store the harmonised label in a new "gears" column (gear_en is kept as is).
landings["gears"] = landings["gear_en"].replace(wrong_gears)

# Keep only rows with a strictly positive tool count (NaN rows are dropped too).
has_tools = landings["num_tools"].gt(0)
landings = landings.loc[has_tools]

### Drop Erroneous Records


In [193]:
# Missing values are tolerated in most columns, but a row lacking any of the
# fields below is excluded from further analysis.
required_columns = ["landing_date", "seller_id", "field_code", "sellers_locality"]
landings = landings.dropna(subset=required_columns)

# Keep only rows with a strictly positive fishing time.
positive_fishing_time = landings["fishing_time"].gt(0)
landings = landings.loc[positive_fishing_time]

### Clean Up Field Codes


In [194]:
# Field-code reference table; merged into the landings a few cells below.
field_codes_csv = "../resources/fieldcodes.csv"
field_codes = pd.read_csv(field_codes_csv)

In [195]:
# Remove every hyphen from the field codes (literal match, no regex needed).
landings["field_code"] = landings["field_code"].str.replace("-", "", regex=False)


def insert_zero_padding(field_code):
    """Zero-pad the numeric part of a field code to at least three digits.

    The first two characters are kept as a prefix; the remainder is parsed as
    an integer and re-rendered with at least three digits (e.g. "AB1" ->
    "AB001").

    Parameters
    ----------
    field_code : str
        Field code to normalise. Any other value is passed through.

    Returns
    -------
    The padded code, or ``field_code`` unchanged when the remainder is not an
    integer or the value cannot be sliced/concatenated like a string
    (e.g. ``None`` or NaN).
    """
    try:
        prefix = field_code[:2]
        number = int(field_code[2:])
        return prefix + "{:03d}".format(number)
    except (ValueError, TypeError):
        # Not of the "<2 chars><integer>" shape: leave the value untouched.
        return field_code


# Normalise every field code element-wise with insert_zero_padding.
landings["field_code"] = landings["field_code"].map(insert_zero_padding)

In [196]:
# Remove the coordinate columns that came with the raw landings records.
landings = landings.drop(["latitude", "longitude"], axis="columns")

In [197]:
# Inner join on every column name the two frames share (pandas' default when
# no key is given); landings without a match in field_codes are dropped.
landings = pd.merge(landings, field_codes)

## Load Locality Information


In [198]:
# Keep only the three columns used below and discard localities where any of
# them is missing.
locality_columns = ["TEKST_GL", "KOMMUNE", "TYPE"]
locality_info = (
    pd.read_csv("../resources/localities.csv").loc[:, locality_columns].dropna()
)
locality_info

Unnamed: 0,TEKST_GL,KOMMUNE,TYPE
0,Nerlerit Inaat,Sermersooq,Bygd
2,Sirius,UFK,Station
3,Akunnaaq,Qaasuitsup,Bygd
4,Kitsissuarsuit,Qaasuitsup,Bygd
5,Ikkatteq,Sermersooq,Bygd
...,...,...,...
91,Daneborg,UFK,Station
93,Ikamiut,Qaasuitsup,Bygd
94,Nutaarmiut,Qaasuitsup,Bygd
95,"Ikerasaarsuk, Upernavik",Qaasuitsup,Bygd


In [199]:
# Attach municipality and settlement type to each landing by matching the
# seller's locality against TEKST_GL (inner join: unmatched landings drop out).
locality_renames = {
    "KOMMUNE": "sellers_municipality",
    "TYPE": "sellers_settlement_size",
}
landings = pd.merge(
    landings, locality_info, left_on="sellers_locality", right_on="TEKST_GL"
)
# TEKST_GL duplicates sellers_locality after the join, so it is removed.
landings = landings.drop(columns="TEKST_GL").rename(columns=locality_renames)

## Create Derived Variables


### Create Seasonal Year


In [200]:
# First calendar month of a season; landings before this month are assigned
# to the season that started in the previous calendar year.
season_start_month = 8

landings["landing_date"] = pd.to_datetime(landings.landing_date)

# Vectorised equivalent of "year - 1 if month < season_start_month else year":
# subtract 1 exactly where the landing month precedes the season start.
before_season_start = landings["landing_date"].dt.month < season_start_month
landings["seasonal_year"] = landings["landing_date"].dt.year - before_season_start.astype(
    "int64"
)

# Restrict to the study window. The explicit copy gives later cells an
# independent frame to add columns to (avoids pandas' SettingWithCopyWarning
# on versions without copy-on-write).
landings = landings.query("seasonal_year >= 2012 & seasonal_year <= 2022").copy()

### Calculate Catch Per Unit of Effort (CPUE)


In [201]:
# Longline tool counts are divided by this factor before computing CPUE.
# NOTE(review): presumably converts a hook count into units of 100 hooks - confirm.
longline_tool_divisor = 100

# Effective tool count: num_tools / 100 for longlines, num_tools otherwise.
# Computed as a standalone Series so no float values are written in place
# into a column copied from num_tools (which fails or warns on recent pandas
# when that column is integer-typed) and no temporary column has to be
# added to and then dropped from `landings`.
is_longline = landings["gears"] == "Longlines"
effective_num_tools = landings["num_tools"].where(
    ~is_longline, landings["num_tools"] / longline_tool_divisor
)

# Catch per unit of effort: kg per effective tool per unit of fishing time.
landings["cpue"] = (
    landings["amount_in_kg"] / effective_num_tools / landings["fishing_time"]
)

### Calculate Price Per Kilo (DKK/kg)


In [202]:
# Price per kilo = landed value divided by landed weight.
landings["ppk"] = landings["value"].div(landings["amount_in_kg"])

# Divisions by zero yield +/-inf; turn those into NaN (across the whole frame)
# and discard rows where either derived measure is unavailable.
infinite_values = [np.inf, -np.inf]
landings = landings.replace(infinite_values, np.nan).dropna(subset=["cpue", "ppk"])

### Outlier Removal


In [203]:
def is_outlier(s):
    """Flag outliers in a numeric Series with a 1-D elliptic envelope.

    Parameters
    ----------
    s : pandas.Series
        Values of a single group.

    Returns
    -------
    pandas.Series
        Boolean Series on the index of ``s``; True marks values the fitted
        ``EllipticEnvelope(support_fraction=0.8)`` labels as -1.
    """
    if len(s) < 3:
        # Too few points to fit an envelope. `s != s` is False for every
        # regular value (and True only for NaN, which never equals itself).
        return s.ne(s)

    # EllipticEnvelope expects a 2-D array, hence the single-column reshape.
    column = s.values.reshape(-1, 1)
    labels = EllipticEnvelope(support_fraction=0.8).fit_predict(column)
    return pd.Series(labels == -1, index=s.index)


# Remove outliers one measure at a time, detecting them separately within
# each seller municipality. The passes are sequential: each one works on the
# rows that survived the previous pass.
for outlier_column in ("cpue", "ppk", "value"):
    flagged = landings.groupby("sellers_municipality", group_keys=False)[
        outlier_column
    ].apply(is_outlier)
    landings = landings[~flagged]

## Create Derived Dataframes


### Calculate Price Per Kilo According to Locality


In [204]:
# Mean price per kilo for every (seller locality, seasonal year) pair.
ppk_by_locality = landings.groupby(["sellers_locality", "seasonal_year"])["ppk"].mean()
ppk_by_locality.to_csv("../data/derived/ppk_locality.csv")

### Ice Landings and Water Landings


In [206]:
# Split the cleaned landings by vessel category; rows whose vessel_type is
# missing belong to neither subset.
ice_landings = landings[landings["vessel_type"] == "ice"]
water_landings = landings[landings["vessel_type"] == "water"]

### Total Daily Catch


In [None]:
# Total weight landed from the ice on each date, labelled with its season.
daily_keys = ["seasonal_year", "landing_date"]
total_daily_catch = ice_landings.groupby(daily_keys)["amount_in_kg"].sum().reset_index()


def calc_seasonal_days(row, start_month=None):
    """Return the number of days between the season start and a landing.

    Parameters
    ----------
    row : mapping
        Must provide ``"landing_date"`` (a datetime-like value) and
        ``"seasonal_year"`` (the calendar year in which the season started).
    start_month : int, optional
        Calendar month in which a season starts. Defaults to the notebook's
        ``season_start_month`` so the day count stays consistent with the way
        ``seasonal_year`` was derived.

    Returns
    -------
    int
        Whole days elapsed since the first day of ``start_month`` in
        ``seasonal_year``.
    """
    if start_month is None:
        start_month = season_start_month
    # int() accepts numpy integers coming out of a DataFrame row.
    season_start = dt.datetime(int(row["seasonal_year"]), start_month, 1)
    return (row["landing_date"] - season_start).days


# Day offset of each landing date relative to the start of its season.
day_inputs = total_daily_catch[["landing_date", "seasonal_year"]]
total_daily_catch["seasonal_day"] = day_inputs.apply(calc_seasonal_days, axis="columns")

# Running total per season in chronological order. The cumulative sums are
# computed on a sorted view and align back to the rows via the shared index.
catch_in_day_order = total_daily_catch.sort_values(by="seasonal_day")
total_daily_catch["cumulative"] = catch_in_day_order.groupby(
    by=["seasonal_year"]
).amount_in_kg.cumsum()

total_daily_catch.to_csv("../data/total_daily_catch.csv")

### Local Daily Catch


In [None]:
# Weight landed from the ice per season, field, date and seller locality.
# `ice_landings` (built in the "Ice Landings and Water Landings" cell) is the
# same subset the original re-queried from `landings` here.
local_daily_catch = (
    ice_landings.groupby(
        by=["seasonal_year", "field_code", "landing_date", "sellers_locality"]
    )
    .amount_in_kg.sum()
    .reset_index()
)

# Day offset of each landing date relative to the start of its season.
# calc_seasonal_days is already defined in the "Total Daily Catch" cell, so it
# is reused here instead of being defined a second time.
local_daily_catch["seasonal_day"] = local_daily_catch[
    ["landing_date", "seasonal_year"]
].apply(calc_seasonal_days, axis="columns")

# Running total per (season, field, locality) in chronological order; the
# result aligns back to the unsorted rows via the shared index.
local_daily_catch["cumulative"] = (
    local_daily_catch.sort_values(by="seasonal_day")
    .groupby(by=["seasonal_year", "field_code", "sellers_locality"])
    .amount_in_kg.cumsum()
)

local_daily_catch.to_csv("../data/local_daily_catch.csv")

### Local First Catch


In [None]:
# Earliest seasonal day with an ice landing per (season, field, locality).
first_catch_keys = ["seasonal_year", "field_code", "sellers_locality"]
first_catch_names = {"seasonal_day": "first_catch_day", "seasonal_year": "season"}

local_first_catch = local_daily_catch.groupby(by=first_catch_keys).seasonal_day.min()
local_first_catch = local_first_catch.reset_index().rename(columns=first_catch_names)

local_first_catch.to_csv("../data/local_first_catch.csv")

### Local Last Catch


In [None]:
# Latest seasonal day with an ice landing per (season, field, locality).
local_last_catch = (
    local_daily_catch.groupby(by=["seasonal_year", "field_code", "sellers_locality"])
    .seasonal_day.max()
    .reset_index()
    .rename(columns={"seasonal_day": "last_catch_day", "seasonal_year": "season"})
)
local_last_catch.to_csv("../data/local_last_catch.csv")

# Display the frame computed in this cell. The original displayed
# `local_last_catch_dates`, which is only defined in a later cell and so
# raised NameError when the notebook was run top to bottom.
local_last_catch

## Season Length (First/Last Catch Criteria)


In [None]:
# First and last seasonal day with any ice landing, per season.
season_days = total_daily_catch.groupby(by=["seasonal_year"]).seasonal_day

total_first_catch = (
    season_days.min()
    .reset_index()
    .rename(columns={"seasonal_day": "first_catch_day", "seasonal_year": "season"})
)
total_last_catch = (
    season_days.max()
    .reset_index()
    .rename(columns={"seasonal_day": "last_catch_day", "seasonal_year": "season"})
)

# Season length = last catch day minus first catch day, aligned on season.
first_by_season = total_first_catch.set_index("season")["first_catch_day"]
last_by_season = total_last_catch.set_index("season")["last_catch_day"]
total_ice_season_length = (last_by_season - first_by_season).reset_index(
    name="ice_season_length"
)

total_first_catch.to_csv("../data/total_first_catch.csv")
total_last_catch.to_csv("../data/total_last_catch.csv")
total_ice_season_length.to_csv("../data/total_ice_season_length.csv")

### First and Last Days, by Locality, 2012-2022


In [None]:
def _locality_catch_days(daily_catch, how, label):
    """Return the catches landed on each locality's first or last day of a season.

    Parameters
    ----------
    daily_catch : pandas.DataFrame
        Needs the columns seasonal_year, sellers_locality, field_code,
        seasonal_day, landing_date and amount_in_kg.
    how : str
        Aggregation picking the day per (season, locality): "min" or "max".
    label : str
        Value stored in the ``first_or_last`` column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per catch recorded on the selected day, with seasonal_day
        renamed to catch_day and seasonal_year renamed to season.
    """
    keys = ["seasonal_year", "sellers_locality"]
    detail_columns = [
        "seasonal_year",
        "sellers_locality",
        "field_code",
        "seasonal_day",
        "landing_date",
        "amount_in_kg",
    ]
    extreme_days = daily_catch.groupby(by=keys).seasonal_day.agg(how).reset_index()
    return (
        # Joining back on (season, locality, day) recovers every field fished
        # on that extreme day together with its date and landed weight.
        extreme_days.merge(daily_catch[detail_columns], on=keys + ["seasonal_day"])
        .rename(columns={"seasonal_day": "catch_day", "seasonal_year": "season"})
        .assign(first_or_last=label)
    )


first_days = _locality_catch_days(local_daily_catch, "min", "first")
last_days = _locality_catch_days(local_daily_catch, "max", "last")

locality_seasons = pd.concat([first_days, last_days]).sort_values(
    ["season", "sellers_locality"]
)

# NOTE(review): this reads "../data/fieldcodes.csv", whereas the field codes
# merged into the landings earlier come from "../resources/fieldcodes.csv" -
# confirm that two different files are intended.
field_code_info = pd.read_csv("../data/fieldcodes.csv")

# Attach field coordinates; the right-hand key column is redundant afterwards.
locality_seasons = locality_seasons.merge(
    field_code_info, left_on="field_code", right_on="fieldcode"
).drop(columns="fieldcode")

locality_seasons = locality_seasons[
    [
        "season",
        "sellers_locality",
        "landing_date",
        "first_or_last",
        "catch_day",
        "amount_in_kg",
        "field_code",
        "lat",
        "lon",
    ]
].sort_values(["season", "sellers_locality"])

# index=False is the documented way to omit the row index from the CSV.
locality_seasons.to_csv("../data/locality_seasons.csv", index=False)

### Local First and Last Catch Days, 2012-2022


In [None]:
# Seasonal day of each ice landing, grouped per (season, field, locality).
local_season_days = local_daily_catch.groupby(
    by=["seasonal_year", "field_code", "sellers_locality"]
).seasonal_day

# Earliest day per group.
local_first_catch_dates = (
    local_season_days.min()
    .reset_index()
    .rename(columns={"seasonal_day": "first_catch_day", "seasonal_year": "season"})
)
local_first_catch_dates.to_csv("../data/local_first_catch_dates.csv")

# Latest day per group.
local_last_catch_dates = (
    local_season_days.max()
    .reset_index()
    .rename(columns={"seasonal_day": "last_catch_day", "seasonal_year": "season"})
)
local_last_catch_dates.to_csv("../data/local_last_catch_dates.csv")

### Local Ice Season Length


In [None]:
# Both inputs hold one row per (season, field, locality). Indexing by all
# three keys makes the subtraction align on a unique label and keeps the
# locality in the result. With only (season, field) the index was ambiguous
# whenever a field was fished from several localities, and the output rows
# could not be attributed to a locality.
season_length_keys = ["season", "field_code", "sellers_locality"]
local_ice_season_length = (
    local_last_catch.set_index(season_length_keys).last_catch_day
    - local_first_catch.set_index(season_length_keys).first_catch_day
).reset_index(name="ice_season_length")

local_ice_season_length

In [None]:
# todo
# derived
# Only consider fields that appear in every season
# fields_of_interest = set.intersection(*local_ice_season_length.groupby(by=['season']).field_code.unique().apply(lambda x: set(x)).values)


# local_ice_season_length[local_ice_season_length.field_code.isin(fields_of_interest)].groupby('field_code').corr(method='kendall').unstack()['season']['ice_season_length']

### Number of Fields Fished, 2012-2022


In [None]:
# Number of distinct fields with at least one ice landing, per season.
fields_per_season = local_first_catch.groupby("season")["field_code"].nunique()
num_fields = fields_per_season.reset_index(name="n_fields")

num_fields.to_csv("../data/num_fields.csv")

### Local Number of Fields Fished, 2012-2022


In [None]:
# Number of distinct fields fished from the ice, per season and seller locality.
local_num_fields = (
    ice_landings.groupby(by=["seasonal_year", "sellers_locality"])
    .field_code.nunique()
    .reset_index()
    .rename(columns={"field_code": "n_fields"})
)

local_num_fields.to_csv("../data/local_num_fields.csv")

# Only the last expression of a cell is displayed, so the preview comes after
# the write (in the middle of the cell it was a silent no-op).
local_num_fields

### Fishing Grounds, by Locality, by Vessel Type, by Field Code, by Seasonal Year


In [None]:
# Distinct (season, locality, vessel type, field) combinations present in the
# landings. The seller count is only a vehicle for the groupby and is dropped;
# rows with a missing key (e.g. unmapped vessel types) are excluded by groupby.
fishing_grounds = (
    landings.groupby(
        by=["seasonal_year", "sellers_locality", "vessel_type", "field_code"]
    )
    .seller_id.nunique()
    .reset_index()
    .drop(columns="seller_id")
)

# field_code_info is a DataFrame with "fieldcode", "lat" and "lon" columns, so
# `field_code_info[code]` would be a column lookup and raise KeyError. Index
# it by field code instead and map each coordinate column; codes without an
# entry get NaN. Duplicated codes are dropped because map needs a unique index.
field_coordinates = field_code_info.drop_duplicates(subset="fieldcode").set_index(
    "fieldcode"
)
fishing_grounds["lon"] = fishing_grounds.field_code.map(field_coordinates["lon"])
fishing_grounds["lat"] = fishing_grounds.field_code.map(field_coordinates["lat"])

fishing_grounds.to_csv("../data/fishing_grounds.csv")