In [1]:
from copy import deepcopy as copy
import datetime
import json
from math import ceil
import multiprocessing
import logging
import operator
import os
import random
import sys
import time
import typing
import warnings

import joblib
from joblib import delayed, Parallel
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from numpy.core.numeric import outer
import pandas as pd
from scipy.stats import mode, entropy
import seaborn as sns
import sklearn.feature_selection
import sklearn.metrics
from sklearn.metrics import roc_auc_score
import sklearn.model_selection
import typer
from tqdm.auto import tqdm

import src

In [2]:
# Only ships not selected for mobility network.
portcalls = joblib.load('data/portcalls_v3.pkl')
ships = np.load('data/not_selected.npy', allow_pickle=True)

expert_labels = portcalls.groupby('ship')['risk'].max()
y_score = pd.Series({ship: expert_labels.get(ship) for ship in ships})

inspections = src.get_inspections().groupby('IMO')['WasDetained'].any().replace({False: 1, True: 2})
y_true = pd.Series({ship: inspections.get(ship, default=0) for ship in ships})

sensitive = portcalls.groupby('ship')['flag'].last().astype(int).astype(bool)
s = pd.Series({ship: sensitive.get(ship) for ship in ships})
ys = 3*s + y_true

# ALL ships - also selected ones for the mobility network!
all_ships = portcalls.ship.unique()
y_true_all = pd.Series({ship: inspections.get(ship, default=0) for ship in all_ships})
y_score_all = pd.Series({ship: expert_labels.get(ship) for ship in all_ships})
s_all = pd.Series({ship: sensitive.get(ship) for ship in all_ships})

# Distribution targets on whole data

In [3]:
display(y_true_all.replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts().sort_index())
print()
display(y_true_all.replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index())

compliant                  6743
deficiencies              21088
detention+deficiencies     1631
dtype: int64




compliant                 0.228871
deficiencies              0.715769
detention+deficiencies    0.055359
dtype: float64

# Ship flag history

In [4]:
result = {'always_pos': 0, 'always_neg': 0, 'changed': 0}
for ship, group in portcalls.groupby('ship')['flag']:
    if group.all():
        result['always_pos'] += 1
    elif (~group).all():
        result['always_neg'] += 1
    else:
        result['changed'] += 1
result = pd.Series(result)

display(result)
print()
display(result / result.sum())

always_pos     1473
always_neg    27260
changed         729
dtype: int64




always_pos    0.049997
always_neg    0.925260
changed       0.024744
dtype: float64

# Distribution target
## All data

In [5]:
data = {
    'sensitive': y_true_all[s_all].replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index(),
    'non-sensitive': y_true_all[~s_all].replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index(),
    'total': y_true_all.replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index()
}
pd.DataFrame(data)

Unnamed: 0,sensitive,non-sensitive,total
compliant,0.195664,0.231149,0.228871
deficiencies,0.554204,0.726851,0.715769
detention+deficiencies,0.250132,0.042001,0.055359


## Only data not used in global cargo ship network

In [6]:
data = {
    'sensitive':     y_true[s ].replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index(),
    'non-sensitive': y_true[~s].replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index(),
    'total':         y_true.replace({0: 'compliant', 1: 'deficiencies', 2: 'detention+deficiencies'}).value_counts(normalize=True).sort_index()
}
pd.DataFrame(data)

Unnamed: 0,sensitive,non-sensitive,total
compliant,0.17297,0.214599,0.211973
deficiencies,0.568506,0.742206,0.73125
detention+deficiencies,0.258524,0.043195,0.056776


# Distribution sensitive attribute

## All data

In [7]:
pd.Series(np.bincount(s_all) / len(s_all), index=['white flag', 'non-white flag'])

white flag        0.935816
non-white flag    0.064184
dtype: float64

## Only data not used in global cargo ship network

In [8]:
pd.Series(np.bincount(s) / len(s), index=['white flag', 'non-white flag'])

white flag        0.936928
non-white flag    0.063072
dtype: float64

# Performance

In [9]:
skf = sklearn.model_selection.StratifiedKFold(shuffle=True, random_state=42)
performance = [sklearn.metrics.roc_auc_score(y_true.iloc[test] > 0, y_score.iloc[test]) for _, test in skf.split(y_true, ys)]
print(f"AUC_y: {np.mean(performance):.3f}+-{np.std(performance):.3f}")

AUC_y: 0.543+-0.006


In [10]:
skf = sklearn.model_selection.StratifiedKFold(shuffle=True, random_state=42)
performance = [sklearn.metrics.roc_auc_score(s.iloc[test] > 0, y_score.iloc[test]) for _, test in skf.split(y_true, ys)]
print(f"AUC_y: {np.mean(performance):.3f}+-{np.std(performance):.4f}")

AUC_y: 0.672+-0.0097


# Confusion matrix

In [11]:
assert all(y_score.index == y_score.index)
confusion_matrix_sensitive = pd.DataFrame(
    data=sklearn.metrics.confusion_matrix(y_true[s], y_score[s]),
    index=pd.Index(['compliant', 'minor deficiencies', 'detention'], name='inspection result'),
    columns=pd.Index(['low', 'medium', 'high'], name='Expert label')
)
confusion_matrix_non_sensitive = pd.DataFrame(
    data=sklearn.metrics.confusion_matrix(y_true[~s], y_score[~s]),
    index=pd.Index(['compliant', 'minor deficiencies', 'detention'], name='inspection result'),
    columns=pd.Index(['low', 'medium', 'high'], name='Expert label')
)
pd.concat({'sensitive': confusion_matrix_sensitive, 'non sensitive': confusion_matrix_non_sensitive}, axis=1)

Unnamed: 0_level_0,sensitive,sensitive,sensitive,non sensitive,non sensitive,non sensitive
Expert label,low,medium,high,low,medium,high
inspection result,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
compliant,1,261,17,319,4774,49
minor deficiencies,0,620,297,682,16370,732
detention,0,150,267,1,788,246


# Fairness measures

NOTE SWAPPING! From `y=0` (compliant), `y=1` (deficiency), `y=2` (detention) to `y=False` (non-compliant) and `y=True` (compliant)

In [12]:
data = [
    {'measure': 'PPR', 'group': 'sensitive',     'value': (y_score <= 1)[s ].mean()},
    {'measure': 'PPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s].mean()},
    {'measure': 'FPR', 'group': 'sensitive',     'value': (y_score <= 1)[s  & ~y_true].mean()},
    {'measure': 'FPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s & ~y_true].mean()},
    {'measure': 'TPR', 'group': 'sensitive',     'value': (y_score <= 1)[s  &  y_true].mean()},
    {'measure': 'TPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s &  y_true].mean()},
]
data = pd.DataFrame(data).pivot('measure', 'group', 'value')
data.round(3).reindex(['PPR', 'TPR', 'FPR'], axis=0).reindex(['sensitive', 'non sensitive'], axis=1)

group,sensitive,non sensitive
measure,Unnamed: 1_level_1,Unnamed: 2_level_1
PPR,0.64,0.957
TPR,0.676,0.959
FPR,0.592,0.952


In [13]:
ε_impact = 1-(data.at['PPR', 'non sensitive'] / data.at['PPR', 'sensitive'])
ε_odds = max(
    [
        abs(data.at['TPR', 'non sensitive'] - data.at['TPR', 'sensitive']),
        abs(data.at['FPR', 'non sensitive'] - data.at['FPR', 'sensitive'])
    ]
)   
{'ε_impact': ε_impact, 'ε_odds': ε_odds}

{'ε_impact': -0.49599292325481104, 'ε_odds': 0.36028816577626677}