In [None]:
import numpy as np
import pickle
import pandas as pd
from sklearn.metrics import accuracy_score, explained_variance_score

In [None]:
def post_classification(y,number=4):
    '''
        Smooths a sequence of binary predictions by flipping isolated points.

        y: predictions (indexable sequence of 0/1 labels, e.g. a numpy array)
        number: points to check on each side of a prediction

        A point i in the verified range is flipped (0 -> 1.0, anything else
        -> 0.0) when it differs from every one of the `number` already
        corrected points behind it AND from the point `number` positions
        ahead of it. Verified points that are not flipped are written back
        as 0.0 / 1.0.

        The first `number`+1 points and the last `number` points are copied
        unchanged, in their original order.

        Returns `y` itself when `number` <= 0; otherwise a numpy array with
        the same length as `y`.
    '''

    if number <= 0:
        return y

    values = np.asarray(y)
    total = len(values)

    # Too short to have any point with `number` neighbours on both sides:
    # nothing can be verified, so hand back an unmodified copy instead of
    # producing an array of the wrong length (or indexing out of range).
    if total < 2 * number + 1:
        return values.copy()

    # The first points are not verified in the loop below, copy them as they are.
    new_y = list(values[:number + 1])

    for i in range(number + 1, total - number):
        current = values[i]

        # Points behind are compared against the *corrected* values (new_y),
        # so an earlier flip influences later decisions.
        behind_all_differ = all(current != new_y[i - j] for j in range(1, number + 1))

        # The original test was
        #   (all behind differ and all ahead differ) or
        #   (all behind differ and the furthest point ahead differs)
        # "all ahead differ" implies "the furthest point ahead differs", so
        # only the furthest point ahead matters.
        ahead_last_differs = current != values[i + number]

        if behind_all_differ and ahead_last_differs:
            new_y.append(1.0 if current == 0 else 0.0)
        else:
            new_y.append(0.0 if current == 0 else 1.0)

    # The last points are not verified either. Slice them so they keep their
    # chronological order (iterating y[-1], y[-2], ... would reverse them).
    new_y.extend(values[total - number:])

    return np.array(new_y)

In [None]:
def calculate_time_difference(end_time, start_time):
    '''
        Returns the whole number of seconds between two timestamps.

        end_time, start_time: numpy.datetime64 values (scalars or arrays of
            matching shape) with a fixed-length resolution such as s, ms or ns

        The difference is floor-divided by one second, so sub-second
        remainders are discarded; the result is negative when end_time is
        earlier than start_time.
    '''

    # Floor-dividing by a one-second timedelta gives seconds whatever the
    # resolution of the inputs is. Casting the difference to int and dividing
    # by 10**9 is only correct for nanosecond-resolution inputs.
    time_difference = (end_time - start_time) // np.timedelta64(1, 's')

    return time_difference

In [None]:
def fish_events(events,temps, threshold):
    '''
        Counts the fishing events in a classified series.

        events: target (fishing or not fishing); points equal to 1 are fishing
        temps: numpy datetime of the series, aligned position by position with events
        threshold: thresholds between fishing events of the established gears (See paper)

        For every pair of consecutive fishing points the index gap is
        multiplied by the elapsed time in seconds; each pair whose product
        exceeds `threshold` starts a new fishing event.

        Returns the number of fishing events as an int (0 when there is no
        fishing point at all).
    '''

    # identifies the fishing events (asarray so that plain lists compare element-wise)
    idx_fish = np.where(np.asarray(events) == 1)[0]

    # Without at least two fishing points there is no gap to evaluate:
    # no point means no event, a single point is a single event.
    if idx_fish.size < 2:
        return int(idx_fish.size)

    # identifies the datetimes of those events
    time_fish = np.array(temps)[idx_fish]

    # Index gap times elapsed seconds between consecutive fishing points.
    gap_scores = np.diff(idx_fish) * calculate_time_difference(time_fish[1:], time_fish[:-1])

    # The first event always exists; every gap above the threshold opens another one.
    return 1 + int(np.count_nonzero(gap_scores > threshold))

In [None]:
# Variables used in the model
to_train = ['Latitude','Longitude','MovingAverageN_10','Hours','Month']

# Input locations
MODEL_PATH = "model"  # pickled, already trained model
DATA_PATH = "own_data.csv"  # TODO(review): placeholder - the original read_csv() call passed no path

# Column identifying a boat trip. The original loop iterated over
# 'boat_trip_id' but filtered the rows on 'boat.trip'; a single name is used
# for both so the selection is consistent.
# TODO(review): confirm which of the two names the data actually uses.
BOAT_COLUMN = 'boat.trip'

# Defines the model
# NOTE: unpickling can execute arbitrary code - only load model files from a trusted source.
# pickle.load needs an open binary file object, not a file name.
with open(MODEL_PATH, "rb") as model_file:
    model = pickle.load(model_file) # trained model

# Thresholds
# TODO: fill in the threshold of each metier (See paper); the keys looked up
# below are 'GNW', 'GSW', 'GS' and 'CS'.
metiers = dict()

# Maps the metier (gear name + zone name) to its key in `metiers`
METIER_THRESHOLD_KEY = {
    'Bivalve DredgesNorthWest': 'GNW',
    'Bivalve DredgesSouthWest': 'GSW',
    'Bivalve DredgesSouth': 'GS',
    'Octopus Traps & PotsSouth': 'CS',
}

# DateTime is parsed on load because the time differences between points
# are computed with datetime arithmetic.
own_data = pd.read_csv(DATA_PATH, parse_dates=['DateTime'])

# RF was the model used and true fishing the real values of the target variable
table_columns = ['Boat','TrueFishing','RF'] + ['Isolated%d' % k for k in range(1, 11)]

rows = []

for boat, trip in own_data.groupby(BOAT_COLUMN, sort=False):

    metier = trip['gear'].iloc[0] + trip['zone'].iloc[0]

    # This part switches the thresholds depending on the metier.
    # Fail loudly on an unknown metier instead of silently reusing the
    # threshold left over from the previous boat.
    threshold_key = METIER_THRESHOLD_KEY.get(metier)
    if threshold_key is None or threshold_key not in metiers:
        raise ValueError("No fishing-event threshold defined for metier %r (boat %r)" % (metier, boat))
    threshold = metiers[threshold_key]

    # Number of distinct 'fish.event' labels minus one
    # (presumably one label marks "not fishing" - TODO confirm).
    true_events = len(pd.unique(trip['fish.event']))-1

    timestamps = trip['DateTime'].reset_index(drop = True)

    # The raw prediction does not depend on the number of neighbours, so it
    # is computed once per boat and re-smoothed for every setting.
    raw_pred = model.predict(trip[to_train])

    results = [boat, true_events]

    # Established the different neighbours (0 = raw model output)
    for i in range(0,11):

        pred = raw_pred

        # post_classification needs at least 2*i+1 points
        if i > 0 and len(pred) > i*2:
            pred = post_classification(pred,i)

        results.append(fish_events(pred,timestamps,threshold))

    rows.append(results)

# Build the table in one go rather than appending row by row
table = pd.DataFrame(rows, columns=table_columns)
    

# Estimates the number of overestimated and underestimated points, as well as the explained variance

Only the RF loop is active; the loops for the other models are commented out.

In [None]:
import matplotlib.pyplot as plt

# One result list per model; each gets one entry per prediction column of the
# model's table (raw output first, then every neighbourhood size).
acc_rf, acc_grbo, acc_xgbo, acc_rfB, acc_grboB, acc_xgboB = ([] for _ in range(6))
over_rf, over_grbo, over_xgbo, over_rfB, over_grboB, over_xgboB = ([] for _ in range(6))
under_rf, under_grbo, under_xgbo, under_rfB, under_grboB, under_xgboB = ([] for _ in range(6))

true_counts = np.array(table['TrueFishing'])

# The first two columns are the boat and the true value; the rest are predictions.
for col in table.columns[2:]:
    predicted_counts = np.array(table[col])

    # explained variance of the predicted number of fishing events
    acc_rf.append(explained_variance_score(table['TrueFishing'], table[col]))
    # rows where the prediction is above / below the true number of events
    over_rf.append(int(np.count_nonzero(predicted_counts > true_counts)))
    under_rf.append(int(np.count_nonzero(predicted_counts < true_counts)))

#for col in table_GrBo.columns[2:]:
#
#    acc_grbo.append(explained_variance_score(table_GrBo['TrueFishing'],table_GrBo[col]))
#    over_grbo.append(len(np.argwhere(np.array(table_GrBo[col])>np.array(table_GrBo['TrueFishing']))))
#    under_grbo.append((len(np.argwhere(np.array(table_GrBo[col])<np.array(table_GrBo['TrueFishing'])))))
#
#for col in table_XGBo.columns[2:]:
#
#    acc_xgbo.append(explained_variance_score(table_XGBo['TrueFishing'],table_XGBo[col]))
#    over_xgbo.append(len(np.argwhere(np.array(table_XGBo[col])>np.array(table_XGBo['TrueFishing']))))
#    under_xgbo.append((len(np.argwhere(np.array(table_XGBo[col])<np.array(table_XGBo['TrueFishing'])))))
#
#for col in table_XGBoBoat.columns[2:]:
#
#    acc_xgboB.append(explained_variance_score(table_XGBoBoat['TrueFishing'],table_XGBoBoat[col]))
#    over_xgboB.append(len(np.argwhere(np.array(table_XGBoBoat[col])>np.array(table_XGBoBoat['TrueFishing']))))
#    under_xgboB.append((len(np.argwhere(np.array(table_XGBoBoat[col])<np.array(table_XGBoBoat['TrueFishing'])))))
#
#for col in table_GrBoBoat.columns[2:]:
#
#    acc_grboB.append(explained_variance_score(table_GrBoBoat['TrueFishing'],table_GrBoBoat[col]))
#    over_grboB.append(len(np.argwhere(np.array(table_GrBoBoat[col])>np.array(table_GrBoBoat['TrueFishing']))))
#    under_grboB.append((len(np.argwhere(np.array(table_GrBoBoat[col])<np.array(table_GrBoBoat['TrueFishing'])))))
#
#for col in table_RaFoBoat.columns[2:]:
#
#    acc_rfB.append(explained_variance_score(table_RaFoBoat['TrueFishing'],table_RaFoBoat[col]))
#    over_rfB.append(len(np.argwhere(np.array(table_RaFoBoat[col])>np.array(table_RaFoBoat['TrueFishing']))))
#    under_rfB.append((len(np.argwhere(np.array(table_RaFoBoat[col])<np.array(table_RaFoBoat['TrueFishing'])))))

# Data Visualization

Note that only RF can be plotted in this case, because the loops that compute the metrics of the other models are commented out above.

In [None]:
acc = [acc_rf,acc_xgbo,acc_grbo, acc_rfB, acc_xgboB, acc_grboB]
over = [over_rf,over_xgbo, over_grbo, over_rfB, over_xgboB, over_grboB]
under = [under_rf, under_xgbo, under_grbo, under_rfB, under_xgboB, under_grboB]
alg = ['RaFo','XGBo','GrBo','RaFoB','XGBoB','GrBoB']
labels = ['Isolated 0','Isolated1', 'Isolated2', 'Isolated3', 'Isolated4', 'Isolated5', 'Isolated6', 'Isolated7', 'Isolated8', 'Isolated9', 'Isolated10']

# Only models whose metrics were actually computed can be drawn; an empty
# list would leave a blank score panel and make the bar plot fail.
populated = [k for k in range(len(alg)) if len(acc[k]) > 0]

if populated:
    # One row per model: explained variance on the left, over/under counts on the right.
    fig, axes = plt.subplots(len(populated), 2, figsize=(10,7), squeeze=False)
    positions = list(range(len(labels)))

    for row, k in enumerate(populated):
        is_last_row = row == len(populated) - 1
        ax_score, ax_counts = axes[row]

        # The same index k is used for the title, the scores and the counts
        # so that every panel of a row describes the same model.
        ax_score.set_title(alg[k])
        ax_score.plot(acc[k],'o',linestyle = '--')
        ax_score.set_ylabel('VEcv')

        ax_counts.set_title(alg[k])
        # Stacked bars: underestimated at the bottom, overestimated on top
        ax_counts.bar(positions,height=over[k],bottom=under[k], label = 'Overestimated',color='salmon')
        ax_counts.bar(positions,height=under[k],label='Underestimated',color='lightblue')

        # Tick labels are only shown on the bottom row
        for ax in (ax_score, ax_counts):
            ax.set_xticks(positions)
            if is_last_row:
                ax.set_xticklabels(labels, rotation = 90)
            else:
                ax.tick_params(labelbottom=False)

        if row == 0:
            ax_counts.legend(bbox_to_anchor = (1,1))

    fig.tight_layout()
    plt.show()