In [None]:
import pickle
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import OrdinalEncoder
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

In [None]:
# Use own data

# TODO: point DATA_PATH at your own CSV file. The previous call,
# pd.read_csv(), had no path argument and therefore always raised a TypeError.
DATA_PATH = 'own_data.csv'
own_data = pd.read_csv(DATA_PATH)

# If the date is stored as a string in yyyy-mm-dd format, the month is the
# two characters at positions 5-6. It is extracted for all rows at once with
# the vectorized .str accessor instead of a row-by-row Python loop.
month = own_data['day'].str[5:7].astype(int).tolist()

# Add the month as a new 'Month' column at position 3 of the frame.
own_data.insert(3, 'Month', month)


# Define the hyperparameter search spaces for the models

In [None]:
# Hyperparameter search spaces and randomized searches for the three
# tree-based models. Every search draws 5 candidates and scores them with
# 3-fold cross-validation; the sampler and the base estimators are seeded so
# that re-running the cell gives the same candidates and the same fits.
#
# NOTE: the searches are deliberately NOT renamed through
# `rscvN.__class__.__name__ = ...`. `rscvN.__class__` is the shared
# RandomizedSearchCV class, so that assignment renamed the class itself for
# the whole session (and each assignment overwrote the previous one). The
# display names of the models are supplied by the `names` list of the
# training cell instead.

# Parameter grid for XGBoost
xgb_param_grid = {
    'booster': ['gbtree'],
    'eta': [0.001, 0.01, 0.1, 0.2, 0.3, 0.002],
    'max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10],
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    'subsample': [0.5, 0.6, 0.8, 0.9, 1],
    'colsample_bytree': [0.5, 0.6, 0.8, 0.9, 1],
    'objective': ['binary:logistic'],
}

# Randomized search for XGBoost
rscv = RandomizedSearchCV(XGBClassifier(random_state=2), xgb_param_grid,
                          cv=3, n_iter=5, random_state=2)

# Parameter grid for Random Forest
# scikit-learn requires min_samples_split >= 2 and min_samples_leaf >= 1 when
# they are given as integers, so the invalid values 0 (and 1 for
# min_samples_split) were removed from the grid. 'bootstrap' used to live in a
# separate one-entry grid; True is the estimator default, so it is simply
# listed here.
rf_param_grid = {
    'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200],
    'criterion': ['gini', 'entropy'],
    'max_features': [None, 'sqrt', 'log2'],
    'min_samples_split': list(range(2, 11)),
    'min_samples_leaf': list(range(1, 11)),
    'bootstrap': [True],
}

# Randomized search for Random Forest
rscv2 = RandomizedSearchCV(RandomForestClassifier(random_state=2), rf_param_grid,
                           cv=3, n_iter=5, random_state=2)

# Parameter grid for Gradient Boosting
# - 'deviance' was dropped: it is the deprecated alias of 'log_loss' and is
#   rejected by scikit-learn >= 1.3 (on scikit-learn < 1.1 use 'deviance'
#   instead of 'log_loss').
# - subsample must lie in (0, 1], so 0 was removed.
# - 'init' must hold estimator instances; KNeighborsClassifier was previously
#   passed as a class.
# - 'verbose' only controls logging, not the fitted model, so it is no longer
#   part of the search space.
grbo_param_grid = {
    'learning_rate': [round(0.1 * step, 1) for step in range(1, 11)],
    'n_estimators': list(range(1, 200, 10)),
    'loss': ['log_loss', 'exponential'],
    'subsample': [round(0.1 * step, 1) for step in range(1, 11)],
    'init': [KNeighborsClassifier(), DecisionTreeClassifier()],
}

# Randomized search for Gradient Boosting
rscv3 = RandomizedSearchCV(GradientBoostingClassifier(random_state=2), grbo_param_grid,
                           cv=3, n_iter=5, random_state=2)

# Training the models based on the different temporal resolutions

In [None]:
# Train and evaluate every model once per temporal resolution and collect the
# accuracy, precision and recall obtained on a held-out 10 % test split.
models = [
    XGBClassifier(random_state=2),
    RandomForestClassifier(random_state=2),
    GradientBoostingClassifier(random_state=2),
    rscv,
    rscv2,
    rscv3,
]
# NOTE(review): 'XGBoOptmized' is misspelled; it is kept unchanged because the
# label ends up in `table` and may be filtered on elsewhere.
names = ['XGBo','RaFo','GrBo','XGBoOptmized','RaFoOptimized','GrBoOptimized']

# Variables used to train the models, change them considering your own dataset
feature_columns = ['\"lat\"','\"lon\"','\"speed\"','hours','Month']

# Res is a categorical variable that was defined to separate the trips that
# have different temporal resolutions (30 sec, 1 min, and so on).
records = []
for interval in pd.unique(own_data['\"res\"']):
    # The subset, the encoded target and the train/test split depend only on
    # the interval, so they are built once here and shared by all models.
    data = own_data[own_data['\"res\"']==interval]
    X = data[feature_columns]

    # OrdinalEncoder returns a 2-D column; estimators expect a 1-D target.
    encoder = OrdinalEncoder()
    y = encoder.fit_transform(np.array(data['\"fish.val\"']).reshape(-1,1)).ravel()

    # A fixed seed makes the split reproducible and guarantees that every
    # model is scored on exactly the same test rows.
    xtrain, xtest, ytrain, ytest = train_test_split(
        X, y, train_size=.90, shuffle=True, random_state=2)

    for model,name in zip(models,names):
        model.fit(xtrain,ytrain)
        ypred = model.predict(xtest)
        records.append({'Model':name,
                        'Temporal Resolution':interval,
                        'accuracy':accuracy_score(ytest,ypred),
                        'precision':precision_score(ytest,ypred),
                        'recall':recall_score(ytest,ypred)})

# Build the results table in one go: DataFrame.append was removed in
# pandas 2.0 and growing a frame row by row is quadratic.
table = pd.DataFrame(records, columns=['Model','Temporal Resolution','accuracy','precision','recall'])
table

# Visualize the results

Note: `acess2` refers to the results of the sampling-by-boat approach. For the sake of simplicity that approach is not considered here, so it is not plotted and appears in the code only in comments.

In [None]:
# Plot accuracy, precision and recall (in %) against the temporal resolution
# for the three baseline models, one panel per metric.
color = [(43/255,57/255,136/255),(42/255,196/255,244/255),(251/255,174/255,39/255)]
mod = ['GrBo','RaFo','XGBo']
markers = ['o','^','s']
metrics = [('accuracy','Accuracy (%)'),('precision','Precision (%)'),('recall','Recall (%)')]

fig = plt.figure(figsize=(10,7))
# The panels occupy the left column (positions 1, 3, 5) of a 3x2 grid. The
# right column (positions 2, 4, 6) is reserved for the 'Sample unit by boat'
# results (acess2, taken from a second table `table2`), which are not
# plotted here.
axes = [fig.add_subplot(3,2,position) for position in (1,3,5)]
axes[0].set_title('Sample unit by point')
for (_, ylabel), ax in zip(metrics, axes):
    ax.set_ylabel(ylabel)

for model, line_color, marker in zip(mod, color, markers):
    # Column names must match the ones used when `table` is built
    # ('Model' / 'Temporal Resolution'); the previous 'modelo' /
    # 'Intervalo de Tempo' keys raised a KeyError.
    acess = table[table['Model'] == model]
    for (metric, _), ax in zip(metrics, axes):
        ax.plot(acess['Temporal Resolution'], acess[metric]*100,
                marker=marker, color=line_color, linestyle='--', label=model)

# Only the bottom panel shows x tick labels; on the upper panels they are
# drawn in white so that they do not clutter the figure.
for ax in axes[:-1]:
    ax.tick_params(axis='x', labelcolor='w')
axes[-1].set_xlabel('Temporal resolution')

# The legend is anchored to the bottom panel and pushed up and to the right
# so that it sits next to the top of the figure.
axes[-1].legend(bbox_to_anchor=(1.3,3.45), ncol=1);
#fig.savefig('timeresolution.png')