In [None]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from matplotlib import pyplot as plt

In [None]:
### PRE-PROCESSING OF INPUT DATA ###

# Load the semicolon-separated detections, then in one pass:
#   - parse the 'date' strings into pandas datetime64 timestamps,
#   - replace the string values "sea" / "colony" by 0 / 1,
#   - order the rows by bird ('rfid') and chronologically within each bird.
df = (
    pd.read_csv('detections.csv', sep=";")
    .assign(date=lambda raw: pd.to_datetime(raw['date'], format="%Y-%m-%d %H:%M:%S"))
    .replace("sea", 0)
    .replace("colony", 1)
    .sort_values(by=['rfid', 'date'])
)

print(df)

In [None]:
# Purpose: Bring in breeding data

breedingdata = pd.read_csv('GroundTruth_data_breeding_simple.csv', sep = ";")

# Encode the labels numerically: NB -> -1, F -> 0, S -> 1
# (presumably non-breeder / failed / successful breeding -- TODO confirm).
breedingdata = breedingdata.replace("NB", -1).replace("F", 0).replace("S", 1)

# Keep only the two remaining classes (0 / 1): rows labelled NB are discarded.
# .copy() makes the filtered frame independent, so adding the 'date' column
# below does not raise pandas' SettingWithCopyWarning.
breedingdata = breedingdata.loc[breedingdata["value"] != -1].copy()

# Reference date of each row: 1st of October of the year preceding 'year'.
breedingdata['date'] = pd.to_datetime((breedingdata['year']-1).astype("str") + "-10-01", format="%Y-%m-%d")


## Remove all years where detections are missing (at the beginning or at the end)
# A row is kept only when its bird has at least one detection on or before the
# reference date AND at least one detection 487 days or more after it.
# The first / last detection of every bird is computed once, instead of
# re-scanning the whole detection table for each breeding row.
# NOTE(review): columns are addressed by name ('rfid', 'date'); the previous
# version used positions 0 and 3 -- confirm 'rfid' is the first CSV column.
detection_span = df.groupby("rfid")["date"].agg(["min", "max"])
first_detection = breedingdata["rfid"].map(detection_span["min"])
last_detection = breedingdata["rfid"].map(detection_span["max"])

# Birds without any detection map to NaT; every comparison with NaT is False,
# so their rows are dropped as well.
has_full_coverage = (
    (first_detection <= breedingdata["date"])
    & (last_detection >= breedingdata["date"] + pd.to_timedelta("487d"))
)
breedingdata = breedingdata.loc[has_full_coverage]

print(len(breedingdata))
print("counts:", breedingdata['value'].value_counts())
print(breedingdata)

In [None]:
### Missing detection correction algorithms ###

import datetime

class DetectionsCorrection():
    def __init__(self):
        """Create the corrector; no instance attribute is set."""
        pass
    def transition_fun(self, data, time_estimated=600):
        """
        Encode every pair of consecutive detections as a 3-flag transition.

        :param data: detections of one bird in chronological order. Rows are read
            positionally: row[1] is the detection datetime (or None when it is
            missing) and row[2] the antenna code (0 = sea, 1 = earth/colony).
        :param time_estimated: longest delay, in seconds, still counted as a
            "short" transition (default 600 s, the time estimated for a penguin
            to get through a gate).
        :return: list of len(data) rows [origin, duration, destination] where
            origin/destination are 0 or 1 and duration is 0 (short) or 1 (long).
            The last row has no following detection and stays [0, 0, 0].
        """
        # Detection times as whole seconds. A missing date (None) borrows the
        # previous known one (or the first known one for leading gaps) so that
        # `seconds` always stays aligned, index for index, with `data`.
        seconds = []
        last_known = None
        for row in data:
            if row[1] is not None:
                last_known = int(round(row[1].timestamp()))
            seconds.append(last_known)
        first_known = next((value for value in seconds if value is not None), 0)
        seconds = [first_known if value is None else value for value in seconds]

        transit = [[0] * 3 for _ in range(len(data))]

        for z in range(len(data) - 1):
            step = data[z][2] - data[z + 1][2]
            if step < 0:
                # the bird is coming from sea (0) to earth (1)
                transit[z][0], transit[z][2] = 0, 1
            elif step > 0:
                # the bird is coming from earth to sea
                transit[z][0], transit[z][2] = 1, 0
            elif step == 0 and data[z][2] == 1:
                # earth to earth
                transit[z][0], transit[z][2] = 1, 1
            # sea to sea: both flags keep their initial 0

            # the time is short (0) or long (1)
            transit[z][1] = 0 if (seconds[z + 1] - seconds[z]) <= time_estimated else 1

        return transit


    def binary_fun(self, vect):
        """
        Pack every 3-flag transition into a single integer code.

        :param vect: list of transitions, each indexable as [flag0, flag1, flag2]
        :return: list of the values flag0 * 4 + flag1 * 2 + flag2, one per transition
        """
        return [row[0] * 4 + row[1] * 2 + row[2] for row in vect]

    def repair_missing_data(self, list_detec, data_list, rfid):
        """
        Rebuild the detections that are missing from one bird's transition codes.

        Codes are the ones produced by binary_fun (origin * 4 + duration * 2 +
        destination, with 0 = sea, 1 = earth/colony, duration 1 = long):
        0 sea-sea short, 1 sea-colony short, 2 sea-sea long, 3 sea-colony long,
        4 colony-sea short, 5 colony-colony short, 6 colony-sea long,
        7 colony-colony long. The known faulty patterns handled below are
        rewritten into pieces of the cycle 1, 7, 4, 2, and one detection is
        rebuilt for every transition that gets inserted.

        :param list_detec: list of transition codes (left unmodified)
        :param data_list: detections the codes were computed from; row[1] is the
            date and row[2] the antenna code
        :param rfid: identifier stored in every rebuilt detection
        :return: list of rebuilt detections, each [antenna, rfid, date]
        """
        diff_gate = 1  # diff between gate 1 and gate 2
        shift = datetime.timedelta(milliseconds=3000)  # offset given to a rebuilt detection

        # Drop the short "earth-earth" (5) and "sea-sea" (0) transitions: they
        # correspond to a double detection. index_of_transition[k] keeps the row
        # of data_list that opens the k-th remaining transition. Building new
        # lists leaves the caller's list_detec untouched.
        index_of_transition = [k for k, code in enumerate(list_detec) if code != 0 and code != 5]
        gap = [list_detec[k] for k in index_of_transition]
        comp = len(gap)  # tracks len(gap) while transitions are inserted

        data_rebuild = []  # the detections that we add

        # `gap` grows while index_of_transition does not, so a transition is
        # looked up from the END of the list (pos - comp is negative): the part
        # of `gap` at and after the current position has not been shifted yet.
        def add_after(pos, gate_step):
            # New detection 3 s after the detection that opens transition `pos`.
            row = data_list[index_of_transition[pos - comp]]
            data_rebuild.append([row[2] + gate_step, rfid, row[1] + shift])

        def add_before(pos, gate_step):
            # New detection 3 s before the detection that closes transition `pos`.
            row = data_list[index_of_transition[pos - comp] + 1]
            data_rebuild.append([row[2] + gate_step, rfid, row[1] - shift])

        # The scan starts at index 2 and stops before the last five transitions.
        i = 2
        while i < comp - 5:
            # 1-6-2 and 1-6-3 (only the 6 is corrected)
            if gap[i] == 6 and gap[i - 1] == 1 and gap[i + 1] in (2, 3):
                gap[i] = 7
                gap.insert(i + 1, 4)
                add_before(i, diff_gate)
                comp = comp + 1

            # 2-3-4
            elif gap[i] == 3 and gap[i - 1] == 2 and gap[i + 1] == 4:
                gap[i] = 1
                gap.insert(i + 1, 7)
                add_after(i, diff_gate)
                comp = comp + 1

            # 2-3-6
            elif gap[i] == 3 and gap[i - 1] == 2 and gap[i + 1] == 6:
                gap[i] = 1
                gap.insert(i + 1, 7)
                gap[i + 2] = 4
                gap.insert(i + 3, 2)
                add_after(i, diff_gate)
                add_after(i + 1, -diff_gate)
                comp = comp + 2

            # 4-3-6 and 4-3-7
            elif gap[i] == 3 and gap[i - 1] == 4 and gap[i + 1] in (6, 7):
                gap[i] = 2
                gap.insert(i + 1, 1)
                add_before(i, -diff_gate)
                comp = comp + 1

            # 7-6-1
            elif gap[i] == 6 and gap[i - 1] == 7 and gap[i + 1] == 1:
                gap[i] = 4
                gap.insert(i + 1, 2)
                add_after(i, -diff_gate)
                comp = comp + 1

            # 7-6-3
            elif gap[i] == 6 and gap[i - 1] == 7 and gap[i + 1] == 3:
                gap[i] = 4
                gap.insert(i + 1, 2)
                gap[i + 2] = 1
                gap.insert(i + 3, 7)
                add_after(i, -diff_gate)
                add_after(i + 1, diff_gate)
                comp = comp + 2

            # successive 2
            elif gap[i] == 2 and gap[i + 1] == 2:
                # j = number of extra 2 following gap[i]; the scan never goes
                # past index comp - 2, so gap[i + j + 2] below always exists.
                j = 0
                while i + j + 1 <= comp - 3 and gap[i + j] == gap[i + j + 1]:
                    j = j + 1
                before, after = gap[i - 1], gap[i + j + 1]

                if j % 2 == 0:  # odd number of successive 2
                    if before == 6 and after == 1:  # 6-2-2-2-1
                        gap[i - 1] = 7
                        del gap[i:i + j + 1]
                        gap[i:i] = [4, 2, 1, 7] * (j // 2) + [4, 2]
                        add_before(i - 1, diff_gate)
                        for k in range(1, j // 2 + 1):
                            add_after(i + 2 * k - 1, diff_gate)
                            add_before(i + 2 * k - 1, diff_gate)
                        comp += 1 + j

                    elif before == 4 and after == 3:  # 4-2-2-2-3
                        del gap[i + 1:i + j + 2]
                        gap[i + 1:i + 1] = [1, 7, 4, 2] * (j // 2) + [1, 7]
                        for k in range(0, j // 2):
                            add_after(i + k * 2 + 1, diff_gate)
                            add_before(i + k * 2 + 1, diff_gate)
                        add_after(i + j + 1, diff_gate)
                        comp += j + 1

                    elif before == 6 and after == 3:  # 6-2-2-2-3
                        del gap[i - 1:i + j + 2]
                        gap[i - 1:i - 1] = [4, 2, 1, 7] * ((j // 2) + 1) + [4, 2, 1]
                        add_after(i - 1, -diff_gate)
                        for k in range(0, j // 2 + 1):
                            add_after(i + 2 * k, diff_gate)
                            add_before(i + 2 * k, diff_gate)
                        add_before(i + j + 1, -diff_gate)
                        comp += j + 4

                    elif before == 4 and after == 1:  # 4-2-2-2-1
                        del gap[i + 1:i + j + 1]
                        gap[i + 1:i + 1] = [1, 7, 4, 2] * (j // 2)
                        for k in range(0, j // 2):
                            add_after(i + k * 2 + 1, diff_gate)
                            add_before(i + k * 2 + 1, diff_gate)
                        comp += j

                else:  # even number of successive 2
                    if before == 6 and after == 1:  # 6-2-2-1
                        gap[i - 1] = 4
                        del gap[i:i + j + 1]
                        gap[i:i] = [2, 1, 7, 4] * ((j // 2) + 1) + [2]
                        add_after(i - 1, -diff_gate)
                        for k in range(0, (j + 1) // 2):
                            add_after(i + 2 * k, diff_gate)
                            add_before(i + 2 * k, diff_gate)
                        comp += 2 + j

                    elif before == 4 and after == 3 and gap[i + j + 2] != 4:  # 4-2-2-3
                        del gap[i + 1:i + j + 2]
                        gap[i + 1:i + 1] = [1, 7, 4, 2] * ((j // 2) + 1) + [1]
                        for k in range((j + 1) // 2):
                            add_after(i + k * 2 + 1, diff_gate)
                            add_before(i + k * 2 + 1, diff_gate)
                        add_before(i + j + 1, -diff_gate)
                        comp += j + 2

                    elif before == 6 and after == 3:  # 6-2-2-3
                        # normally always a 7 before as already corrected
                        del gap[i - 1:i + j + 2]
                        gap[i - 1:i - 1] = [4, 2, 1, 7] * ((j // 2) + 2)
                        add_after(i - 1, -diff_gate)
                        for k in range(0, (j + 1) // 2):
                            add_after(i + 2 * k, diff_gate)
                            add_before(i + 2 * k, diff_gate)
                        add_after(i + j + 1, diff_gate)
                        comp += j + 3

                    ## 4-2-2-1 is not repaired: we don't know which transition is false

            # successive 7 (mirror of the successive 2 rules:
            # 2 <-> 7, 6 <-> 3, 4 <-> 1, and the gate step changes sign)
            elif gap[i] == 7 and gap[i + 1] == 7:
                j = 0
                while i + j + 1 <= comp - 3 and gap[i + j] == gap[i + j + 1]:
                    j = j + 1
                before, after = gap[i - 1], gap[i + j + 1]

                if j % 2 == 0:  # odd number of successive 7
                    if before == 3 and after == 4:  # 3-7-7-7-4
                        gap[i - 1] = 2
                        del gap[i:i + j + 1]
                        gap[i:i] = [1, 7, 4, 2] * (j // 2) + [1, 7]
                        add_before(i - 1, -diff_gate)
                        for k in range(1, j // 2 + 1):
                            add_after(i + 2 * k - 1, -diff_gate)
                            add_before(i + 2 * k - 1, -diff_gate)
                        comp += 1 + j

                    elif before == 1 and after == 6:  # 1-7-7-7-6
                        del gap[i + 1:i + j + 2]
                        gap[i + 1:i + 1] = [4, 2, 1, 7] * (j // 2) + [4, 2]
                        for k in range(0, j // 2):
                            add_after(i + k * 2 + 1, -diff_gate)
                            add_before(i + k * 2 + 1, -diff_gate)
                        add_after(i + j + 1, -diff_gate)
                        comp += j + 1

                    elif before == 3 and after == 6:  # 3-7-7-7-6
                        del gap[i - 1:i + j + 2]
                        gap[i - 1:i - 1] = [1, 7, 4, 2] * ((j // 2) + 1) + [1, 7, 4]
                        add_after(i - 1, diff_gate)
                        for k in range(0, j // 2 + 1):
                            add_after(i + 2 * k, -diff_gate)
                            add_before(i + 2 * k, -diff_gate)
                        add_before(i + j + 1, diff_gate)
                        comp += j + 4

                    elif before == 1 and after == 4:  # 1-7-7-7-4
                        del gap[i + 1:i + j + 1]
                        gap[i + 1:i + 1] = [4, 2, 1, 7] * (j // 2)
                        for k in range(0, j // 2):
                            add_after(i + k * 2 + 1, -diff_gate)
                            add_before(i + k * 2 + 1, -diff_gate)
                        comp += j

                else:  # even number of successive 7
                    # 3-7-7-4: the run is closed by a 4 (mirror of 6-2-2-1).
                    # It used to be tested against 7, which a finished run of 7
                    # cannot be followed by, so this repair never applied.
                    if before == 3 and after == 4:
                        gap[i - 1] = 1
                        del gap[i:i + j + 1]
                        gap[i:i] = [7, 4, 2, 1] * ((j // 2) + 1) + [7]
                        add_after(i - 1, diff_gate)
                        for k in range(0, (j + 1) // 2):
                            add_after(i + 2 * k, -diff_gate)
                            add_before(i + 2 * k, -diff_gate)
                        comp += 2 + j

                    # 1-7-7-6 not followed by a 1 (mirror of 4-2-2-3 not followed
                    # by a 4): the guard looks one transition past the 6.
                    elif before == 1 and after == 6 and gap[i + j + 2] != 1:
                        del gap[i + 1:i + j + 2]
                        gap[i + 1:i + 1] = [4, 2, 1, 7] * ((j // 2) + 1) + [4]
                        for k in range((j + 1) // 2):
                            add_after(i + k * 2 + 1, -diff_gate)
                            add_before(i + k * 2 + 1, -diff_gate)
                        add_before(i + j + 1, diff_gate)
                        comp += j + 2

                    elif before == 3 and after == 6:  # 3-7-7-6
                        # normally always a 2 before as already corrected
                        del gap[i - 1:i + j + 2]
                        gap[i - 1:i - 1] = [1, 7, 4, 2] * ((j // 2) + 2)
                        add_after(i - 1, diff_gate)
                        for k in range(0, (j + 1) // 2):
                            add_after(i + 2 * k, -diff_gate)
                            add_before(i + 2 * k, -diff_gate)
                        add_after(i + j + 1, -diff_gate)
                        comp += j + 3

                    ## 1-7-7-4 is not repaired: we don't know which transition is false
            i = i + 1

        # Sanity check: comp must still match the real length of gap.
        if len(gap) != comp:
            print("Error")
        return data_rebuild

    def fill_gaps(self, rfid, detections):
        """
        :param rfid: rfid is a list define in the main file
        :return: dataframe of the gaps filled (4 row : antenne_id, RFID, date_arrivee, type)
        """

        data_return = []
        df = pd.DataFrame()
        
        if detections is not None:
            transition = self.transition_fun(detections)
            binary = self.binary_fun(transition)
            data_return = self.repair_missing_data(binary, detections, rfid)

        if data_return:
            data_return_sort = sorted(data_return, key=lambda colonnes: colonnes[2])
            data_return_inv = list(map(list, zip(*data_return_sort)))
            df = pd.DataFrame({'rfid': data_return_inv[1]})
            df['date'] = data_return_inv[2]
            df['antenna'] = data_return_inv[0]

        return df


In [None]:
### DATA GENERATOR ALGORITHM : DATA TREATMENT ALGORITHMS RUN AT EACH ITERATION ###

import numpy as np
from tensorflow import keras
import datetime
from datetime import timedelta
import random
import copy

class DataGenerator_penguins(keras.utils.Sequence, DetectionsCorrection):
    """
    Keras Sequence that builds one freshly augmented training batch per call.

    Every call to __getitem__ randomly drops a share of the detections,
    rebuilds missing ones with DetectionsCorrection.fill_gaps, turns every
    (bird, breeding season) pair into a binned time series and finally shifts
    each series by a random offset.
    """

    # Length, in days, of the detection window kept for every breeding season.
    SEASON_LENGTH_DAYS = 487
    # First and last breeding season (inclusive) that are turned into samples.
    FIRST_SEASON = 1998
    LAST_SEASON = 2020

    def __init__(self, breeding_dataframe, detection_dataframe, n_classes=3, shuffle=True, remove_detection_rate = 0.05, offset_range=30):
        """
        :param breeding_dataframe: breeding labels used for training; its four columns are
                                   read positionally as rfid, year, value, date
        :param detection_dataframe: detections; the columns rfid, date and antenna are used
        :param n_classes: stored but not used by this class
        :param shuffle: stored but not used by this class
        :param remove_detection_rate: share of the detections dropped at each iteration
        :param offset_range: maximum random shift of a series, in days
        """
        self.breeding_df = breeding_dataframe
        self.detection_df = detection_dataframe
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.remove_detection_rate = remove_detection_rate
        self.offset_range = offset_range
        # Width of one time bin, in hours (should divide 24 so bins line up with midnight)
        self.resolution = 12
        self.pengid_list = self.breeding_df['rfid'].unique().tolist()

    def __len__(self):
        """Number of batches per epoch: the whole training set is a single batch."""
        return 1

    def __getitem__(self, index):
        """
        Generate one batch of data.

        :param index: batch index (ignored, there is a single batch)
        :return: tuple (X_train, y_train) as produced by creation_dataset, with each
                 series of X_train shifted by a random number of bins
        """
        detections = self.detection_df

        # Keep a random (1 - remove_detection_rate) share of the detections.
        # Rows are drawn by position, so the index labels of the frame do not matter.
        n_keep = int(len(detections) * (1 - self.remove_detection_rate))
        kept_positions = random.sample(range(len(detections)), n_keep)
        thinned = detections.iloc[kept_positions].sort_values(by=['rfid', 'date'])

        # Rebuild the missing detections bird by bird; the pieces are gathered in a
        # list and concatenated once instead of growing the frame inside the loop.
        frames = [thinned]
        for pengid in self.pengid_list:
            bird_rows = thinned.loc[thinned['rfid'] == pengid, :]
            bird_rows = list(bird_rows.itertuples(index=False, name=None))
            dc = DetectionsCorrection()
            correction = dc.fill_gaps(rfid=pengid, detections=bird_rows)
            if not correction.empty:
                frames.append(correction)
        detections_corrected = pd.concat(frames, ignore_index=True)

        # Generate the binary lists with their label from the training datasets
        X_train, y_train = self.creation_dataset(self.breeding_df, detections_corrected)

        # Random offset of the data: zeros are added at the beginning or at the end.
        # random.randint needs integer bounds, hence the explicit conversion.
        max_shift = int(self.offset_range * 24 / self.resolution)
        for i in range(len(X_train)):
            r = random.randint(0, max_shift)
            if r == 0:
                continue
            shifted = np.zeros_like(X_train[i])
            if random.randint(0, 1):
                # Shift towards the end: leading bins become zeros
                shifted[r:] = X_train[i][:-r]
            else:
                # Shift towards the start: trailing bins become zeros
                shifted[:-r] = X_train[i][r:]
            X_train[i] = shifted

        return X_train, y_train

    def get_Val(self, breeding_data, detections):
        """
        Generate the validation data in the same format as the training data.

        :param breeding_data: breeding labels of the validation samples
        :param detections: detections dataframe (used as is: no removal, no correction, no offset)
        :return: tuple (X_test, y_test) as produced by creation_dataset
        """
        X_test, y_test = self.creation_dataset(breeding_data, detections)
        return X_test, y_test

    def creation_dataset(self, breedingdata, detections):
        """
        Turn detections into one binned time series per (bird, breeding season).

        :param breedingdata: dataframe whose four columns are read positionally as
                             rfid, year, value, date
        :param detections: dataframe with at least the columns rfid, date, antenna
        :return: tuple (X, y); X has shape (n_samples, n_bins, 2) where the last axis holds
                 [last antenna value seen so far, number of detections in the bin],
                 y holds the matching breeding values
        """
        window = pd.Timedelta(days=self.SEASON_LENGTH_DAYS)
        bin_freq = f"{self.resolution}H"

        # Attach the season (year) and its label (value) to every detection of the window
        season_frames = []
        for rfid, year, value, date in breedingdata.itertuples(index=False, name=None):
            in_window = (
                (detections["rfid"] == rfid)
                & (detections["date"] >= date)
                & (detections["date"] <= date + window)
            )
            season_frames.append(detections.loc[in_window].assign(year=year, value=value))

        if season_frames:
            # Sorted by date so that .last() below really is the latest detection of a bin,
            # even when rebuilt detections were appended at the end of `detections`.
            merged_df = pd.concat(season_frames).sort_values(by=["rfid", "date"])
        else:
            merged_df = pd.DataFrame(columns=["rfid", "date", "antenna", "year", "value"])

        locations = []
        detection_counts = []
        labels = []
        n_bins = 0
        for start_year in range(self.FIRST_SEASON, self.LAST_SEASON + 1):
            start_date = pd.Timestamp(year=start_year - 1, month=9, day=30)
            bins = pd.date_range(start=start_date, end=start_date + window, freq=bin_freq)
            n_bins = len(bins)

            season_df = merged_df[merged_df["year"] == start_year]
            if season_df.empty:
                continue

            # Grid of time bin x rfid
            binned = season_df.groupby([pd.Grouper(key="date", freq=bin_freq), "rfid"])["antenna"]
            # Last antenna value of each bin, carried forward over empty bins (0 before the first detection)
            location = binned.last().unstack().reindex(bins).ffill().fillna(0).astype(int)
            # Number of detections of each bin
            count = binned.size().unstack().reindex(bins).fillna(0).astype(int)
            # Breeding label of each bird, in the same order as the columns of the grid
            season_labels = season_df.groupby("rfid")["value"].last().reindex(location.columns)

            locations.extend(location.values.T)
            detection_counts.extend(count.values.T)
            labels.extend(season_labels)

        if not locations:
            return np.empty((0, n_bins, 2), dtype=int), np.array(labels)

        # Pair, for every bin, the location with the number of detections
        features = np.stack([np.array(locations), np.array(detection_counts)], axis=-1)
        return features, np.array(labels)

        
        

In [None]:
### BUILDING OF CNN MODEL ### 
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation,MaxPooling1D, MaxPooling2D, Dropout, Flatten, Reshape, LSTM, Embedding, AveragePooling1D
from tensorflow.keras.layers import Dense, Conv1D, SimpleRNN, GRU
from tensorflow.keras.callbacks import ModelCheckpoint

# Shape of one sample: 975 time bins x 2 features per bin.
# It has to match the series built by DataGenerator_penguins.creation_dataset.
INPUT_SHAPE = (975, 2)

model = Sequential([
    # CNN LAYERS
    # Only the first layer declares the input shape; the following layers infer theirs.
    Conv1D(filters=128, kernel_size=20, activation="relu", padding='same', input_shape=INPUT_SHAPE),
    AveragePooling1D(pool_size=2),
    Dropout(0.5),
    Conv1D(filters=32, kernel_size=10, activation="relu", padding='same'),
    AveragePooling1D(pool_size=2),
    Dropout(0.5),
    Conv1D(filters=16, kernel_size=5, activation="relu", padding='same'),
    AveragePooling1D(pool_size=2),
    Dropout(0.5),
    Flatten(),

    # FULLY CONNECTED LAYERS
    Dense(500, activation='relu'),
    Dense(250, activation='relu'),
    Dense(100, activation='relu'),

    # Two output classes, predicted as probabilities
    Dense(2, activation='softmax'),
])

# COMPILATION
# Sparse loss: the labels are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
### MODEL FITTING ###

import numpy as np
import datetime
from tensorflow import device
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential


# Datasets
# A fixed seed keeps the train/validation split identical across kernel restarts,
# so the accuracies reported below can be reproduced.
SPLIT_SEED = 42
training_data, testing_data = train_test_split(breedingdata, test_size=0.2, random_state=SPLIT_SEED)

# Call of data generator
DG = DataGenerator_penguins(training_data, df)

# Building of validation dataset
# NOTE: despite its name, y_test is the (inputs, labels) tuple returned by get_Val;
# the name is kept because the evaluation cell indexes it as y_test[0] / y_test[1].
y_test = DG.get_Val(testing_data, df)

# Train model on training dataset
with device("/device:GPU:0"):
    model.fit(DG, validation_data=y_test,
                    use_multiprocessing=False,
                    workers=6, epochs = 200, verbose=2)

In [None]:
### VALIDATION ACCURACY (predictions on the held-out testing dataset)

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Predicted class of every held-out sample: index of the highest output of the network
val_inputs, val_labels = y_test
pred_1 = model.predict(val_inputs).argmax(axis=-1)

# Comparison with ground-truth data
val_accuracy = accuracy_score(val_labels, pred_1)
print('Val accuracy', val_accuracy)
val_confusion = confusion_matrix(val_labels, pred_1)
print(val_confusion)

In [None]:
### SAVING OF MODEL ARCHITECTURE AND PARAMETERS
# Writes the trained model to 'model_determination_S_F', relative to the working directory.
# NOTE(review): the path has no file extension, so the on-disk format is whatever the
# installed Keras version picks for such a path — confirm it before relying on it.
model.save('model_determination_S_F')