# Imports

In [1]:
import os
import numpy as np

# Load the Data

In [None]:
# Google Colab only: mount Google Drive at /content/drive so files stored there
# can be read from the notebook. Skip this cell when running locally.
from google.colab import drive
drive.mount('/content/drive')

Download `predictions_npy.zip` from Google Drive (https://drive.google.com/drive/folders/1xzN19rTxewEiwzeBN3g88w5UBN2Z0Z0S?usp=share_link) and add it to your Drive, or to a local directory if you are not using Google Colab.

The next step is to unzip the file.

In [2]:
# Extract the archive into the current working directory. `-o` overwrites files
# that already exist without prompting, so re-running the cell does not stop at
# an interactive "replace?" question.
! unzip -o "predictions_npy.zip"

Archive:  /content/drive/MyDrive/PhD/crickets/paper title/data/predictions_npy.zip
   creating: predictions_npy/
  inflating: __MACOSX/._predictions_npy  
   creating: predictions_npy/sugar/
   creating: predictions_npy/control/
   creating: predictions_npy/ammonia/
   creating: predictions_npy/sugar/test/
   creating: predictions_npy/sugar/train/
   creating: predictions_npy/sugar/val/
   creating: predictions_npy/control/test/
   creating: predictions_npy/control/train/
   creating: predictions_npy/control/val/
   creating: predictions_npy/ammonia/test/
   creating: predictions_npy/ammonia/train/
   creating: predictions_npy/ammonia/val/
  inflating: predictions_npy/sugar/test/C43_S.npy  
  inflating: predictions_npy/sugar/test/C45_S.npy  
  inflating: predictions_npy/sugar/test/C42_S.npy  
  inflating: predictions_npy/sugar/test/C46_S.npy  
  inflating: predictions_npy/sugar/test/C44_S.npy  
  inflating: predictions_npy/sugar/train/C30_S.npy  
  inflating: predictions_npy/sugar/trai

# Utility Functions/Classes
Converting the NumPy arrays obtained from the slp predictions into the final sequences, in which the head is the origin of the Cartesian plane, requires some *functions* and a *class*.

## Handling missing values
The class takes a one-dimensional sequence (e.g., a list) and can both analyze and interpolate its missing values.
In the code the class is located in `utils.missingvalues`.

In [3]:
class MissingValuesHandler:
    """
    Detect and fill NaN entries of a single one-dimensional prediction sequence.

    The sequence is stored by reference: the constructor may overwrite its first
    element in place, while the fill methods always return a new list and leave
    the stored sequence untouched.
    """

    def __init__(self, pred):
        """
        Store the sequence and repair a leading NaN in place.

        If the first value of pred is NaN, it is replaced with the closest
        not-NaN value so that every later gap has a valid antecedent. An empty
        sequence, or a sequence made only of NaN, is left unchanged.
        :param pred: Single prediction sequence (mutable and indexable, e.g. a
                     list or a one-dimensional numpy array)
        """
        self.pred = pred
        if len(self.pred) == 0 or not np.isnan(self.pred[0]):
            return
        # Bounded scan: stops at the end of the sequence instead of indexing
        # past it when no valid value exists.
        for i in range(1, len(self.pred)):
            if not np.isnan(self.pred[i]):
                self.pred[0] = self.pred[i]
                break

    def find_missing_values(self):
        """

        :return: a list of all the indexes where the keypoint is NaN
        """
        return [i for i, v in enumerate(self.pred) if np.isnan(v)]

    def _next_valid_indexes(self):
        """
        Pre-compute, in a single backward pass, where the next valid value is.

        :return: a list whose entry i is the index of the first not-NaN value
                 strictly after position i, or None when there is none
        """
        next_valid = [None] * len(self.pred)
        nearest = None
        for i in range(len(self.pred) - 1, -1, -1):
            next_valid[i] = nearest
            if not np.isnan(self.pred[i]):
                nearest = i
        return next_valid

    def _fill(self, weighted):
        """
        Shared implementation of the two public fill methods.

        :param weighted: if True the subsequent value is weighted by the inverse
                         of its distance from the gap, otherwise a plain average
                         of antecedent and subsequent value is used
        :return: a list containing the predictions
        """
        next_valid = self._next_valid_indexes()
        new_pred = []
        for i, v in enumerate(self.pred):
            if not np.isnan(v):
                new_pred.append(v)
                continue

            j = next_valid[i]
            subs = None if j is None else self.pred[j]

            if i == 0:
                # No antecedent exists. This only happens when the constructor
                # could not repair the first value (all-NaN sequence) or the
                # sequence was altered afterwards: use the next valid value if
                # there is one, otherwise keep the NaN.
                new_pred.append(v if subs is None else subs)
                continue

            # The antecedent is taken from the already-filled output, so it is
            # never a gap itself.
            prev = new_pred[i - 1]
            if subs is None:
                # Trailing gap: nothing valid follows, repeat the antecedent.
                new_pred.append(prev)
            elif weighted:
                # The antecedent is one step away (weight 1); the subsequent
                # value is j - i steps away (weight 1 / (j - i)).
                alpha = 1 / (j - i)
                new_pred.append((prev + alpha * subs) / (1 + alpha))
            else:
                new_pred.append((prev + subs) / 2)

        return new_pred

    def fill_missing_values(self):
        """
        Filling the missing values using a simple average method between the antecedent
        and the first not-nan-value. When no not-NaN value follows, the antecedent is
        repeated.
        :return: a list containing the predictions
        """
        return self._fill(weighted=False)

    def fill_missing_values_weighted(self):
        """
        Filling the missing values using a weighted average method between the antecedent
        and the first not-nan-value. This method gives more importance to the value closest
        to the missing one. When no not-NaN value follows, the antecedent is repeated.
        :return: a list containing the predictions
        """
        return self._fill(weighted=True)

The function below uses the above class to perform the filling operation, also outputting some information about the missing values, such as how many values are missing for a particular joint in a specific sequence.

The `prediction_npy_path` is the path to the unzipped directory. `filled_prediction_path`, on the other hand, is the path to a new (i.e., not yet existing) or empty directory where the new sequences will be saved, using the same structure as the `predictions_npy` directory.

The code of this function can be found in `utils.missingvalues`.

In [4]:
def from_pred_to_filled_pred(prediction_npy_path, filled_prediction_path):
    """
    Fill the missing values of every prediction file of a project directory.

    The input directory is expected to follow the layout
    <class>/<set>/<file>.npy with class in {control, sugar, ammonia} and set in
    {train, test, val}; any other entry is ignored. The same layout is recreated
    under filled_prediction_path. Output directories are created when missing
    and reused when they already exist, so the function can be run again on the
    same output location (files with the same name are overwritten).

    For each file a short report (shape and number of missing values per row)
    is printed.
    :param prediction_npy_path: path of the directory holding the raw predictions
    :param filled_prediction_path: path of the directory where the filled
                                   predictions are saved
    """
    os.makedirs(filled_prediction_path, exist_ok=True)

    classes = os.listdir(prediction_npy_path)
    for c in classes:  # control sugar ammonia
        if c not in ['control', 'sugar', 'ammonia']:
            continue
        class_path = os.path.join(prediction_npy_path, c)  # predictions_npy/control
        sets = os.listdir(class_path)

        filled_class_path = os.path.join(filled_prediction_path, c)
        os.makedirs(filled_class_path, exist_ok=True)

        for s in sets:
            if s not in ['train', 'test', 'val']:
                continue
            set_path = os.path.join(class_path, s)  # predictions_npy/control/train
            predictions = [prediction for prediction in os.listdir(set_path) if prediction.endswith('.npy')]

            filled_set_path = os.path.join(filled_class_path, s)
            os.makedirs(filled_set_path, exist_ok=True)

            for prediction in predictions:
                # Transposed so that iterating over the array yields one
                # sequence per row.
                pred = np.load(os.path.join(set_path, prediction)).T
                print(prediction)
                print(f'\t-Shape: {pred.shape}')

                for i, row in enumerate(pred):
                    # The handler may repair a leading NaN of `row` in place
                    # before the missing values are counted.
                    mvh = MissingValuesHandler(row)
                    missing = mvh.find_missing_values()
                    print(f'\t-Missing values for entry {i}: {len(missing)}')
                    if len(missing) == 0:
                        print(f'\tNo missing values for entry {i}, filling operation skipped')
                    else:
                        pred[i] = mvh.fill_missing_values_weighted()
                np.save(os.path.join(filled_set_path, prediction), pred)

## Obtain the centralized numpy
From the filled prediction directory we can use the following functions to center the head position at (0,0) and move all the other keypoints in each frame according to the new arrangement. Since the information related to the head (i.e., its x and y position) is no longer meaningful, the sequences related to it are removed from the overall sequence.

In the code these functions are located in `utils.head_centralization`

In [5]:
def centralized_head_in_sequence(sequence_path, output_path):
    """
    Translate every frame of one sequence so that the head sits at (0, 0).

    The array stored at sequence_path is loaded and transposed so that each row
    is one frame made of interleaved coordinates (x0, y0, x1, y1, ...), the
    first pair being the head. The head coordinates are subtracted from every
    pair of the same frame; the two head rows, now constant zeros, are then
    dropped. The shape of the saved array is printed.
    :param sequence_path: path of the .npy file to read
    :param output_path: path where the centered array is saved with np.save
    :raises ValueError: if the number of coordinates per frame is odd, i.e. the
                        rows cannot be paired as (x, y)
    """
    pred = np.load(sequence_path).T

    if pred.shape[1] % 2 != 0:
        raise ValueError(
            f'Expected an even number of coordinates per frame, got {pred.shape[1]}'
        )

    # Copy the head columns first: they are part of the slices updated below and
    # would otherwise be zeroed before being subtracted from the other keypoints.
    x_head = pred[:, 0:1].copy()
    y_head = pred[:, 1:2].copy()
    pred[:, 0::2] -= x_head  # every x coordinate of every frame
    pred[:, 1::2] -= y_head  # every y coordinate of every frame

    pred = pred.T[2:, :]
    print(pred.shape)
    np.save(output_path, pred)


def centralized_head_sequence_from_dir(dir_path, output_dir_path):
    """
    Center the head of every .npy sequence found directly inside a directory.

    Each file is processed with centralized_head_in_sequence and written to
    output_dir_path under the same file name. The output directory, including
    any missing parent directory, is created when needed and reused when it
    already exists.
    :param dir_path: path of the directory holding the sequences to process
    :param output_dir_path: path of the directory where the results are saved
    """
    predictions = [prediction for prediction in os.listdir(dir_path) if prediction.endswith('.npy')]

    os.makedirs(output_dir_path, exist_ok=True)

    for prediction in predictions:
        centralized_head_in_sequence(os.path.join(dir_path, prediction), os.path.join(output_dir_path, prediction))


def centralized_head_sequence_from_project(project_path, output_path):
    """
    Center the head of every sequence of a project directory.

    The project directory is expected to follow the layout <class>/<set>/ with
    class in {control, sugar, ammonia} and set in {train, test, val}; any other
    entry is ignored. The same layout is recreated under output_path. Output
    directories are created when missing and reused when they already exist, so
    the function can be run again on the same output location.
    :param project_path: path of the directory holding the sequences to process
    :param output_path: path of the directory where the results are saved
    """
    os.makedirs(output_path, exist_ok=True)

    classes = os.listdir(project_path)
    for c in classes:  # control sugar ammonia
        if c not in ['control', 'sugar', 'ammonia']:
            continue
        class_path = os.path.join(project_path, c)  # predictions_npy/control
        sets = os.listdir(class_path)

        filled_class_path = os.path.join(output_path, c)
        os.makedirs(filled_class_path, exist_ok=True)

        for s in sets:
            if s not in ['train', 'test', 'val']:
                continue
            # The per-set output directory is created by the callee.
            centralized_head_sequence_from_dir(os.path.join(class_path, s), os.path.join(filled_class_path, s))

# Perform Filling and Transformation
Here the filling operation and the head centralization are performed. Change the path variables to where you want the files to be located.

In [6]:
# Input: directory holding the raw predictions (the unzipped archive).
prediction_npy_path = 'predictions_npy' # CHANGE HERE
# Output of the filling step, and input of the head-centralization step.
filled_prediction_path = 'predictions_filled' # CHANGE HERE
# Output of the head-centralization step.
centered_prediction_path = 'prediction_head_centered' # CHANGE HERE

# Step 1: fill the missing values of every sequence.
from_pred_to_filled_pred(prediction_npy_path, filled_prediction_path)
# Step 2: move the head to (0, 0) in every filled sequence and drop the head rows.
centralized_head_sequence_from_project(filled_prediction_path, centered_prediction_path)

C20_C.npy
	-Shape: (10, 3480)
	-Missing values for entry 0: 132
	-Missing values for entry 1: 132
	-Missing values for entry 2: 137
	-Missing values for entry 3: 137
	-Missing values for entry 4: 259
	-Missing values for entry 5: 259
	-Missing values for entry 6: 139
	-Missing values for entry 7: 139
	-Missing values for entry 8: 110
	-Missing values for entry 9: 110
C23_C.npy
	-Shape: (10, 3480)
	-Missing values for entry 0: 0
	No missing values for entry 0, filling operation skipped
	-Missing values for entry 1: 0
	No missing values for entry 1, filling operation skipped
	-Missing values for entry 2: 0
	No missing values for entry 2, filling operation skipped
	-Missing values for entry 3: 0
	No missing values for entry 3, filling operation skipped
	-Missing values for entry 4: 132
	-Missing values for entry 5: 132
	-Missing values for entry 6: 0
	No missing values for entry 6, filling operation skipped
	-Missing values for entry 7: 0
	No missing values for entry 7, filling operation 