## Data preprocessing

In [None]:
import movekit as mkit
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### Read data input

In [None]:
# Path to the input CSV file, relative to the current working directory
path = "./datasets/fish-5.csv"

# Alternative: enter path to Excel file
# path = "./datasets/fish-5.xlsx"

In [None]:
# Read in the file at `path` using movekit's `read_data`, then preview the result
data = mkit.read_data(path)
data.head()

In [None]:
# Simple call of the preprocessing method, leaving all optional parameters at their defaults
preprocessed_data = mkit.preprocess(data)

In [None]:
# OPTIONAL: more parameters to control the preprocessing of data

# preprocessed_data = mkit.preprocess(data, dropna=True, interpolation=False, limit=1, limit_direction="forward", inplace=False, method="linear")

# Parameters
#  data: DataFrame to perform preprocessing on
#  dropna: Optional parameter to drop rows with missing values for 'time' and 'animal_id'
#  interpolation: Optional parameter to perform linear interpolation
#  limit: Maximum number of consecutive NANs to fill
#  limit_direction: If limit is specified, consecutive NaNs will be filled in this direction.
#  method: Interpolation technique to use. Default is "linear".
#  order: To be used in case of polynomial interpolation.

In [None]:
# OPTIONAL: rescale the positional data to a user-defined range
# preprocessed_data = mkit.convert_measueres(preprocessed_data, x_min = 0, x_max = 100, y_min = 0, y_max = 100)

In [None]:
# Save the preprocessed data to a CSV file, omitting the DataFrame index
preprocessed_data.to_csv("datasets/fish-5-cleaned.csv", index=False)

#### Support for geographic coordinates

`movekit` can project GPS coordinates, given as latitude and longitude, onto a Cartesian coordinate system.

In [None]:
# Note: this reuses (overwrites) the `path` variable
path = "./datasets/geo.csv"

# Read in the semicolon-separated file using pandas, then preview the result
geo_data = pd.read_csv(path, sep=';')
geo_data.head()

In [None]:
# Convert the latitude/longitude data and store the result in a new DataFrame
projected_data = mkit.convert_latlon(geo_data)
projected_data.head()

Often, it is helpful to normalize the data, e.g. for plotting.

In [None]:
# Normalize the projected data; the result replaces `projected_data`
projected_data = mkit.normalize(projected_data)