# Usage Examples

These examples show how to read TSDF data into a NumPy array and how to write a NumPy array back to TSDF, using the `tsdf` library.

## How to run these examples

### Imports


In [1]:
import os
import numpy as np
import tsdf

/Users/peter/miniforge3/lib/python3.9/site-packages/tsdf


### Set data location

In [2]:
# Directory (relative path) that holds the example binary and metadata files
# read and written by the cells below
data_dir = "../tests/data"

## Process an existing binary file and write the new data
Read an existing binary data file (accompanied by its TSDF metadata), process the data, and save the result in a new format together with the corresponding TSDF metadata file.

### Load dummy data

In [3]:
# Base name shared by the binary file and its metadata file
data_name = "example_10_3_int16"
metadata_path = f"{data_dir}/{data_name}_meta.json"
binary_file_name = f"{data_name}.bin"

# A metadata file may describe several binaries, so loading it yields a
# dictionary with one metadata object per binary
metadata_dict = tsdf.load_metadata_from_path(metadata_path)

# Select the metadata object of our binary; the binary file name is the key
metadata = metadata_dict[binary_file_name]

# Read the binary data described by that metadata object
data = metadata.load_binary()

# Summarise what was loaded
print(
    f"Data type used for storing:\t {data.dtype}",
    f"Data dimensions:\t\t {data.shape}",
    f"Number of rows:\t\t\t {data.shape[0]}",
    sep="\n",
)

Data type used for storing:	 int16
Data dimensions:		 (10, 3)
Number of rows:			 10


### Perform basic data processing

In [4]:
# Dividing the data yields floating-point values; keep them as float32
scaled_data = data / 10
processed_data_1 = scaled_data.astype(np.float32)

# Summarise the processed data
print(
    f"Data type used for storing:\t {processed_data_1.dtype}",
    f"Data dimensions:\t\t {processed_data_1.shape}",
    f"Number of rows:\t\t\t {processed_data_1.shape[0]}",
    sep="\n",
)

Data type used for storing:	 float32
Data dimensions:		 (10, 3)
Number of rows:			 10


### Write the processed data 
Write the processed data in binary format. The call returns the corresponding metadata object.

In [5]:
# Base name (without extension) of the file that will hold the processed data
processed_data_name_1 = "tmp_test_example_10_3_int16_to_float32"

# Use a plain-dict copy of the original metadata as the basis for the new file
source_metadata_copy = metadata.get_plain_tsdf_dict_copy()

# Store the processed data as a binary file in data_dir and keep the
# metadata object returned by the call
processed_metadata_1 = tsdf.write_binary_file(
    data_dir,
    f"{processed_data_name_1}.bin",
    processed_data_1,
    source_metadata_copy,
)

### Write the TSDF metadata file

In [6]:
# Name of the metadata file that describes the newly written binary file
processed_metadata_file_name_1 = f"{processed_data_name_1}_meta.json"

# Write the metadata file (a list with a single metadata object)
tsdf.write_metadata([processed_metadata_1], processed_metadata_file_name_1)

### Write a metadata file that combines multiple binary files

In [7]:
# Preprocess the original data to generate another data source.
# Widen to int32 *before* multiplying: the loaded data is int16, and scaling
# it in int16 would silently wrap around for any |value| > 32.
processed_data_2 = data.astype("int32") * 1000

# Adjust the metadata slightly: start from a copy of the original metadata
# and remove the 'scale_factors' field (the default avoids a KeyError when
# the field is not present)
updated_metadata = metadata.get_plain_tsdf_dict_copy()
updated_metadata.pop("scale_factors", None)

# Save the new binary file, named "<name>.bin" like the float32 file
processed_data_name_2 = "tmp_test_example_10_3_int16_to_int32"
processed_metadata_2 = tsdf.write_binary_file(
    data_dir,
    f"{processed_data_name_2}.bin",
    processed_data_2,
    updated_metadata,
)

# Write a metadata file that combines the two binary files
tsdf.write_metadata(
    [processed_metadata_1, processed_metadata_2],
    "tmp_test_example_10_3_int16_to_int_n_float_meta.json",
)

## Generate and save data from scratch

In [8]:
# Generate reproducible random data (fixed seed)
rs = np.random.RandomState(seed=42)
data_1 = rs.rand(17, 1).astype(np.float32)
# rand() draws floats from [0, 1); casting those to int16 truncates every
# value to 0. Draw integers directly so the int16 arrays hold real data.
data_2 = rs.randint(-1000, 1000, size=(15, 2), dtype=np.int16)
data_3 = rs.randint(-1000, 1000, size=(10, 3), dtype=np.int16)

# Define the metadata shared by the three binary files
# NOTE(review): 'channels' and 'units' list three entries, while data_1 and
# data_2 have 1 and 2 columns respectively - confirm whether each file should
# get its own channel/unit lists.
new_metadata = {
    "subject_id": "example",
    "study_id": "example",
    "device_id": "example",
    "endianness": "little",
    "metadata_version": "0.1",
    "start_datetime_unix_ms": 1571135957025,
    "start_iso8601": "2019-10-15T10:39:17.025000+00:00",
    "end_datetime_unix_ms": 1571168851826,
    "end_iso8601": "2019-10-15T19:47:31.826000+00:00",
    "channels": ["x", "y", "z"],
    "units": ["m/s/s", "m/s/s", "m/s/s"]
}

# Write the three binary files based on the provided metadata
file_prefix = "tmp_test"
new_meta_1 = tsdf.write_binary_file(data_dir, f"{file_prefix}_1.bin", data_1, new_metadata)
new_meta_2 = tsdf.write_binary_file(data_dir, f"{file_prefix}_2.bin", data_2, new_metadata)
new_meta_3 = tsdf.write_binary_file(data_dir, f"{file_prefix}_3.bin", data_3, new_metadata)

# Write the metadata file, which references the three binary files
tsdf.write_metadata([new_meta_1, new_meta_2, new_meta_3], f"{file_prefix}_meta.json")

## Transform legacy (TSDB) format to the current TSDF v0.1
Transform one file (or all files within the given directory) from TSDB to TSDF format.

In [9]:
from tsdf.legacy_tsdf_utils import (
    generate_tsdf_metadata_from_tsdb,
    convert_file_tsdb_to_tsdf,
    convert_files_tsdb_to_tsdf,
)

# Path to the legacy (TSDB) metadata file, and path of the TSDF metadata file
# to be generated from it
path_to_file = os.path.join(data_dir, "ppp_format_meta_legacy.json")
path_to_new_file = os.path.join(data_dir, "tmp_ppp_format_meta.json")

# Generate a TSDF metadata file from TSDB
generate_tsdf_metadata_from_tsdb(path_to_file, path_to_new_file)

# Convert a TSDB metadata file to TSDF format
# (left disabled; argument is illustrative - check the function signature)
# convert_file_tsdb_to_tsdf(path_to_file)

# Convert all metadata files in the directory from TSDB to TSDF format
# (left disabled; argument is illustrative - check the function signature)
# convert_files_tsdb_to_tsdf(data_dir)

## Validate TSDF file

Files can be validated using the validator module, which is also callable from the command line. The validator checks the metadata file and inspects whether the binary file is consistent with the metadata. This snippet shows how to use the validator from code.

In [10]:
# The validator module provides the consistency checks
from tsdf import validator

# Build the path of the metadata file to verify
metadata_file_name = "ppp_format_meta.json"
path_to_metadata_file = os.path.join(data_dir, metadata_file_name)

# Run the validation on that metadata file
validator.validate_tsdf_format(path_to_metadata_file)

Successfully loaded binary file ppp_format_time.bin, resulting shape: (17,)
Successfully loaded binary file ppp_format_samples.bin, resulting shape: (17, 6)
