# 1. Get feature curve
In this notebook we:
- Load the speech recognition data for each representation.
- Compute a DPS feature curve for each file and save it for later processing.

## Setup

In [None]:
# Installs
# Reinstall the two project packages from GitHub before importing them.
# Every pip command is run through `sys.executable` so that it targets the
# interpreter this kernel is running on, not whichever `pip` is first on PATH.
import sys
!echo "Purging pip environment and installing packages..."
# Clear pip's cache, then remove any installed copies (-y: no confirmation prompt).
!{sys.executable} -m pip cache purge 
!{sys.executable} -m pip uninstall -y jhutils 
!{sys.executable} -m pip uninstall -y dps 
# Install both packages straight from their git repositories (-q: quiet output).
# NOTE(review): no branch, tag or commit is pinned, so each run installs whatever
# the repositories' default branches currently contain.
!{sys.executable} -m pip install -q git+https://github.com/jdchart/dps.git
!{sys.executable} -m pip install -q git+https://github.com/jdchart/jh-py-utils.git

# Imports
print("Importing packages...")
import os
from jhutils.local_files import read_json, collect_files
import dps
import numpy as np
import utils
import pandas as pd
print("Ready!")

## Load analyses

In [None]:
# Directory searched for the speech-recognition analysis files (JSON).
VOSK_ANALYSES = "/Users/jacob/Documents/Repos/dps/projects/data/input"

# Collect the JSON analysis files from VOSK_ANALYSES.
analysis_files = collect_files(VOSK_ANALYSES, ["json"])

# Fail fast: the cells below index and iterate analysis_files, so an empty
# result would otherwise surface later as an IndexError (preview cell) or as a
# save loop that silently does nothing.
if len(analysis_files) == 0:
    raise FileNotFoundError(f"No JSON analysis files found in {VOSK_ANALYSES!r}")

print(f"Successfully found {len(analysis_files)} files!")

## Analysis config
- `FPS`: frames per second in the raw curve. When loading the speech analysis and creating the 'raw curve' that represents speaking/silence, how many frames correspond to one second.
- `WINDOW_SIZE`: size, in frames, of the rolling window used to compute the DPS feature curve. It should be at least `FPS` (i.e. at least one second). Window size in seconds: `WINDOW_SIZE / FPS`.
- `HOP_SIZE`: hop size in frames, i.e. how far the rolling window advances between two successive analysis windows.

In [None]:
# Frames per second of the raw speaking/silence curve.
FPS = 32
# Length, in frames, of the rolling window used to compute the DPS feature curve.
WINDOW_SIZE = 1920
# Hop size, in frames, passed together with WINDOW_SIZE to get_dps_feature_curve.
HOP_SIZE = 20
# Interval forwarded to the utils curve display/save helpers.
DISPLAY_INTERVAL = 5

# Reject values that cannot describe a usable analysis before any file is
# processed (a zero FPS would otherwise only show up as a ZeroDivisionError below).
for _name, _value in (("FPS", FPS), ("WINDOW_SIZE", WINDOW_SIZE), ("HOP_SIZE", HOP_SIZE)):
    if _value <= 0:
        raise ValueError(f"{_name} must be positive, got {_value}")

# This section's notes recommend a window of at least one second (WINDOW_SIZE >= FPS);
# warn rather than fail so that shorter windows can still be tried deliberately.
if WINDOW_SIZE < FPS:
    print(f"Warning: WINDOW_SIZE ({WINDOW_SIZE}) is smaller than FPS ({FPS}); the window is shorter than one second.")

print(f"Rolling window size in seconds: {WINDOW_SIZE / FPS}")

## Display DPS feature curve
For testing purposes. Change `INDEX` to select a file from your list.

In [None]:
# Position, within analysis_files, of the file to preview.
INDEX = 0
selected_file = analysis_files[INDEX]

# Read the JSON directly (for its metadata) and build the speech analysis
# from the same file, then compute the DPS feature curve for it.
analysis_read = read_json(selected_file)
speech_recognition = dps.SpeechAnalysis(selected_file, fps = FPS)
dps_curve = speech_recognition.get_dps_feature_curve(WINDOW_SIZE, HOP_SIZE)

# Summary figures for a quick sanity check of the preview file.
print(f"Raw curve num frames: {speech_recognition.raw_curve[0].shape}")
print(f"Media length: {analysis_read['media_length']}")
print(f"Media length in frames: {analysis_read['media_length'] * FPS}")
print(f"DPS feature curve shape: {dps_curve.shape}")

# Show raw curves 0 and 1, followed by the DPS feature curve.
for curve_index in (0, 1):
    speech_recognition.display_raw_curve(curve_index)
utils.display_curve(dps_curve, "time", FPS, DISPLAY_INTERVAL)

## Save feature curves

In [None]:
# Output folder; its name encodes FPS, WINDOW_SIZE and HOP_SIZE, so runs with
# different parameters are written to different folders.
OUTPUT_DEST = f"/Users/jacob/Documents/Repos/dps/projects/data/output/dps_curves/dps_curve_{FPS}_{WINDOW_SIZE}_{HOP_SIZE}"

# Create both output sub-folders once, up front, instead of on every iteration.
# This also means the folders exist even when analysis_files is empty.
data_dir = os.path.join(OUTPUT_DEST, "data")
graphs_dir = os.path.join(OUTPUT_DEST, "graphs")
os.makedirs(data_dir, exist_ok = True)
os.makedirs(graphs_dir, exist_ok = True)

for i, file in enumerate(analysis_files):
    print(f"Treating file {i + 1}/{len(analysis_files)}")
    speech_recognition = dps.SpeechAnalysis(file, fps = FPS)
    dps_curve = speech_recognition.get_dps_feature_curve(WINDOW_SIZE, HOP_SIZE)

    # Outputs are named after the input file, without its directory or extension.
    file_name = os.path.splitext(os.path.basename(file))[0]
    np.save(os.path.join(data_dir, f"{file_name}.npy"), dps_curve)
    utils.save_curve(os.path.join(graphs_dir, f"{file_name}.png"), dps_curve, "time", FPS, DISPLAY_INTERVAL)

print("👍 Finished!")

## Test

In [None]:
# Reload every saved .npy file found in OUTPUT_DEST and report its array shape.
np_files = collect_files(OUTPUT_DEST, ["npy"])
for saved_path in np_files:
    curve_shape = np.load(saved_path).shape
    print(f"{os.path.basename(saved_path)}: {curve_shape}")

# Process manual data

In [None]:
# CSV of manually timed segments, parsed with a comma as the decimal mark.
SOURCE_FILE = "/Users/jacob/Documents/Repos/dps/projects/data/Fusion/operate1curve/LMI_Stopwatch_20230509.csv"
df_stopwatch = pd.read_csv(SOURCE_FILE, decimal=',')

# The signal has one slot per unit of `end_time_s` (rounded up) and starts as
# all-NaN, so slots that no segment covers stay NaN.
total_duration = df_stopwatch['end_time_s'].max()
n_slots = int(np.ceil(total_duration))
wps_signal = np.full(n_slots, np.nan)

# Walk the segments column-wise and paint each one's mean value over the slots
# it spans: start rounded down, end rounded up. Later rows overwrite earlier
# ones wherever the rounded ranges overlap.
segments = zip(df_stopwatch['start_time_s'], df_stopwatch['end_time_s'], df_stopwatch['wps_mean'])
for start_s, end_s, mean_wps in segments:
    start = int(np.floor(start_s))
    end = int(np.ceil(end_s))
    wps_signal[start:end] = mean_wps

# Show the signal with the helper's defaults, then on a time axis at 1 frame per second.
utils.display_curve(wps_signal)
utils.display_curve(wps_signal, "time", 1, DISPLAY_INTERVAL)

In [None]:
# Derive the output names from SOURCE_FILE so they always follow the CSV that
# was actually processed, instead of repeating its name by hand in three places.
manual_name = os.path.splitext(os.path.basename(SOURCE_FILE))[0]

# NOTE: this rebinds OUTPUT_DEST; any cell that reads OUTPUT_DEST and is re-run
# after this one will point at the manual-data folder.
OUTPUT_DEST = f"/Users/jacob/Documents/Repos/dps/projects/data/output/dps_curves/dps_curve_{manual_name}"
os.makedirs(os.path.join(OUTPUT_DEST, "data"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DEST, "graphs"), exist_ok=True)

# Save the array and its graph; the graph uses 1 frame per second, as in the preview.
np.save(os.path.join(OUTPUT_DEST, "data", f"{manual_name}.npy"), wps_signal)
utils.save_curve(os.path.join(OUTPUT_DEST, "graphs", f"{manual_name}.png"), wps_signal, "time", 1, DISPLAY_INTERVAL)