In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import requests #type: ignore
import base64
import pandas as pd
import numpy as np
import json
from joblib import load # type: ignore
import sys;sys.path.append('../../')
from listener_effort_api import config
from listener_effort_api.utils import get_logger
logger = get_logger()

def load_wav_file(file_path):
    """Read the file at ``file_path`` in binary mode and return its content as bytes."""
    with open(file_path, mode='rb') as wav_handle:
        raw_bytes = wav_handle.read()
    return raw_bytes

### Load model

In [3]:
# Run configuration.
#   training_study: prefix of the saved model / metadata files that get loaded
#                   (values used so far: 'Radcliff', 'Speech_study', 'Prilenia').
#   test_study:     prefix of the dataset CSV the model is applied to.
#   model_name:     model identifier, second component of the saved file names.
training_study = 'Speech_study'
test_study = 'Prilenia'
model_name = 'LinearRegression05'

In [4]:
# Load the model's metadata (a JSON file stored in the models directory).
model_metadata_save_name = f'{training_study}_{model_name}_model_metadata'
# Read as UTF-8 explicitly: JSON is UTF-8 by specification, whereas open()'s
# default encoding depends on the platform.
with open(f'{config.Models.models_path}/{model_metadata_save_name}.json', 'r', encoding='utf-8') as f:
    model_metadata = json.load(f)

# Load the fitted model.
# SECURITY: joblib.load unpickles the file and can therefore execute arbitrary
# code; only load model files that come from a trusted source.
# NOTE(review): loading a model saved with a different scikit-learn version can
# emit a warning pointing at the "model persistence" docs — confirm the versions match.
regressor = load(f'{config.Models.models_path}/{training_study}_{model_name}_model.joblib')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
# Disabled alternative: read df_run from a predictions CSV instead of computing
# the predictions in the cells below.
# NOTE(review): presumably that CSV was written by an earlier run — confirm, or
# delete this cell if it is no longer needed.
# predictions_save_name = '../predictions/Speech_study_on_Prilenia_LinearRegression05.csv'
# df_run = pd.read_csv(predictions_save_name)
# df_run

### Get original predictions (computed directly with the loaded model)

In [6]:
# Pull the three settings used by the following cells out of the model metadata.
average_feature_by_session, target_by_session, features_train = (
    model_metadata[key]
    for key in ('average_feature_by_session', 'target_by_session', 'features_train')
)

In [15]:
def string_to_np_array(s):
    """Parse a bracketed, whitespace-separated list of numbers into a numpy array.

    Parameters
    ----------
    s : str or missing value
        Text such as ``'[1. 2.5 3.]'``; surrounding ``[``/``]`` characters are stripped.

    Returns
    -------
    numpy.ndarray or float
        Array of the parsed floats, or ``np.nan`` when ``s`` is missing
        according to ``pd.isna``.
    """
    if pd.isna(s):
        return np.nan
    # Strip the enclosing brackets, then split on any run of whitespace.
    tokens = s.strip('[]').split()
    return np.array(list(map(float, tokens)))

### Load dataset
path = f'../datasets/{test_study}_dataset_with_features_and_le.csv'
df = pd.read_csv(path)
# Both effort-list columns go through the same string -> numpy array parser.
for effort_column in ('effort_list', 'effort_list_session'):
    df[effort_column] = df[effort_column].apply(string_to_np_array)
print(df.shape)

(79062, 31)


In [16]:
# Build the evaluation subset: every wav that has SLPs labels, plus a fixed-size
# random draw of unlabelled wavs from sessions that share nothing with the labelled ones.
# NOTE(review): this cell overwrites `df`; re-running it without first re-running
# the dataset-loading cell filters the already reduced frame.
SAMPLING_SEED = 0
N_WAVS_PER_SESSION = 3

# Still seed the global RNG for any later cell that draws from it.
np.random.seed(SAMPLING_SEED)
print(df.shape)

### Select all wavs with SLPs labels
df_with_slps = df.dropna(subset=['effort_mean']).copy()

### Candidate wavs without SLPs labels
df_without_slps = df[df.effort_mean.isna()].copy()

### This discards sessions in common with SLPs
df_without_slps = df_without_slps.query('session_id not in @df_with_slps.session_id')

### Filter by task
available_tasks = [f'sentenceRotating{i}' for i in range(1,6)]
df_without_slps = df_without_slps.query('task_name in @available_tasks')

### We only keep sessions with at least N_WAVS_PER_SESSION tasks, then draw exactly that many per session
use_sessions = df_without_slps.groupby('session_id').size()
use_sessions = use_sessions[use_sessions >= N_WAVS_PER_SESSION].index
df_without_slps = df_without_slps.query('session_id in @use_sessions')
# An explicit random_state makes the draw independent of whatever consumed the
# global numpy RNG before this line.
df_without_slps = df_without_slps.groupby('session_id').sample(
    N_WAVS_PER_SESSION, random_state=SAMPLING_SEED
)
df = pd.concat([df_with_slps, df_without_slps])

print(df.shape)

(79062, 31)
(15783, 31)


In [17]:
# Keep only rows whose WER_large_v2 is below 0.8 — this guarantees feature quality.
# WARNING: an audio's SLPs labels are dropped together with the audio.
wer_is_acceptable = df['WER_large_v2'] < 0.8
df = df[wer_is_acceptable]
print(df.shape)

(14270, 31)


In [18]:
# Drop rows that miss a value in any of the model's input features.
# Rebinding instead of inplace=True: `df` comes out of a .query() call, and
# mutating such a frame in place can raise SettingWithCopy warnings.
print(df.shape)
df = df.dropna(subset=features_train)
print(df.shape)

(14270, 31)
(14268, 31)


In [19]:
### B. Feature aggregation
if average_feature_by_session:
    # One row per session: the mean of every model feature plus the number of
    # rows (n_tasks) that went into that mean.
    by_session = df.groupby('session_id')
    df_tasks_by_session = by_session.size().reset_index(name='n_tasks')
    df_features_by_session = (
        by_session[features_train]
        .mean()
        .reset_index()
        .merge(df_tasks_by_session, on='session_id')
    )
    df_run = df_features_by_session.copy()
else:
    # One row per wav, keeping the identifier columns next to the features.
    id_columns = ['user_id', 'session_id', 'wav_path']
    df_features = df[id_columns + features_train].copy()
    df_run = df_features.copy()

print(df_run.shape)

(4907, 4)


In [20]:
# Name of the target column: the session-level mean when the model metadata says
# the target is per session, the per-row mean otherwise.
target_name = 'effort_mean_session' if target_by_session else 'effort_mean'

In [21]:
# Run the loaded regressor on the feature columns and store its output in a
# '<target>_predicted' column of df_run.
target_name_predicted = f'{target_name}_predicted'
feature_matrix = df_run[features_train]
df_run[target_name_predicted] = regressor.predict(feature_matrix)
print(df_run.shape)

(4907, 5)


### Get our own predictions from the listener-effort API

In [34]:
# session_id = df_run.query('n_tasks==3').sample(1).session_id.values[0]
# Pick the three-task session with the highest predicted effort. The column is
# addressed through target_name_predicted rather than a hard-coded name, so the
# cell keeps working when the target is not the session-level mean.
top_session = df_run.query('n_tasks==3').nlargest(1, target_name_predicted)
session_id = top_session.session_id.iloc[0]
print(session_id)
# Query once and reuse the result for both the printed value and the table.
session_rows = df_run.query('session_id==@session_id')
print('Effort predicted: ', session_rows[target_name_predicted].values[0])
display(session_rows)

DB52F0FE-5902-4CAF-AF9A-C0DBE3111D5F
Effort predicted:  104.99297384292863


Unnamed: 0,session_id,whisper_confidence_base,speaking_rate_large_v2,n_tasks,effort_mean_session_predicted
4386,DB52F0FE-5902-4CAF-AF9A-C0DBE3111D5F,0.333191,0.579095,3,104.992974


In [35]:
import base64
import requests

def load_wav_file(file_path):
    """Read the file at ``file_path`` in binary mode and return its content as bytes.

    Duplicate of the helper of the same name defined next to the imports at the
    top of the notebook.
    """
    with open(file_path, mode='rb') as wav_handle:
        raw_bytes = wav_handle.read()
    return raw_bytes

# Upper bound (seconds) on how long to wait for the API. Without a timeout,
# requests waits forever if the server never answers.
REQUEST_TIMEOUT_S = 600

# Look the session's wav paths up once instead of re-running the query per audio.
wav_paths = df.query('session_id==@session_id').wav_path.values
# One transcript per audio, in the same order as wav_paths; None is sent as JSON null.
transcripts = [
    "The forest near my grandpa's cabin is said to contain mythical creatures.",
    None,
    None,
]
if len(wav_paths) < len(transcripts):
    raise IndexError(
        f'expected at least {len(transcripts)} wavs for session {session_id}, '
        f'found {len(wav_paths)}'
    )

# Each audio is sent as base64 text of the raw wav bytes.
payload = {
    "input": [
        {
            "audios": [
                {
                    "wav": base64.b64encode(load_wav_file(wav_path)).decode(),
                    "transcript": transcript,
                }
                for wav_path, transcript in zip(wav_paths, transcripts)
            ]
        }
    ]
}

response = requests.post(
    'http://localhost:8000/v1/listener-effort',
    json=payload,
    timeout=REQUEST_TIMEOUT_S,
)
print(response.status_code)
response.json()

200


{'status': 'ok',
 'result': [{'status': 'ok',
   'listener_effort': 100.0,
   'listener_effort_stddev': 7.322407086303299,
   'audio_results': [{'status': 'ok', 'listener_effort': 84.46682888399953},
    {'status': 'ok', 'listener_effort': 100.0},
    {'status': 'ok', 'listener_effort': 100.0}]}]}