In [6]:
import pandas as pd

from emonet import DATA_DIR, THERAPISTS
from emonet.data_prep import from_json, to_records

In [160]:
# Read the rating metadata from DATA_DIR, then tabulate the records that
# `to_records` derives from it so they can be inspected as a DataFrame.
meta = from_json(DATA_DIR.joinpath('metadata.json'))
records = to_records(meta)
labels = pd.DataFrame.from_records(records)
labels

Unnamed: 0,key,duration,therapist,fear,anger,happy,neutral,sadness
0,1173_GM1001_1326493712,75.324687,Michelle Lyn,low,low,medium,none,low
1,1173_GM1001_434684536,25.440000,Michelle Lyn,high,none,low,medium,none
2,1173_KH1001_3032271714,57.474000,Michelle Lyn,low,low,medium,none,low
3,1173_KH1001_765576337,62.180687,Michelle Lyn,low,low,none,none,none
4,1173_NM1001_2147987829,51.924000,Michelle Lyn,low,high,none,low,medium
...,...,...,...,...,...,...,...,...
4203,9926_39117_852367505,36.360000,Yared Alemu,medium,medium,none,none,medium
4204,9926_39117_896761626,34.307375,Yared Alemu,medium,low,none,none,medium
4205,9926_39117_941005323,38.820000,Yared Alemu,medium,medium,none,none,high
4206,9926_39117_948319469,36.948687,Yared Alemu,high,medium,none,none,high


In [161]:
def rated_same(meta, key, emotion, quorum=3):
    """Tell whether every rater of ``key`` gave the same label for ``emotion``.

    Args:
        meta: Mapping from a key to that key's metadata dict. Raters are the
            names from ``THERAPISTS`` that appear as keys of ``meta[key]``.
        key: Entry of ``meta`` to inspect.
        emotion: Name of the rating to compare across raters.
        quorum: Rater-count threshold. The comparison is strict: agreement is
            only evaluated when MORE than ``quorum`` raters are present.

    Returns:
        True when more than ``quorum`` raters are present and their labels for
        ``emotion`` are all equal; False otherwise.
    """
    present = [name for name in THERAPISTS if name in meta[key]]
    # Too few raters: never counts as consensus, whatever the labels are.
    if len(present) <= quorum:
        return False
    # Every name in `present` is a key of meta[key], so plain indexing is safe.
    distinct_labels = {meta[key][name][emotion] for name in present}
    return len(distinct_labels) == 1

In [162]:
rated_same(meta, '1173_GM1001_1326493712', 'anger')

False

In [163]:
def get_consensus_keys(meta, emotion, quorum=3):
    """List the keys of ``meta`` on which the raters agree for ``emotion``.

    Args:
        meta: Mapping handed to ``rated_same`` for each of its keys.
        emotion: Name of the rating to compare.
        quorum: Rater-count threshold forwarded to ``rated_same``.

    Returns:
        The keys for which ``rated_same`` returns True, in the iteration
        order of ``meta``.
    """
    return [
        candidate
        for candidate in meta.keys()
        if rated_same(meta, candidate, emotion, quorum)
    ]
    

In [164]:
get_consensus_keys(meta, 'anger')

['4205_39117_3395725370', '4403_39117_1568678400', '54205_53113_1587945600']

In [165]:
def consensus(meta, quorum=3, emotions=('anger', 'fear', 'sadness')):
    """Collect, per emotion, the keys on which the raters agree.

    Args:
        meta: Mapping passed through unchanged to ``get_consensus_keys``.
        quorum: Rater-count threshold forwarded to ``get_consensus_keys``.
        emotions: Emotion names to evaluate. The default is the three
            emotions that used to be hard-coded here, so existing calls
            return the same result.

    Returns:
        dict mapping each emotion name (in the order given) to the list
        returned by ``get_consensus_keys(meta, emotion, quorum)``.
    """
    # Built as a comprehension so no local variable shadows this function's
    # own name inside its body.
    return {
        emotion: get_consensus_keys(meta, emotion, quorum)
        for emotion in emotions
    }

In [176]:
# rated_same only evaluates agreement when strictly more than `quorum` raters
# are present, so quorum=2 keeps clips rated identically by at least three.
con2 = consensus(meta, quorum=2)
con2

{'anger': ['1940_39117_3957491636',
  '4205_39117_3395725370',
  '4403_39117_1568678400',
  '54205_53113_1587945600',
  '6531_53113_1588204800',
  '6531_53113_1588809600',
  '8161_39117_1219643178',
  '8161_39117_3344890079',
  '8161_39117_3355640551',
  '8161_39117_3423956819',
  '8161_39117_592551866'],
 'fear': ['150648_35465_1569974400',
  '3789_53113_1587081600',
  '4205_39117_2876564785',
  '4205_39117_3390161853',
  '4403_39117_1593388800',
  '4644_53113_1596412800',
  '4645_53113_1603670400',
  '46791_53796_1568332800',
  '52756_53113_1576454400',
  '52756_53113_1577059200',
  '54205_53113_1587945600',
  '55119_53113_1570492800',
  '6461_39117_1075533122',
  '6461_39117_3385805924',
  '6461_39117_541825201',
  '6461_39117_744390683',
  '7010_53113_1574294400',
  '8161_39117_1219643178',
  '8161_39117_2379070509',
  '8161_39117_2794214016',
  '8161_39117_3355640551',
  '8161_39117_33944129',
  '8161_39117_863047773',
  '8646_53113_4289373464',
  '8953_39117_1631602027',
  '8953_

In [167]:
def grab_score(meta, key):
    """Depth-first lookup of ``key`` in a nested dict, in iteration order.

    Args:
        meta: Dict to search; nested dict values are searched recursively.
        key: Key to look for.

    Returns:
        The value stored under ``key`` if it is met directly in ``meta``
        (whatever its type). A match found inside a nested dict is only
        accepted when it is a ``str``; other nested matches are skipped and
        the search moves on. ``None`` when nothing qualifies.
    """
    for name, value in meta.items():
        if name == key:
            return value
        if not isinstance(value, dict):
            continue
        nested = grab_score(value, key)
        # Non-string nested hits (and misses) do not end the search.
        if isinstance(nested, str):
            return nested
    return None

In [168]:
def grab_score_gen(meta, key):
    """Yield every string stored under ``key`` anywhere in a nested dict.

    Args:
        meta: Dict to search; nested dict values are searched recursively.
        key: Key to look for.

    Yields:
        Each ``str`` value found under ``key``, depth-first in iteration
        order. Values of other types stored under ``key`` are not yielded.
    """
    for name, value in meta.items():
        if name == key and isinstance(value, str):
            yield value
        if isinstance(value, dict):
            # Recurse into this generator. Iterating the result of the
            # non-generator `grab_score` yielded single characters of a
            # string hit and raised TypeError when it returned None.
            yield from grab_score_gen(value, key)

In [187]:
# One anger label per clip in the quorum=2 consensus list: grab_score returns
# the first string found under 'anger' in that clip's metadata.
scores = [grab_score(meta[clip], 'anger') for clip in con2['anger']]

In [188]:
len(scores)

11

In [189]:
len(meta)

1052

In [190]:
scores

['none',
 'low',
 'low',
 'none',
 'none',
 'none',
 'low',
 'medium',
 'low',
 'low',
 'low']

**None of the anger consensus clips (`quorum=2`) is rated `high`, so label propagation is useless here.**

<hr>

In [68]:
wav2vec = from_json(DATA_DIR.joinpath('wav2vec_splits.json'))

In [152]:
# Stems of the training-split keys: the part of each key before the first '--'.
stems = {x.split('--')[0] for x in wav2vec['train']['keys']}
# Compute the anger consensus once and keep it as a set. Calling consensus()
# inside the comprehension re-scanned all of `meta` for every stem.
anger_consensus = set(consensus(meta, 2)['anger'])
# NOTE: `stems` is a set, so the order of `anger_keys` may differ between
# kernel sessions; do not rely on positional indexing into it.
anger_keys = [x for x in stems if x in anger_consensus]
anger_keys

['8161_39117_3355640551',
 '6531_53113_1588204800',
 '8161_39117_1219643178',
 '4205_39117_3395725370',
 '8161_39117_3423956819',
 '54205_53113_1587945600',
 '6531_53113_1588809600',
 '8161_39117_592551866',
 '4403_39117_1568678400']

In [158]:
meta[anger_keys[1]]

{'file_path': 'vad_wavs/6531_53113_1588204800.wav',
 'file_name': '6531_53113_1588204800.wav',
 'sample_rate': 16000,
 'duration': 34.885375,
 'stem': '6531_53113_1588204800',
 'Sedara Burson': {'fear': 'none',
  'anger': 'none',
  'happy': 'none',
  'neutral': 'high',
  'sadness': 'none'},
 'Michelle Lyn': {'fear': 'none',
  'anger': 'none',
  'happy': 'medium',
  'neutral': 'high',
  'sadness': 'none'},
 'Yared Alemu': {'fear': 'low',
  'anger': 'none',
  'happy': 'none',
  'neutral': 'none',
  'sadness': 'low'},
 'avg_score': {'anger': 0.0,
  'fear': 0.3333333333333333,
  'sadness': 0.3333333333333333}}