In [4]:
import pandas as pd
import torchaudio
from transformers import Wav2Vec2Processor
from tqdm import tqdm
tqdm.pandas()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def speech_file_to_array_fn(path , target_sampling_rate):
    """Load an audio file and return its waveform resampled to ``target_sampling_rate``.

    Args:
        path: Location of the audio file; handed directly to ``torchaudio.load``.
        target_sampling_rate: Sampling rate the loaded waveform is converted to.

    Returns:
        The resampled waveform as a numpy array, with size-1 dimensions squeezed out.
    """
    waveform, source_rate = torchaudio.load(path)
    # The resampler is built per call because the source rate is only known after loading.
    to_target_rate = torchaudio.transforms.Resample(source_rate, target_sampling_rate)
    resampled = to_target_rate(waveform)
    return resampled.squeeze().numpy()

In [3]:
# Checkpoint name from which the Wav2Vec2 processor is loaded.
model_path = "facebook/wav2vec2-large-960h"
processor = Wav2Vec2Processor.from_pretrained(model_path)
# The feature extractor's sampling rate is reused as the resampling target for every audio file.
target_sampling_rate = processor.feature_extractor.sampling_rate

In [5]:
def shape(path , target_sampling_rate):
    """Return the processor's ``input_values`` entry for a single audio file.

    The file is loaded and resampled by ``speech_file_to_array_fn``, wrapped in a
    one-element batch and passed through the module-level ``processor``.

    Args:
        path: Location of the audio file.
        target_sampling_rate: Rate used both for resampling and as the
            ``sampling_rate`` argument of the processor call.

    Returns:
        The first (and only) element of ``input_values`` in the processor output.
    """
    batch = [speech_file_to_array_fn(path , target_sampling_rate)]
    processed = processor(batch, sampling_rate=target_sampling_rate )
    return processed['input_values'][0]

In [7]:
# NOTE(review): unpickling can execute arbitrary code — only load this file if it comes from a trusted source.
df = pd.read_pickle("../data/emotion_audio_text.pkl")
# Inspect the stored path of the row labelled 0 to see how paths are written in the frame.
df['path'][0]

'../../data/train_splits_wav/dia0_utt0.wav'

In [7]:
# Sanity check on one file: size of the second dimension of the resampled waveform array.
speech_file_to_array_fn("../data/test_splits_wav/dia0_utt0.wav" , target_sampling_rate).shape[1]

36181

In [8]:
# x[3:] drops the first three characters of each stored path. The sample path shown earlier starts
# with "../../" while files are opened from "../" in this notebook, so this presumably strips one
# leading "../" — TODO confirm for every row.
# The size of the second dimension of the processed array is stored as `audio_shape`.
df['audio_shape'] = df['path'].progress_apply(lambda x: shape(x[3:] , target_sampling_rate).shape[1])

100%|██████████| 13704/13704 [05:33<00:00, 41.07it/s]


In [9]:
df

Unnamed: 0,emotion,dialog,utterance,text,num_words,split,path,name,label,num_channels,audio_shape
0,0,0,0,also i was the point person on my company s tr...,19,train,../../data/train_splits_wav/dia0_utt0.wav,dia0_utt0,neutral,2,90795
1,0,0,1,you must ve had your hands full,7,train,../../data/train_splits_wav/dia0_utt1.wav,dia0_utt1,neutral,2,23552
2,0,0,2,that i did that i did,6,train,../../data/train_splits_wav/dia0_utt2.wav,dia0_utt2,neutral,2,47104
3,0,0,3,so let s talk a little bit about your duties,10,train,../../data/train_splits_wav/dia0_utt3.wav,dia0_utt3,neutral,2,44373
4,0,0,5,"now you ll be heading a whole division , so yo...",17,train,../../data/train_splits_wav/dia0_utt5.wav,dia0_utt5,neutral,2,51541
...,...,...,...,...,...,...,...,...,...,...,...
13703,6,279,2,you stole them from me !,6,test,../../data/test_splits_wav/dia279_utt2.wav,dia279_utt2,anger,6,14677
13704,6,279,3,you stole them from me ! !,7,test,../../data/test_splits_wav/dia279_utt3.wav,dia279_utt3,anger,6,19797
13705,6,279,4,gimme them !,3,test,../../data/test_splits_wav/dia279_utt4.wav,dia279_utt4,anger,6,25600
13706,6,279,7,"look , i really need some help , okay ? why ? ...",29,test,../../data/test_splits_wav/dia279_utt7.wav,dia279_utt7,anger,6,134827


In [10]:
# Writes to the same pickle path that was loaded earlier, so the original file is overwritten
# with the frame that now carries the `audio_shape` column.
df.to_pickle("../data/emotion_audio_text.pkl")

In [11]:
# Keep only the rows whose `num_channels` value equals 6.
df_6 = df[df['num_channels'] == 6]

In [12]:
len(df_6) , len(df)

(12107, 13704)

In [13]:
# Gather every `audio_shape` value of the 6-channel rows into a plain Python list.
lst = list(df_6['audio_shape'].values)
    

In [24]:
max(lst)

4879360

Largest `audio_shape` values (sample counts) and their approximate durations at 16 kHz: \\
4879360 -> 5 minutes and 4 seconds \\
3761152 -> 3 minutes 55 seconds \\
 656725 -> 41 seconds \\
 321877 -> 20 seconds \\


In [19]:
# Order the rows by `audio_shape`, largest first.
df_time_sorted = df.sort_values('audio_shape' , ascending=False)

In [25]:
df_time_sorted

Unnamed: 0,emotion,dialog,utterance,text,num_words,split,path,name,label,num_channels,audio_shape
11396,4,38,4,"oh it s great , it s a role on",10,test,../../data/test_splits_wav/dia38_utt4.wav,dia38_utt4,joy,6,4879360
12087,5,220,0,what s that smell ?,5,test,../../data/test_splits_wav/dia220_utt0.wav,dia220_utt0,disgust,6,3761152
9943,4,309,0,kathy ! kathy ! hi ! ! kathy ! kathy ! ( she d...,29,train,../../data/train_splits_wav/dia309_utt0.wav,dia309_utt0,joy,6,656725
13321,6,74,19,"i m pregnant ross ? ross ? okay , whenever you...",25,val,../../data/val_splits_wav/dia74_utt19.wav,dia74_utt19,anger,2,456704
10195,4,485,0,"hey , this heyy",4,train,../../data/train_splits_wav/dia485_utt0.wav,dia485_utt0,joy,6,412672
...,...,...,...,...,...,...,...,...,...,...,...
10253,4,523,10,hi pheebs !,3,train,../../data/train_splits_wav/dia523_utt10.wav,dia523_utt10,joy,6,1024
12429,6,312,9,noo !,2,train,../../data/train_splits_wav/dia312_utt9.wav,dia312_utt9,anger,6,1024
1477,0,332,1,shh,1,train,../../data/train_splits_wav/dia332_utt1.wav,dia332_utt1,neutral,2,1024
790,0,179,8,sagittarius ?,2,train,../../data/train_splits_wav/dia179_utt8.wav,dia179_utt8,neutral,6,1024


In [34]:
# `size_padding` is the product of `num_channels` and `audio_shape` for each row.
# Both operands are read from `df_time_sorted` itself, so the result does not depend on
# index alignment with the separate `df` frame (which may have been reloaded or changed).
df_time_sorted['size_padding'] = df_time_sorted['num_channels'] * df_time_sorted['audio_shape']

In [38]:
# Re-order the rows by `size_padding`, largest first.
df_padding_sorted = df_time_sorted.sort_values("size_padding" , ascending=False)

In [43]:
# Show only the rows whose `size_padding` is below 1,000,000 (the threshold is hard-coded here).
df_padding_sorted[df_padding_sorted["size_padding"] < 1000000  ]

Unnamed: 0,emotion,dialog,utterance,text,num_words,split,path,name,label,num_channels,audio_shape,size_padding
8535,3,216,0,"hey , uh , i m really , really sorry about wha...",17,train,../../data/train_splits_wav/dia216_utt0.wav,dia216_utt0,sadness,6,166571,999426
9128,3,19,18,"i mean , well , 'cause when i was growing up ,...",43,val,../../data/val_splits_wav/dia19_utt18.wav,dia19_utt18,sadness,6,166229,997374
6246,0,243,3,"okay , very cute braces anyway y know what , the",11,test,../../data/test_splits_wav/dia243_utt3.wav,dia243_utt3,neutral,6,166229,997374
8074,2,12,6,"well , i i got this blinding pain in my stomac...",34,train,../../data/train_splits_wav/dia12_utt6.wav,dia12_utt6,fear,6,166229,997374
7650,1,4,11,"ross geller , why do i know that name ? it s u...",20,val,../../data/val_splits_wav/dia4_utt11.wav,dia4_utt11,surprise,6,166229,997374
...,...,...,...,...,...,...,...,...,...,...,...,...
7037,1,516,2,oh i m sorry ! do you need a break ?,11,train,../../data/train_splits_wav/dia516_utt2.wav,dia516_utt2,surprise,2,2048,4096
7031,1,509,14,ow ! ow ! contraction ow ow ! ow ow !,11,train,../../data/train_splits_wav/dia509_utt14.wav,dia509_utt14,surprise,2,2048,4096
10167,4,468,4,i m so glad you could make it,8,train,../../data/train_splits_wav/dia468_utt4.wav,dia468_utt4,joy,2,2048,4096
9618,4,103,6,i love hiking !,4,train,../../data/train_splits_wav/dia103_utt6.wav,dia103_utt6,joy,2,2048,4096


In [44]:
# Overwrites the same pickle path once more, this time with the frame sorted by `size_padding`
# and carrying that extra column.
df_padding_sorted.to_pickle("../data/emotion_audio_text.pkl")

In [48]:
# Read the pickle back to confirm what was written. The notebook-relative path used when
# saving replaces the machine-specific absolute home-directory path.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
pd.read_pickle("../data/emotion_audio_text.pkl")

Unnamed: 0,emotion,dialog,utterance,text,num_words,split,path,name,label,num_channels,audio_shape,size_padding
11396,4,38,4,"oh it s great , it s a role on",10,test,../../data/test_splits_wav/dia38_utt4.wav,dia38_utt4,joy,6,4879360,29276160
12087,5,220,0,what s that smell ?,5,test,../../data/test_splits_wav/dia220_utt0.wav,dia220_utt0,disgust,6,3761152,22566912
9943,4,309,0,kathy ! kathy ! hi ! ! kathy ! kathy ! ( she d...,29,train,../../data/train_splits_wav/dia309_utt0.wav,dia309_utt0,joy,6,656725,3940350
10195,4,485,0,"hey , this heyy",4,train,../../data/train_splits_wav/dia485_utt0.wav,dia485_utt0,joy,6,412672,2476032
3411,0,757,2,"y'know , i do n't know if you 've ever looked ...",50,train,../../data/train_splits_wav/dia757_utt2.wav,dia757_utt2,neutral,6,388437,2330622
...,...,...,...,...,...,...,...,...,...,...,...,...
7037,1,516,2,oh i m sorry ! do you need a break ?,11,train,../../data/train_splits_wav/dia516_utt2.wav,dia516_utt2,surprise,2,2048,4096
7031,1,509,14,ow ! ow ! contraction ow ow ! ow ow !,11,train,../../data/train_splits_wav/dia509_utt14.wav,dia509_utt14,surprise,2,2048,4096
10167,4,468,4,i m so glad you could make it,8,train,../../data/train_splits_wav/dia468_utt4.wav,dia468_utt4,joy,2,2048,4096
9618,4,103,6,i love hiking !,4,train,../../data/train_splits_wav/dia103_utt6.wav,dia103_utt6,joy,2,2048,4096


In [5]:
# Load the training annotation CSV from the notebook-relative data directory rather than a
# machine-specific absolute home-directory path.
pd.read_csv("../data/train_sent_emo.csv")

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
0,1,also I was the point person on my companys tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731"
1,2,You mustve had your hands full.,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442"
2,3,That I did. That I did.,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389"
3,4,So lets talk a little bit about your duties.,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572"
4,5,My duties? All right.,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917"
...,...,...,...,...,...,...,...,...,...,...,...
9984,10474,You or me?,Chandler,neutral,neutral,1038,13,2,3,"00:00:48,173","00:00:50,799"
9985,10475,"I got it. Uh, Joey, women don't have Adam's ap...",Ross,neutral,neutral,1038,14,2,3,"00:00:51,009","00:00:53,594"
9986,10476,"You guys are messing with me, right?",Joey,surprise,positive,1038,15,2,3,"00:01:00,518","00:01:03,520"
9987,10477,Yeah.,All,neutral,neutral,1038,16,2,3,"00:01:05,398","00:01:07,274"


In [2]:
from sklearn.metrics import f1_score,precision_score , recall_score
def compute_scores(truth , preds):
    """Compute weighted F1, precision and recall for a set of predictions.

    Args:
        truth: Ground-truth labels.
        preds: Predicted labels, aligned with ``truth``.

    Returns:
        Tuple ``(f1, precision, recall)``; each is computed with
        ``average='weighted'`` and ``zero_division=1``.
    """
    # Options shared by all three sklearn scorers.
    shared_options = {'average': 'weighted', 'zero_division': 1}
    scorers = (f1_score, precision_score, recall_score)
    return tuple(scorer(truth, preds, **shared_options) for scorer in scorers)

In [1]:
from torchmetrics.classification import MulticlassF1Score , MulticlassRecall , MulticlassPrecision , MulticlassAccuracy
from torchmetrics import MetricCollection
import torch
# Toy 3-class batch: the last sample (target class 0) is predicted as class 1.
target = torch.tensor([2, 1, 0, 0])
preds = torch.tensor([2, 1, 0, 1])
# No `average` argument is passed, so the library default is used.
metric = MulticlassF1Score(num_classes=3)
# Calling the metric object directly returns the score for this batch.
metric(preds, target)

  from .autonotebook import tqdm as notebook_tqdm


tensor(0.7778)

In [2]:
# NOTE(review): the metric object was already called on this same batch; if that call also
# accumulated state, this update counts the batch twice — confirm against the torchmetrics docs.
metric.update(preds , target)

In [3]:
metric.compute()

tensor(0.7778)

In [5]:
# sklearn-based weighted scores for the same toy batch, for comparison with the torchmetrics value.
compute_scores(target , preds)


(0.75, 0.875, 0.75)

In [4]:
# Rebind `metric` with average=None so one F1 value per class is returned instead of a single scalar.
metric = MulticlassF1Score(num_classes=3, average=None)
metric(preds, target)

tensor([0.6667, 0.6667, 1.0000])

In [5]:
# Feed the same batch through update() and read the accumulated per-class result.
# NOTE(review): this batch was already passed to the metric via a direct call — presumably it is
# now counted twice in the accumulated state; confirm.
metric.update(preds , target)
metric.compute()

tensor([0.6667, 0.6667, 1.0000])

In [26]:
# List the keys of the metric's state_dict (the saved output is an empty odict_keys).
# NOTE(review): the original cell held the incomplete expression `metric.`, which is a syntax
# error; state_dict().keys() is a reconstruction matching the saved output — confirm intent.
metric.state_dict().keys()

odict_keys([])

In [9]:
# The same four metric classes are used for both collections; only the averaging mode differs.
# Positional arguments are (num_classes, top_k, average, multidim_average).
_metric_classes = (MulticlassF1Score, MulticlassRecall, MulticlassPrecision, MulticlassAccuracy)
# Per-class results (average='none').
metricsMulti = MetricCollection(*(metric_cls(3, 1, 'none', 'global') for metric_cls in _metric_classes))
# Single weighted result per metric (average='weighted').
metricsScalar = MetricCollection(*(metric_cls(3, 1, 'weighted', 'global') for metric_cls in _metric_classes))

In [10]:
metricsMulti.update(preds , target)

In [11]:
metricsMulti.compute()

{'MulticlassF1Score': tensor([0.6667, 0.6667, 1.0000]),
 'MulticlassRecall': tensor([0.5000, 1.0000, 1.0000]),
 'MulticlassPrecision': tensor([1.0000, 0.5000, 1.0000]),
 'MulticlassAccuracy': tensor([0.5000, 1.0000, 1.0000])}

In [18]:
# Accumulate a second batch of predictions against the same targets, then recompute.
# NOTE(review): in the saved output only the F1 entry differs from the previous compute();
# recall, precision and accuracy are unchanged. The execution counter also jumps to 18, so this
# cell was likely re-run several times — re-verify on a fresh kernel.
metricsMulti.update(torch.tensor([2, 0, 0, 1]) , target)
metricsMulti.compute()

{'MulticlassF1Score': tensor([0.6400, 0.6364, 0.9412]),
 'MulticlassRecall': tensor([0.5000, 1.0000, 1.0000]),
 'MulticlassPrecision': tensor([1.0000, 0.5000, 1.0000]),
 'MulticlassAccuracy': tensor([0.5000, 1.0000, 1.0000])}

Here I am just checking that the metrics computed across the multiple GPUs (one process per rank) were correct before they got logged.


In [12]:
# Per-rank batches: `predsN` holds predicted class indices and `outputN` the labels they are
# scored against (it is passed as the target argument of update()).
# Class indices are created with torch.tensor(..., dtype=torch.long); the legacy torch.Tensor(...)
# constructor always yields float32, which is the wrong dtype for class labels.
preds0 = torch.tensor([4, 1, 4, 3, 4, 4, 3, 3], dtype=torch.long)
output0 = torch.tensor([1, 0, 1, 1, 0, 6, 6, 0], dtype=torch.long)
#  on rank = 0

preds1 = torch.tensor([4, 1, 4, 4, 3, 4, 4, 3], dtype=torch.long)
output1 = torch.tensor([0, 5, 4, 4, 0, 4, 4, 0], dtype=torch.long)
#  on rank = 1

preds2 = torch.tensor([3, 3, 1, 1, 4, 3, 4, 4], dtype=torch.long)
output2 = torch.tensor([3, 4, 3, 6, 3, 0, 0, 0], dtype=torch.long)
#  on rank = 2

preds3 = torch.tensor([4, 4, 4, 3, 4, 3, 1, 1], dtype=torch.long)
output3 = torch.tensor([6, 0, 4, 4, 3, 4, 3, 0], dtype=torch.long)
#  on rank = 3

In [20]:
# Per-class F1 (average='none') over 7 classes; it is fed the per-rank batches one at a time.
multiF1 = MulticlassF1Score(7, top_k = 1 , average = 'none' , multidim_average='global', ignore_index=None, validate_args=True)

In [21]:
# Add the rank-0 batch and show the per-class F1 accumulated so far.
multiF1.update(preds0, output0)
multiF1.compute()


tensor([0., 0., 0., 0., 0., 0., 0.])

In [22]:
# Add the rank-1 batch and show the per-class F1 accumulated so far.
multiF1.update(preds1, output1)
multiF1.compute()


tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.6154, 0.0000, 0.0000])

In [23]:
# Add the rank-2 batch and show the per-class F1 accumulated so far.
multiF1.update(preds2, output2)
multiF1.compute()


tensor([0.0000, 0.0000, 0.0000, 0.1818, 0.4706, 0.0000, 0.0000])

In [24]:
# Add the rank-3 batch and show the per-class F1 accumulated over all four ranks.
multiF1.update(preds3, output3)
multiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.1333, 0.4167, 0.0000, 0.0000])

"label2id": {
    "neutral" \\
    "surprise" \\
    "fear" \\
    "sadness" \\
    "joy" \\
    "disgust" \\
    "anger" \\
  },

In [25]:
# Per-rank validation batches: `valpredsN` holds predicted class indices (every prediction is
# class 4) and `valtruthN` the ground-truth class indices.
# Class indices are created with torch.tensor(..., dtype=torch.long); the legacy torch.Tensor(...)
# constructor always yields float32, which is the wrong dtype for class labels.
valpreds0 = torch.tensor([4, 4, 4, 4, 4, 4, 4, 4], dtype=torch.long)
valtruth0 = torch.tensor([0, 1, 4, 1, 6, 0, 0, 4], dtype=torch.long)
# on rank = 0

valpreds1 = torch.tensor([4, 4, 4, 4, 4, 4, 4, 4], dtype=torch.long)
valtruth1 = torch.tensor([4, 0, 4, 1, 4, 1, 6, 0], dtype=torch.long)
# on rank = 1

valpreds2 = torch.tensor([4, 4, 4, 4, 4, 4, 4, 4], dtype=torch.long)
valtruth2 = torch.tensor([6, 0, 0, 0, 6, 4, 6, 1], dtype=torch.long)
# on rank = 2

valpreds3 = torch.tensor([4, 4, 4, 4, 4, 4, 4, 4], dtype=torch.long)
valtruth3 = torch.tensor([4, 0, 0, 3, 1, 0, 4, 0], dtype=torch.long)
# on rank = 3

In [35]:
# Separate per-class F1 metric (7 classes, average='none') for the validation batches.
valmultiF1 = MulticlassF1Score(7, top_k = 1 , average = 'none' , multidim_average='global', ignore_index=None, validate_args=True)

In [36]:
# compute() on the freshly created metric, before any update(); the saved output is all zeros.
valmultiF1.compute()



tensor([0., 0., 0., 0., 0., 0., 0.])

In [27]:
# NOTE(review): this feeds the rank-0 *validation* batch into `multiF1`, the metric that already
# holds the other per-rank batches, rather than into `valmultiF1` — presumably unintended, since
# the same batch is passed to `valmultiF1` elsewhere in the notebook. Confirm and remove if stale.
multiF1.update(valpreds0, valtruth0)
multiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.1333, 0.4118, 0.0000, 0.0000])

In [28]:
# NOTE(review): rank-1 validation batch is added to `multiF1`, not `valmultiF1` — presumably a
# stale cell that mixes validation data into the other metric; confirm.
multiF1.update(valpreds1, valtruth1)
multiF1.compute()


tensor([0.0000, 0.0000, 0.0000, 0.1333, 0.4444, 0.0000, 0.0000])

In [29]:
# NOTE(review): rank-2 validation batch is added to `multiF1`, not `valmultiF1` — presumably a
# stale cell that mixes validation data into the other metric; confirm.
multiF1.update(valpreds2, valtruth2)
multiF1.compute()


tensor([0.0000, 0.0000, 0.0000, 0.1333, 0.4074, 0.0000, 0.0000])

In [30]:
# NOTE(review): rank-3 validation batch is added to `multiF1`, not `valmultiF1` — presumably a
# stale cell that mixes validation data into the other metric; confirm.
multiF1.update(valpreds3, valtruth3)
multiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.1250, 0.4062, 0.0000, 0.0000])

VALIDATES


In [37]:
# Add the rank-0 validation batch and show the per-class F1 accumulated so far.
valmultiF1.update(valpreds0, valtruth0)
valmultiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.4000, 0.0000, 0.0000])

In [38]:
# Add the rank-1 validation batch and show the per-class F1 accumulated so far.
valmultiF1.update(valpreds1, valtruth1)
valmultiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.4762, 0.0000, 0.0000])

In [39]:
# Add the rank-2 validation batch and show the per-class F1 accumulated so far.
valmultiF1.update(valpreds2, valtruth2)
valmultiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.4000, 0.0000, 0.0000])

In [40]:
# Add the rank-3 validation batch and show the per-class F1 accumulated over all four ranks.
valmultiF1.update(valpreds3, valtruth3)
valmultiF1.compute()

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.4000, 0.0000, 0.0000])