In [1]:
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torch.optim import lr_scheduler
import torchmetrics
import numpy as np
import collections
import json
from tqdm.auto import tqdm, trange
import matplotlib.pyplot as plt
from transformers import AutoConfig, AutoTokenizer, BertModel, RobertaModel


ModuleNotFoundError: No module named 'torchmetrics'

In [2]:
import sys
import os
sys.path.append(f'{os.getcwd()}/SentEval')
PATH_TO_DATA = f'{os.getcwd()}/SentEval/data'

# Import SentEval
import senteval

In [3]:
# https://github.com/huggingface/transformers/issues/5486
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
with open('tasks.json', 'r') as f:
    tasks = json.load(f)

In [6]:
tasks

{'CrowdFlower': 13,
 'DailyDialog': 7,
 'EmoBank_Valence': 1,
 'EmoBank_Arousal': 1,
 'EmoBank_Dominance': 1,
 'HateOffensive': 3,
 'PASTEL_age': 8,
 'PASTEL_country': 2,
 'PASTEL_education': 10,
 'PASTEL_ethnic': 10,
 'PASTEL_gender': 3,
 'PASTEL_politics': 3,
 'PASTEL_tod': 5,
 'SARC': 2,
 'SarcasmGhosh': 2,
 'SentiTreeBank': 1,
 'ShortHumor': 2,
 'ShortJokeKaggle': 2,
 'ShortRomance': 2,
 'StanfordPoliteness': 1,
 'TroFi': 2,
 'VUA': 2}

In [64]:
tasks = {'EmoBank_Valence': 1,
}

# multi-task dataloader

In [65]:
class MyDataset(Dataset):
    '''
    Map-style dataset backed by a tab-separated file with `text` and `label` columns.

    Rows containing missing values are dropped and the index is renumbered.
    A float64 `label` column is narrowed to float32; other label dtypes are
    left untouched.
    '''

    def __init__(self, tsv_file):
        frame = pd.read_csv(tsv_file, sep='\t').dropna().reset_index(drop=True)
        label_is_double = frame['label'].dtype == 'float64'
        if label_is_double:
            frame['label'] = frame['label'].astype('float32')
        self.df = frame

    def __len__(self):
        '''Number of rows left after dropping incomplete ones.'''
        return len(self.df)

    def __getitem__(self, idx):
        '''Return {'text', 'label'} for the row(s) at positional index `idx`.'''
        # Tensor indices are converted to plain Python ints / lists for .iloc.
        position = idx.tolist() if torch.is_tensor(idx) else idx
        row = self.df.iloc[position]
        return {'text': row['text'], 'label': row['label']}


In [66]:
class MultiTaskTrainDataLoader():
    '''
    Iterator that mixes training batches from several task datasets.

    On every step a task index is drawn uniformly at random and the next batch
    {text, label} of that task's DataLoader is returned together with the
    index, i.e. each item is `(i_task, batch)`. One pass over this object
    yields `len(self)` items, where `len(self)` is the sum of the per-task
    DataLoader lengths.

    Known issue: because tasks are sampled uniformly, a large dataset may not
    be fully visited within one pass, while small datasets may be cycled
    through several times.
    '''

    def __init__(self, tasks, batch_size, shuffle, num_workers):
        '''
        tasks: mapping (or iterable) of task names; each name must have a file
               `./processed/train/<task>.tsv`.
        batch_size, shuffle, num_workers: forwarded to every per-task DataLoader.
        '''
        self.tasks = tasks
        self.split = 'train'
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_workers = num_workers

        self.num_tasks = len(tasks)
        self.datasets = []
        self.dataloaders = []
        self.dataloaderiters = []
        self.len = 0
        # Initialised here so that next() works even before iter() was called.
        self.n = 0
        for task in tasks:
            self.datasets.append(MyDataset('./processed/'+self.split+'/'+task+'.tsv'))
            self.dataloaders.append(DataLoader(self.datasets[-1], batch_size=self.batch_size, shuffle=self.shuffle, num_workers=self.num_workers))
            # Public iter() instead of the private DataLoader._get_iterator().
            self.dataloaderiters.append(iter(self.dataloaders[-1]))
            self.len += len(self.dataloaders[-1])

    def __len__(self):
        '''Total number of batches per pass (sum over all task DataLoaders).'''
        return self.len

    def __iter__(self):
        # Only the step counter is reset; per-task iterators keep their position
        # so consecutive passes continue where the previous one stopped.
        self.n = 0
        return self

    def __next__(self):
        '''Return `(i_task, batch)` for a randomly chosen task.'''
        # Check exhaustion first: with zero tasks np.random.randint(0) would
        # raise ValueError instead of ending the iteration cleanly.
        if self.n >= self.len:
            raise StopIteration
        self.n += 1

        i_task = np.random.randint(self.num_tasks)
        try:
            batch = next(self.dataloaderiters[i_task])
        except StopIteration:
            # This task's loader is exhausted: start a fresh pass over it.
            self.dataloaderiters[i_task] = iter(self.dataloaders[i_task])
            batch = next(self.dataloaderiters[i_task])
        return i_task, batch

In [67]:
class MultiTaskTestDataLoader():
    '''
    Sequential multi-task loader for the dev and test splits.

    Iterating yields `(i_task, batch)` pairs: every batch of task 0 first,
    then every batch of task 1, and so on. `__iter__` is a generator, so each
    `for` loop starts a fresh pass.
    '''

    def __init__(self, tasks, split, batch_size, shuffle, num_workers):
        assert split in ['dev', 'test'], 'not implemented'
        self.tasks = tasks
        self.split = split
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_workers = num_workers

        self.num_tasks = len(tasks)
        self.datasets = []
        self.dataloaders = []
        self.dataloaderiters = []  # never filled by this class
        self.len = 0
        for task in tasks:
            dataset = MyDataset('./processed/'+self.split+'/'+task+'.tsv')
            self.datasets.append(dataset)
            loader = DataLoader(
                dataset,
                batch_size=self.batch_size,
                shuffle=self.shuffle,
                num_workers=self.num_workers,
            )
            self.dataloaders.append(loader)
            self.len += len(loader)
        self.i_task = 0

    def __len__(self):
        '''Total number of batches across all tasks.'''
        return self.len

    def __iter__(self):
        '''Yield `(i_task, batch)` task by task, in task order.'''
        for i_task in range(self.num_tasks):
            for batch in self.dataloaders[i_task]:
                yield i_task, batch
    
        

In [68]:
def validate(mt_val_dataloader):
    '''
    Compute the size-weighted mean loss per task over `mt_val_dataloader`.

    Uses the notebook-level globals `mt_model`, `tokenizer` and `device`.
    The model is switched to eval mode for the pass and put back into train
    mode afterwards, even if a batch raises.

    mt_val_dataloader: iterable yielding `(i_task, batch)` where `batch` is a
        dict holding a 'label' tensor plus the keyword inputs for the tokenizer.

    Returns a defaultdict mapping i_task -> mean loss over all samples seen
    for that task.
    '''
    val_loss = collections.defaultdict(float)
    val_size = collections.defaultdict(int)
    mt_model.eval()
    try:
        # No gradients are needed for evaluation; skipping the autograd graph
        # keeps GPU memory usage down.
        with torch.no_grad():
            for i_task, batch in tqdm(mt_val_dataloader):
                label = batch['label'].to(device)
                size = len(label)
                # Everything except the label is passed to the tokenizer.
                text_inputs = {key: value for key, value in batch.items() if key != 'label'}
                tokens = tokenizer(**text_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64).to(device)
                # Index instead of unpacking: the loss is element 1 for both the
                # 2-tuple (MultiTaskBert) and 3-tuple (MultiTaskRoberta) returns.
                loss = mt_model(**tokens, i_task=i_task, label=label)[1]
                # The head loss is a batch mean, so weight it by the batch size.
                val_loss[i_task] += loss.item()*size
                val_size[i_task] += size
    finally:
        mt_model.train()

    for i_task in val_loss:
        val_loss[i_task] /= val_size[i_task]
    return val_loss


# multi-task model

In [69]:
from transformers.models.bert.modeling_bert import BertPreTrainedModel
from transformers.models.roberta.modeling_roberta import RobertaPreTrainedModel

In [70]:
class RegressionHead(nn.Module):
    '''
    Single-output regression head: one linear layer followed by a sigmoid,
    trained with mean-squared error.

    `hidden_dim` is accepted but currently unused.
    '''

    def __init__(self, embedding_dim = 768, hidden_dim = 128):
        super().__init__()
        self.hidden = nn.Linear(embedding_dim, 1)
        self.activation = nn.Sigmoid()
        self.loss_fn = nn.MSELoss()

    def forward(self, sent_emb, label):
        '''
        Return `(output, loss)`: the sigmoid prediction with dimension 1
        squeezed away, and its MSE against `label`.
        '''
        projected = self.hidden(sent_emb)
        output = self.activation(projected).squeeze(1)
        return output, self.loss_fn(output, label)

In [71]:
class ClassificationHead(nn.Module):
    '''
    Classification head: one linear layer to `num_labels` scores followed by
    Tanh, trained with cross-entropy.

    `hidden_dim` is accepted but currently unused.
    '''

    def __init__(self, num_labels, embedding_dim = 768, hidden_dim = 128):
        super().__init__()
        self.hidden = nn.Linear(embedding_dim, num_labels)
        # NOTE(review): Tanh bounds every class score to [-1, 1] before
        # CrossEntropyLoss, which caps how confident the head can become.
        # Kept as in the original; confirm this is intended.
        self.activation = nn.Tanh()

        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, sent_emb, label):
        '''
        sent_emb: float tensor whose last dimension is `embedding_dim`.
        label: class-index tensor accepted by nn.CrossEntropyLoss.

        Returns `(output, loss)` where `output` holds the Tanh-squashed class
        scores and `loss` is the cross-entropy of those scores against `label`.
        '''
        # Use the single layer defined in __init__; the former two-layer stack
        # (hidden1/hidden2) no longer exists and raised AttributeError here.
        output = self.activation(self.hidden(sent_emb))

        loss = self.loss_fn(output, label)
        return output, loss

In [72]:
class MultiTaskBert(BertPreTrainedModel):
    '''
    BERT encoder shared by one head per task.

    `tasks` maps a task name to its number of outputs: a value of 1 gets a
    RegressionHead, any other value a ClassificationHead with that many
    labels. Heads are stored in task order, so `i_task` is the position of
    the task in `tasks`.
    '''

    def __init__(self, config, tasks, use_pooler=True):
        super().__init__(config)
        self.use_pooler = use_pooler
        self.basemodel = BertModel(config)
        heads = []
        for task in tasks:
            num_outputs = tasks[task]
            head = RegressionHead() if num_outputs == 1 else ClassificationHead(num_outputs)
            heads.append(head)
        self.style_heads = nn.ModuleList(heads)

    def forward(self, input_ids, token_type_ids, attention_mask, i_task=None, label=None, return_sent_emb=False):
        '''
        Encode the inputs and either return the sentence embedding
        (`return_sent_emb=True`) or the `(output, loss)` pair produced by the
        head selected with `i_task`.
        '''
        encoded = self.basemodel(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
        # Sentence embedding: the encoder's pooled output, or the hidden state
        # at sequence position 0 when the pooler is disabled.
        sent_emb = encoded['pooler_output'] if self.use_pooler else encoded['last_hidden_state'][:,0,:]

        if return_sent_emb:
            return sent_emb

        head = self.style_heads[i_task]
        head_output, loss = head(sent_emb, label)
        return head_output, loss

In [73]:
class MultiTaskRoberta(RobertaPreTrainedModel):
    '''
    RoBERTa encoder shared by one head per task.

    `tasks` maps a task name to its number of outputs: a value of 1 gets a
    RegressionHead, any other value a ClassificationHead with that many
    labels. Heads are stored in task order, so `i_task` is the position of
    the task in `tasks`.
    '''

    def __init__(self, config, tasks, use_pooler=True):
        super().__init__(config)
        self.use_pooler = use_pooler
        self.basemodel = RobertaModel(config)
        heads = []
        for task in tasks:
            num_outputs = tasks[task]
            head = RegressionHead() if num_outputs == 1 else ClassificationHead(num_outputs)
            heads.append(head)
        self.style_heads = nn.ModuleList(heads)

    def forward(self, input_ids, attention_mask, i_task=None, label=None, return_sent_emb=False):
        '''
        Encode the inputs and either return the sentence embedding
        (`return_sent_emb=True`) or `(output, loss, sent_emb)` from the head
        selected with `i_task`.

        NOTE(review): this returns three values while MultiTaskBert.forward
        returns two; the training loop and validate() in this notebook unpack
        two values and would fail with this model. Confirm which arity is
        intended before switching `mt_model` to RoBERTa.
        '''
        encoded = self.basemodel(input_ids=input_ids, attention_mask=attention_mask)
        # Sentence embedding: the encoder's pooled output, or the hidden state
        # at sequence position 0 when the pooler is disabled.
        sent_emb = encoded['pooler_output'] if self.use_pooler else encoded['last_hidden_state'][:,0,:]
        if return_sent_emb:
            return sent_emb

        head = self.style_heads[i_task]
        head_output, loss = head(sent_emb, label)
        return head_output, loss, sent_emb

# train

In [74]:
def print_loss(losses):
    '''Print every value of `losses` on one line, each as `4.2f` followed by a space.'''
    print(''.join(f'{losses[key]:4.2f} ' for key in losses))

In [75]:
# larger batch_size will definitely lead to memory issue
mt_dataloader = MultiTaskTrainDataLoader(tasks, batch_size = 16, shuffle = True, num_workers = 4)
mt_dev_dataloader = MultiTaskTestDataLoader(tasks, split='dev', batch_size = 16, shuffle = True, num_workers = 4)
mt_test_dataloader = MultiTaskTestDataLoader(tasks, split='test', batch_size = 16, shuffle = True, num_workers = 4)

In [89]:
base_model = "bert-base-uncased"
# base_model = 'roberta-base'

config = AutoConfig.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)

mt_model = MultiTaskBert(config, tasks, use_pooler=False).to(device)
# mt_model = MultiTaskRoberta(config, tasks).to(device)


RuntimeError: CUDA out of memory. Tried to allocate 90.00 MiB (GPU 0; 7.92 GiB total capacity; 6.41 GiB already allocated; 90.62 MiB free; 6.57 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [77]:
# AdamW over all parameters of mt_model, driven by a one-cycle learning-rate
# schedule whose length is one pass over the multi-task training loader.
# NOTE(review): lr / max_lr = 0.03 looks very high for a run that unfreezes the
# BERT encoder at iteration 1000 in the training loop. The regression head
# output saturated at 1.0 and some dev losses blew up in the recorded outputs,
# which may be related. Confirm the intended value.
optimizer = optim.AdamW(mt_model.parameters(), lr=0.03)
scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=0.03, total_steps=len(mt_dataloader)) 

In [78]:
for param in mt_model.basemodel.parameters():
    param.requires_grad = False

In [79]:
# Per-task training losses (one entry per optimisation step of that task).
losses = collections.defaultdict(list)
# One record per validation run: {i_task: mean dev loss}.
dev_records = []
df_dev = pd.DataFrame(columns=np.arange(0,len(tasks)))

# Snapshots of sentence embeddings taken at a few fixed iterations.
test_embs = []

for i_iter, data in enumerate(tqdm(mt_dataloader)):
    if i_iter == 1000:
        # The encoder starts frozen; from this step on it is trained as well.
        for param in mt_model.basemodel.parameters():
            param.requires_grad = True
    i_task, batch = data
    optimizer.zero_grad()
    label = batch['label'].to(device)
    del batch['label']
    tokens = tokenizer(**batch, return_tensors='pt', padding=True, truncation=True, max_length=64).to(device)
    output, loss = mt_model(**tokens, i_task=i_task,  label=label)
    loss.backward()
    optimizer.step()
    scheduler.step()
    losses[i_task].append(loss.detach().item())

    if i_iter in [0,10,100,200,210,300,400,500]:
        # Inspection only: no autograd graph is needed, and the stored tensor
        # must not keep one alive.
        with torch.no_grad():
            sent_emb = mt_model(**tokens, i_task=i_task,  label=label, return_sent_emb=True)
        test_embs.append(sent_emb.detach())
    if i_iter%300 == 0 and i_iter != 0:
        dev_loss = validate(mt_dev_dataloader)
        # DataFrame.append was removed in pandas 2.0; rebuild the frame from
        # the collected records so df_dev stays current even if the loop is
        # interrupted.
        dev_records.append(dict(dev_loss))
        df_dev = pd.DataFrame(dev_records, columns=np.arange(0,len(tasks)))
        print(f'#####training iter {i_iter}/{len(mt_dataloader)}')
        print_loss(dev_loss)

  0%|          | 0/551 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

#####training iter 300/551
0.28 


In [80]:
mt_model.eval()

MultiTaskBert(
  (basemodel): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

In [81]:
output = mt_model.basemodel(**tokens)

In [83]:
output['last_hidden_state'][:,0,:]

tensor([[ 0.2252, -0.1909, -0.8188,  ..., -0.5485, -1.8978,  1.4473],
        [ 0.4681, -0.0052, -0.6890,  ..., -0.4932, -2.0251,  1.4433],
        [ 0.4378, -0.3210, -0.4945,  ..., -0.6542, -2.0153,  1.2084],
        ...,
        [ 0.6605, -0.3394, -0.4702,  ..., -0.1526, -1.6630,  1.5776],
        [ 0.3609, -0.3827, -0.8274,  ..., -0.6290, -1.8401,  1.2784],
        [ 0.2989, -0.2560, -0.4781,  ..., -0.4163, -1.9857,  1.4385]],
       device='cuda:0')

In [85]:
label

tensor([0.2188, 0.3438, 0.5000, 0.3125, 0.6375, 0.5000, 0.4688, 0.5312, 0.5938,
        0.5000, 0.5938, 0.4688, 0.5312, 0.5344, 0.5938, 0.3750],
       device='cuda:0')

In [84]:
mt_model.style_heads[0](output['last_hidden_state'][:,0,:], label)

(tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        device='cuda:0', grad_fn=<SqueezeBackward1>),
 tensor(0.2813, device='cuda:0', grad_fn=<MseLossBackward0>))

In [86]:
logits, loss = mt_model(**tokens, i_task=i_task,  label=label)

In [88]:
logits, loss

(tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        device='cuda:0', grad_fn=<SqueezeBackward1>),
 tensor(0.2813, device='cuda:0', grad_fn=<MseLossBackward0>))

In [24]:
label

tensor([ 8, 12, 12, 12,  7, 10,  7, 10, 12, 10], device='cuda:0')

In [23]:
sent_emb = mt_model(**tokens, i_task=i_task,  label=label, return_sent_emb=True)
sent_emb

tensor([[ 5.0471e-01,  1.2167e+00, -4.7918e-01,  ..., -1.3093e-01,
         -4.7092e-03,  1.0579e+00],
        [ 4.8265e-01,  1.1955e+00, -4.8833e-01,  ..., -9.8464e-02,
         -2.6491e-02,  1.0363e+00],
        [ 4.9495e-01,  1.2084e+00, -4.8237e-01,  ..., -8.0255e-02,
          1.3555e-02,  1.0508e+00],
        ...,
        [ 5.0307e-01,  3.3286e-01, -4.7845e-01,  ..., -1.3443e-01,
          1.0501e-02,  1.0487e+00],
        [ 5.2614e-01,  1.2204e+00, -4.6653e-01,  ..., -7.9352e-02,
         -9.1928e-03,  1.0598e+00],
        [ 5.0942e-01,  1.2224e+00, -4.7927e-01,  ..., -1.2814e-01,
          1.0450e-03,  1.0637e+00]], device='cuda:0', grad_fn=<SliceBackward0>)

In [23]:
test_embs

[tensor([[ 0.5282, -0.6056,  0.0627,  ...,  1.1953, -0.4895, -1.3894],
         [-1.1636, -0.2303, -0.3400,  ...,  1.1154, -0.4572, -0.1489],
         [ 0.8764, -0.8679, -0.4821,  ...,  0.5399, -0.7865, -0.8485],
         ...,
         [-0.5016,  0.7449,  0.5383,  ...,  1.0368,  0.0035, -0.8615],
         [ 0.3498, -0.2952, -0.1204,  ...,  1.7765, -0.3754, -0.4325],
         [ 0.0862, -0.0140,  0.3767,  ...,  0.8000, -0.8127, -0.0237]],
        device='cuda:0'),
 tensor([[ 0.2193, -0.6518,  0.9127,  ..., -0.8741, -0.2050, -1.2656],
         [-0.3398, -0.1049,  0.5103,  ...,  1.6578, -0.6212, -0.9448],
         [ 0.0572, -1.1511,  0.3264,  ...,  1.1537, -1.6816, -0.7146],
         ...,
         [ 0.8545, -0.3801,  0.1675,  ...,  1.0831, -0.4000, -0.0018],
         [ 0.1369, -0.5419, -0.0932,  ...,  0.6743, -0.0577, -1.0107],
         [ 1.2012, -0.2522,  0.3095,  ...,  1.0965, -1.0189, -0.6714]],
        device='cuda:0'),
 tensor([[ 0.3787, -0.5289,  0.9931,  ...,  0.2567, -0.1725, -1.45

torch metrics
https://torchmetrics.readthedocs.io/en/stable/pages/overview.html

In [51]:
metric = torchmetrics.Accuracy()

In [60]:
acc = metric(torch.softmax(output, dim=-1).cpu(), torch.LongTensor([6, 7,  7, 12,  5, 12, 10,  5,  8,  8, 8, 8,  9,  5,  7, 3]))
acc.item()

0.1875

In [63]:
total_train_accuracy = metric.compute()

In [65]:
total_train_accuracy

tensor(0.0833)

In [22]:
df_dev.columns = tasks.items()
df_dev

Unnamed: 0_level_0,CrowdFlower,DailyDialog,EmoBank_Valence,EmoBank_Arousal,EmoBank_Dominance,HateOffensive,PASTEL_age,PASTEL_country,PASTEL_education,PASTEL_ethnic,...,PASTEL_tod,SARC,SarcasmGhosh,SentiTreeBank,ShortHumor,ShortJokeKaggle,ShortRomance,StanfordPoliteness,TroFi,VUA
Unnamed: 0_level_1,13,7,1,1,1,3,8,2,10,10,...,5,2,2,1,2,2,2,1,2,2
0,2.312776,0.832633,0.01766,0.064081,0.007568,0.701575,1.832837,0.693147,2.570806,1.628639,...,1.576885,1.125411,0.200766,0.065751,0.693147,0.693147,0.693147,0.012265,0.693147,0.693147
1,2.312776,0.832633,0.035017,0.012292,0.02621,0.701575,1.832837,0.693147,2.570806,1.628639,...,1.576885,1.125411,0.200766,1.380987,0.693147,0.693147,0.693147,0.023418,0.693147,0.693147
2,2.312776,0.832633,0.016358,0.056178,0.019477,0.701575,1.832837,0.693147,2.570802,1.628639,...,1.576885,1.125411,0.200766,0.075151,0.693147,0.693147,0.693147,0.01274,0.693147,0.693147
3,3.281408,3.575869,10356.68317,3.35056,202.949844,2.296593,2.876076,0.693147,2.906558,1.622863,...,1.576885,0.693147,0.693147,0.243174,0.693147,0.693147,0.693147,117.78529,0.693147,1.586025
4,3.19974,3.575869,0.013024,0.009631,0.009826,2.296593,2.929388,0.693147,2.869238,1.804569,...,1.576885,0.693147,0.693147,0.073756,0.693147,0.693147,0.693147,0.012265,0.693147,0.693147
5,3.19974,3.575869,0.01752,0.009645,0.007379,2.296593,2.929388,0.693147,2.869238,1.804569,...,1.576885,0.693147,0.693147,0.082852,0.693147,0.693147,0.693147,0.02192,0.693147,0.693147
6,3.19974,3.575869,44.343632,39.389981,0.009247,2.296593,2.929388,0.693147,2.869238,1.804569,...,1.576885,0.693147,0.693147,0.066447,0.693147,0.693147,0.693147,0.040102,0.693147,0.693147
7,3.19974,3.575869,0.013002,0.009748,0.008249,2.296593,2.929388,0.693147,2.869238,1.804569,...,1.576885,0.693147,0.693147,243.021282,0.693147,0.693147,0.693147,0.014671,0.693147,0.693147
8,2.531922,3.575869,0.014539,0.009767,0.007498,1.902214,2.563523,0.693147,2.282474,1.289514,...,1.970627,0.693147,0.693147,0.063148,0.693147,1.125329,0.693147,0.011814,0.693147,0.693147
9,2.531922,3.056301,0.013438,0.010019,0.00848,1.902214,2.563523,0.693147,2.282474,1.289514,...,1.970627,0.693147,0.693147,0.06321,0.693147,1.125329,0.693147,0.012274,0.693147,0.693147


In [23]:
PATH = './mt_model_runs/mt_2.bin'
torch.save(mt_model.state_dict(), PATH)

In [24]:
import json

with open('losses.json', 'w') as f:
    json.dump(losses, f)


In [25]:
df_dev.to_csv('dev_losses.csv')

# senteval

In [25]:
def prepare(params, samples):
    '''`prepare` callback passed to the SentEval engine; nothing to set up, so it is a no-op.'''
    return None
def batcher(params, batch):
    '''
    `batcher` callback passed to the SentEval engine: embed one batch of sentences.

    batch: sequence of sentences, each given as a sequence of word strings
        (they are re-joined with single spaces before tokenisation).

    Returns the sentence embeddings produced by the global `mt_model`, moved
    to the CPU.
    '''
    sentences = [' '.join(s) for s in batch]
    # truncation=True keeps over-long sentences within the tokenizer's maximum
    # length instead of overflowing the encoder's position embeddings.
    encoded = tokenizer(
        sentences,
        return_tensors='pt',
        padding=True,
        truncation=True,
    ).to(device)

    with torch.no_grad():
        sent_emb = mt_model(**encoded, return_sent_emb=True)

    return sent_emb.cpu()

# Set params for SentEval (fastmode)
params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}
params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
                                    'tenacity': 3, 'epoch_size': 2}

task_set = 'sts'
if task_set == 'sts':
    senteval_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16', 'STSBenchmark', 'SICKRelatedness']
elif task_set == 'transfer':
    senteval_tasks = ['MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'TREC', 'MRPC']
elif task_set == 'full':
    senteval_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16', 'STSBenchmark', 'SICKRelatedness']
    senteval_tasks += ['MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'TREC', 'MRPC']

se = senteval.engine.SE(params, batcher, prepare)
mt_model.eval()
results = se.eval(senteval_tasks)

In [26]:
results

{'STS12': {'MSRpar': {'pearson': (0.04511842111176674, 0.21713343634765625),
   'spearman': SpearmanrResult(correlation=0.05829879048588996, pvalue=0.11065140077710642),
   'nsamples': 750},
  'MSRvid': {'pearson': (-0.04719690809332701, 0.19666754119304142),
   'spearman': SpearmanrResult(correlation=-0.05791055536831769, pvalue=0.11305028382924873),
   'nsamples': 750},
  'SMTeuroparl': {'pearson': (0.05465063166493182, 0.24259241634429948),
   'spearman': SpearmanrResult(correlation=0.04601455241561682, pvalue=0.3252817731161748),
   'nsamples': 459},
  'surprise.OnWN': {'pearson': (0.028684333440108387, 0.43280310929667104),
   'spearman': SpearmanrResult(correlation=0.0486239359795535, pvalue=0.183456625977699),
   'nsamples': 750},
  'surprise.SMTnews': {'pearson': (0.004681029491783591, 0.9257362156479326),
   'spearman': SpearmanrResult(correlation=-0.013892916608515183, pvalue=0.7820461848374392),
   'nsamples': 399},
  'all': {'pearson': {'all': 0.014868264700974198,
    'mea