In [3]:
from segmenters.structure import StructuredCorpus
import segmenters.iterator as it
from gensim.models import TfidfModel, LsiModel, LdaMulticore, LdaModel
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.word2vec import LineSentence
from gensim.models import KeyedVectors

import os
from segmenters.structure import StructuredCorpus
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn import svm
from sklearn import tree
from sklearn import ensemble
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score

import torch
import torch.nn as nn
from torch import Tensor as T
import torch.nn.functional as F

import importlib
import numpy as np
import os
import module
import itertools

In [6]:
# Experiment-wide settings. They are assigned before anything else in this
# cell because `simplify_target` below evaluates `n_labels-1` as a default
# argument at definition time; with the assignment further down, a fresh
# kernel fails with NameError on the `def`.
n_labels=5      # number of target classes after simplification (incl. the unknown label)
n_samples=5000  # upper bound on the number of examples used to fit each classifier

dirs = [
    'swda_mfs_100w',
]

dirname = dirs[0]

ctrain = StructuredCorpus.load(f'{dirname}/corpus/train')
ctest = StructuredCorpus.load(f'{dirname}/corpus/test')
cdev = StructuredCorpus.load(f'{dirname}/corpus/dev')

# One target per corpus item, read from the 'act_tag' side of each pair.
targets_train = [da[1][0] for _, da in ctrain[['default', 'act_tag']]]
targets_dev = [da[1][0] for _, da in cdev[['default', 'act_tag']]]
targets_test = [da[1][0] for _, da in ctest[['default', 'act_tag']]]

# (label, count) pairs sorted by descending frequency.
# NOTE(review): the label inventory is derived from the *test* split, not from
# train — confirm this is intended.
targets_counts = dict(zip(*np.unique(targets_test, return_counts=True)))
targets_counts = sorted(targets_counts.items(), key=lambda x: x[1], reverse=True)

def simplify_target(tg, most_common=n_labels-1, unk_label='?'):
    """Collapse a target onto the most frequent labels.

    Args:
        tg: a target; only its first element ``tg[0]`` is inspected.
        most_common: how many of the top entries of ``targets_counts`` are kept.
        unk_label: value returned for every other target.

    Returns:
        ``tg[0]`` if it is one of the kept labels, otherwise ``unk_label``.
    """
    labels = [lab for lab, count in targets_counts[:most_common]]
    return tg[0] if tg[0] in labels else unk_label

simple_targets_train = [simplify_target(t) for t in targets_train]
simple_targets_dev = [simplify_target(t) for t in targets_dev]
simple_targets_test = [simplify_target(t) for t in targets_test]

# Label frequency table across the three splits, ordered by train frequency.
simple_counts_train = list(zip(*np.unique(simple_targets_train, return_counts=True)))
simple_counts_train = sorted(simple_counts_train, key=lambda x: x[1], reverse=True)
simple_counts_dev = dict(zip(*np.unique(simple_targets_dev, return_counts=True)))
simple_counts_test = dict(zip(*np.unique(simple_targets_test, return_counts=True)))
freq_table = []
for label, count in simple_counts_train:
    freq_table.append({
        'label': label,
        'train': count,
        # A label may be absent from a smaller split; report 0 instead of
        # aborting the whole cell with a KeyError.
        'dev': simple_counts_dev.get(label, 0),
        'test': simple_counts_test.get(label, 0),
    })
freq_table = pd.DataFrame(freq_table)
freq_table['train%'] = (freq_table['train'] / freq_table['train'].sum())*100
freq_table['dev%'] = (freq_table['dev'] / freq_table['dev'].sum())*100
freq_table['test%'] = (freq_table['test'] / freq_table['test'].sum())*100
freq_table = freq_table.round(2)
print(freq_table)
freq_table.to_csv(f'{dirname}/label_freq.csv', sep='\t')

# Pretrained gensim KeyedVectors files; only the first entry is loaded later.
w2v_models=[
    '../models-gensim/glove-twitter-25',
    #'../models-gensim/glove-wiki-gigaword-100',
]

# Name -> zero-argument factory, so every evaluation gets a fresh estimator.
classifiers = {
    #'lr': lambda: LogisticRegression(random_state=1, max_iter=2000),
    'lrcv10': lambda: LogisticRegressionCV(random_state=1, max_iter=10000, cv=10),
    #'svm_rbf': lambda: svm.SVC(kernel='rbf'),
    #'tree5': lambda: tree.DecisionTreeClassifier(random_state=1, max_depth=5),
    'tree10': lambda: tree.DecisionTreeClassifier(random_state=1, max_depth=10),
    #'tree20': lambda: tree.DecisionTreeClassifier(random_state=1, max_depth=20),
    #'forest5': lambda: ensemble.RandomForestClassifier(random_state=1, max_depth=5),
    #'forest10': lambda: ensemble.RandomForestClassifier(random_state=1, max_depth=10),
    #'forest15': lambda: ensemble.RandomForestClassifier(random_state=1, max_depth=15),
    #'knn10': lambda: KNeighborsClassifier(n_neighbors=10),
    #'knn20': lambda: KNeighborsClassifier(n_neighbors=20),
    #'knn30': lambda: KNeighborsClassifier(n_neighbors=30),
    #'knn40': lambda: KNeighborsClassifier(n_neighbors=40),
}

  label  train    dev   test  train%   dev%  test%
0     s  87748  10656  10808   46.28  44.96  45.60
1     b  39901   5415   5933   21.04  22.85  25.03
2     ?  33198   4065   3140   17.51  17.15  13.25
3     +  15184   2184   2063    8.01   9.21   8.70
4     %  13589   1382   1759    7.17   5.83   7.42


In [45]:
def gensim_corpus_to_numpy(corpus, num_topics=None):
    """Convert a sparse corpus of ``(index, value)`` pairs into a dense array.

    Args:
        corpus: iterable of documents, each an iterable of ``(index, value)``
            pairs.
        num_topics: width of the dense output. When omitted, a module-level
            ``num_topics`` is used if one exists (the previous behaviour);
            otherwise the width is inferred as the largest index seen plus one.

    Returns:
        Float array of shape ``(n_documents, num_topics)``; positions that a
        document does not mention are 0. An empty corpus yields an array with
        zero rows.
    """
    # Materialise once: the corpus may be a one-shot iterator and is walked
    # twice when the width has to be inferred.
    docs = [list(doc) for doc in corpus]
    if num_topics is None:
        # Backward compatibility with the original reliance on a global.
        num_topics = globals().get('num_topics')
    if num_topics is None:
        num_topics = 1 + max((i for doc in docs for i, _ in doc), default=-1)

    dense = np.zeros((len(docs), num_topics))
    for row, doc in enumerate(docs):
        for i, val in doc:
            dense[row, i] = val
    return dense

def corpus2wv(corpus, w2v, agg=False, dim=10, max_words=20):
    """Turn every sequence of ``corpus`` into a fixed-size word-vector feature row.

    Args:
        corpus: object exposing ``idx_to_word`` and yielding ``(seq, labels)``
            pairs when indexed with ``['default', 'act_tag']``.
        w2v: word-vector store exposing ``key_to_index`` and list indexing
            (the notebook passes a gensim ``KeyedVectors``).
        agg: if true, average over the ``max_words`` positions (padding
            included); otherwise concatenate them.
        dim: number of leading vector components to keep.
        max_words: sequences are truncated / zero-padded to this length.

    Returns:
        Array of shape ``(n_sequences, dim)`` when ``agg`` is true, else
        ``(n_sequences, max_words * dim)``.
    """
    # Decode indices with the vocabulary of the corpus being converted rather
    # than the global test corpus, so the function does not depend on notebook
    # state and stays correct if the splits ever carry different vocabularies.
    idx_to_word = corpus.idx_to_word
    rows = []
    for seq, _labs in corpus[['default', 'act_tag']]:
        seq = seq[:max_words]
        # Words missing from the vector store fall back to entry 0 of `w2v`.
        seq_w2v = [w2v.key_to_index.get(idx_to_word[i], 0) for i in seq]
        _x = np.zeros((max_words, dim))
        if seq_w2v:
            # An empty sequence keeps its all-zero row; the vector store is
            # never queried with an empty key list.
            _x[:len(seq_w2v)] = np.asarray(w2v[seq_w2v])[:, :dim]
        rows.append(_x.mean(0) if agg else _x.flatten())
    return np.array(rows)

# gensim Dictionary filled directly from the test corpus' vocabulary mappings.
dictionary = Dictionary()
dictionary.id2token = ctest.idx_to_word
dictionary.token2id = ctest.word_to_idx

w2v = KeyedVectors.load(w2v_models[0])

# Sweep over the number of kept vector components and over mean-vs-concat
# pooling. For each setting, fit every enabled classifier on the first
# `n_samples` dev rows and score it on the full test split.
result_table = []
for dim, agg in itertools.product([2, 4, 8, 16, 25], [True, False]):
    #x_train = corpus2wv(ctrain, w2v, agg)
    x_dev = corpus2wv(cdev, w2v, agg, dim)
    x_test = corpus2wv(ctest, w2v, agg, dim)
    result = {'dim': dim, 'agg': agg}
    for clf_name, clf_init in classifiers.items():
        clf = clf_init()
        clf.fit(x_dev[:n_samples], simple_targets_dev[:n_samples])
        preds = clf.predict(x_test)
        acc = accuracy_score(simple_targets_test, preds)
        # The F1 variants are computed but currently not stored in the table.
        f1_micro, f1_macro = (
            f1_score(simple_targets_test, preds, average=avg)
            for avg in ('micro', 'macro')
        )
        result.update({f'{clf_name}_acc': acc})
    result_table.append(result)

result_table = pd.DataFrame(result_table).round(2)
result_table

  label  train    dev   test  train%   dev%  test%
0     s  87748  10656  10808   46.28  44.96  45.60
1     b  39901   5415   5933   21.04  22.85  25.03
2     ?  33198   4065   3140   17.51  17.15  13.25
3     +  15184   2184   2063    8.01   9.21   8.70
4     %  13589   1382   1759    7.17   5.83   7.42


In [46]:
# Re-display the most recent `result_table`.
# NOTE(review): the stored output of this cell has lr_acc / lrcv_acc /
# knn20_acc columns, which the currently enabled `classifiers` (lrcv10,
# tree10) cannot produce — the output looks stale and should be regenerated.
result_table

Unnamed: 0,dim,agg,lr_acc,lrcv_acc,tree10_acc,knn20_acc
0,2,True,0.45,0.6,0.55,0.6
1,2,False,0.6,0.63,0.64,0.45
2,4,True,0.54,0.58,0.54,0.59
3,4,False,0.59,0.64,0.63,0.33
4,8,True,0.64,0.64,0.57,0.63
5,8,False,0.61,0.65,0.63,0.46
6,16,True,0.66,0.66,0.61,0.64
7,16,False,0.65,0.67,0.64,0.64
8,25,True,0.66,0.67,0.59,0.65
9,25,False,0.66,0.67,0.64,0.64


In [6]:
### Form vectors

In [18]:
# Build a 4-dimensional embedding via the project helper and pull its weights
# out as a NumPy array.
# NOTE(review): `nin` is `len(ctest)`; presumably that is the vocabulary size
# rather than the number of sequences — confirm against StructuredCorpus.
emb = module.init_embedding(nin=len(ctest), nout=4)
word_vectors = emb.weight.detach().numpy()
# Show the distinct values present in the freshly initialised weights.
np.unique(word_vectors.flatten())

array([-5., -4., -3., -2., -1.,  0.,  1.,  2.,  3.,  4.,  5.],
      dtype=float32)

In [51]:
def corpus2emb(corpus, word_vectors, agg, dim, max_words=20):
    """Build one fixed-size feature row per sequence from an embedding table.

    Each sequence is cut to ``max_words`` indices and looked up in
    ``word_vectors``. Only the first ``dim`` components of each vector are
    kept; unused positions stay zero. With ``agg`` the ``max_words`` positions
    are averaged (padding included), otherwise they are concatenated.

    Returns:
        Array with one row per sequence: ``dim`` columns when ``agg`` is true,
        ``max_words * dim`` columns otherwise.
    """
    rows = []
    for tokens, _labels in corpus[['default', 'act_tag']]:
        tokens = tokens[:max_words]
        padded = np.zeros((max_words, dim))
        padded[:len(tokens)] = word_vectors[tokens][:, :dim]
        rows.append(padded.mean(0) if agg else padded.flatten())
    return np.array(rows)

# Same sweep as for the pretrained vectors, but the features come from a
# freshly initialised embedding table (a new one for every dim/agg setting).
result_table = []
for dim, agg in itertools.product([2, 4, 8], [True, False]):
    emb = module.init_embedding(nin=len(ctest), nout=dim)
    word_vectors = emb.weight.detach().numpy()

    x_dev = corpus2emb(cdev, word_vectors, agg, dim)
    x_test = corpus2emb(ctest, word_vectors, agg, dim)

    result = {'dim':dim ,'agg':agg}
    for clf_name, clf_init in classifiers.items():
        clf = clf_init()
        clf.fit(x_dev[:n_samples], simple_targets_dev[:n_samples])
        preds = clf.predict(x_test)
        acc = accuracy_score(simple_targets_test, preds)
        # The F1 variants are computed but currently not stored in the table.
        f1_micro, f1_macro = (
            f1_score(simple_targets_test, preds, average=avg)
            for avg in ('micro', 'macro')
        )
        result.update({f'{clf_name}_acc': acc})
    result_table.append(result)

result_table = pd.DataFrame(result_table).round(2)
result_table

Unnamed: 0,dim,agg,lrcv10_acc,tree10_acc
0,2,True,0.46,0.6
1,2,False,0.61,0.68
2,4,True,0.48,0.59
3,4,False,0.61,0.66
4,8,True,0.56,0.61
5,8,False,0.65,0.66


### Skipgram, LSTM

In [7]:
# Segment boundaries for the train and dev splits, materialised as lists so
# `make_skipgrams` can concatenate them with its sentinel entries.
# NOTE(review): presumably these are the indices where 'conversation_no'
# changes, so skip-gram windows never cross conversations — confirm against
# StructuredCorpus.derive_segment_boundaries.
segmentation_train = list(ctrain.derive_segment_boundaries('conversation_no', 'default'))
segmentation_dev = list(cdev.derive_segment_boundaries('conversation_no', 'default'))

In [13]:
def cut_and_pad_sequence(seq, max_words=20):
    """Return ``seq`` as an array of exactly ``max_words`` indices.

    Longer sequences are truncated. Shorter ones are right-padded with the
    index that the test corpus vocabulary assigns to ``'<PAD>'``.
    """
    pad_idx = ctest.word_to_idx['<PAD>']
    clipped = seq[:max_words]
    padded = np.full(max_words, pad_idx)
    padded[:len(clipped)] = clipped
    return padded

# Fixed-width index matrices: one truncated/padded row per sequence of each split.
idx_train = np.array(list(map(cut_and_pad_sequence, ctrain.sequences)))
idx_dev = np.array(list(map(cut_and_pad_sequence, cdev.sequences)))
idx_test = np.array(list(map(cut_and_pad_sequence, ctest.sequences)))

In [42]:
def make_skipgrams(x: np.array, segmentation:list, context_size:int, limit=10000000):
    """Yield ``(pivot, neighbor)`` pairs of rows of ``x`` that lie in one segment.

    ``segmentation`` holds the last row index of every segment except the
    final one. A pivot must have ``context_size`` rows of its own segment on
    both sides. For each offset ``1..context_size`` the following row is
    yielded first, then the preceding one. Generation stops once at least
    ``limit`` pairs have been produced (checked before each pair of yields).
    """
    # Segment starts plus one trailing end marker, as half-open bounds.
    bounds = np.array([-1] + segmentation + [x.shape[0]-1]) + 1
    emitted = 0
    offsets = range(1, context_size + 1)
    for k in range(len(bounds) - 1):
        start, stop = bounds[k], bounds[k + 1]
        for center in range(start + context_size, stop - context_size):
            for offset in offsets:
                if emitted >= limit:
                    return
                yield (x[center], x[center + offset])
                yield (x[center], x[center - offset])
                emitted += 2

context_size=2
batch_size=32

# Training stream: skip-gram pairs generated lazily from the train split and
# grouped into batches.
batches_train = it.RestartableCallableIterator(make_skipgrams, fn_args=[idx_train, segmentation_train, context_size])
batches_train = it.RestartableBatchIterator(batches_train, batch_size=batch_size)

# Dev stream: capped at 100 batches' worth of pairs so periodic evaluation
# stays cheap. (The original cell first wrapped `batches_dev` before it was
# defined, which raises NameError on a fresh kernel; that stray line is gone.)
batches_dev = it.RestartableCallableIterator(make_skipgrams, fn_args=[idx_dev, segmentation_dev, context_size, batch_size*100])
batches_dev = it.RestartableBatchIterator(batches_dev, batch_size=batch_size)

# Peek at the first dev batch, unzipped into (pivots, neighbors).
print(*zip(*next(iter(batches_dev))))

(array([   16,     0, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453]), array([   16,     0, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453]), array([   16,     0, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453]), array([   16,     0, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453]), array([    6,     0,    12,     0, 13453, 13453, 13453, 13453, 13453,
       13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453]), array([    6,     0,    12,     0, 13453, 13453, 13453, 13453, 13453,
       13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453, 13453,
       13453, 13453]), array([    6,     0, 

In [43]:
class SequenceSG(module.ReconstructionModel):
    """Sequence-to-sequence model that predicts a neighbor sequence from a pivot.

    The pivot is encoded into a latent vector by an LSTM encoder. A second
    LSTM, wrapped in ``module.TurboSequencer``, decodes the neighbor from that
    latent with teacher forcing. The loss is the mean squared error between
    the decoded and the actual neighbor representation.

    Args:
        dim_token: size of a single token representation.
        dim_latent: size of the encoder output.
        hyperparams: optional keyword arguments forwarded to both
            ``module.LSTMEncoder`` instances.
        emb_layer: optional embedding applied to integer index inputs; when
            ``None`` the inputs are used as float features directly.
    """

    def __init__(self, dim_token, dim_latent, hyperparams=None, emb_layer=None):
        super().__init__()
        # `None` instead of a shared mutable `{}` default.
        hyperparams = {} if hyperparams is None else hyperparams
        self.emb_layer = emb_layer
        self.encoder = module.LSTMEncoder(dim_token, dim_latent, **hyperparams)
        decoder_lstm = module.LSTMEncoder(dim_token+dim_latent, dim_token, **hyperparams)
        self.decoder = module.TurboSequencer(decoder_lstm, dim_token, dim_latent)

    def _prepare(self, seqs):
        """Convert a tuple of sequences to a tensor with its first two axes swapped."""
        if self.emb_layer is None:
            seqs = T(seqs).float()
        else:
            seqs = self.emb_layer(T(seqs).long())
        # presumably (batch, seq, dim) -> (seq, batch, dim) — TODO confirm
        # against module.LSTMEncoder's expected layout.
        return seqs.transpose(0, 1).contiguous()

    def forward(self, batch):
        """Encode the pivots of ``batch`` and decode their neighbors.

        Args:
            batch: iterable of ``(pivot, neighbor)`` pairs.

        Returns:
            ``(neighbor_hat, neighbor)``: the decoder output and the prepared
            neighbor tensor it is compared against.
        """
        pivot, neighbor = zip(*batch)
        pivot = self._prepare(pivot)
        neighbor = self._prepare(neighbor)
        lat = self.encoder(pivot)
        neighbor_hat = self.decoder.decode(
            x_static=lat,
            teacher_force_y=neighbor,
            #decoding_steps=
            )
        return neighbor_hat, neighbor

    def loss_fn(self, neighbor_hat, neighbor):
        """Mean squared error between decoded and actual neighbor tensors."""
        return F.mse_loss(neighbor_hat, neighbor, reduction='mean')

# Token embeddings and sequence latents both use 8 dimensions.
dim_latent = dim_token = 8
emb_layer = module.init_embedding(len(ctest), dim_token)
# Keyword arguments forwarded by SequenceSG to both of its LSTM encoders.
hyperparams = dict(
    bidirectional=True,
    hidden_size=64,
    num_layers=2,
    dropout=.1,
)
model = SequenceSG(dim_token, dim_latent, hyperparams, emb_layer)

In [44]:
# Train the skip-gram sequence model; the call's return value is the cell
# output (the stored output starts with the list of logged train losses).
# NOTE(review): `epochs=-1` presumably means "no epoch limit", with training
# ended by the patience-based early stopping — confirm against
# module.ReconstructionModel.train_batches.
model.train_batches(
    batches_train=batches_train,
    batches_test=batches_dev,
    epochs=-1, 
    lr=3e-3, 
    print_every=10,
    test_every=30,
    patience=20,
    min_improvement=.0
)

using a test set of 100 batches (BS=32)
Epoch 00	Batch 10	0.1601s/batch	train_loss = 12.4807
Epoch 00	Batch 20	0.1565s/batch	train_loss = 10.3378
Epoch 00	Batch 30	0.1526s/batch	train_loss = 9.2731
Epoch 00	Batch 30	test_loss = 7.2454
new best loss (inf -> 7.245416)
Epoch 00	Batch 40	0.1522s/batch	train_loss = 7.7006
Epoch 00	Batch 50	0.1541s/batch	train_loss = 7.9230
Epoch 00	Batch 60	0.1630s/batch	train_loss = 8.2300
Epoch 00	Batch 60	test_loss = 6.6803
new best loss (7.245416 -> 6.680335)
Epoch 00	Batch 70	0.1520s/batch	train_loss = 8.0776
Epoch 00	Batch 80	0.1514s/batch	train_loss = 6.8082
Epoch 00	Batch 90	0.1514s/batch	train_loss = 7.3202
Epoch 00	Batch 90	test_loss = 6.4707
new best loss (6.680335 -> 6.470734)
Epoch 00	Batch 100	0.1515s/batch	train_loss = 8.4447
Epoch 00	Batch 110	0.1503s/batch	train_loss = 7.3512
Epoch 00	Batch 120	0.1498s/batch	train_loss = 7.3515
Epoch 00	Batch 120	test_loss = 6.3187
new best loss (6.470734 -> 6.318651)
Epoch 00	Batch 130	0.1491s/batch	train_

([12.480655002593995,
  10.337840366363526,
  9.27308497428894,
  7.700550651550293,
  7.923036003112793,
  8.229965114593506,
  8.077570152282714,
  6.80824384689331,
  7.3201847791671755,
  8.444734334945679,
  7.351213073730468,
  7.3514951229095455,
  5.748010849952697,
  6.407313013076783,
  7.217664480209351,
  8.230033826828002,
  6.200826740264892,
  7.017363166809082,
  5.73690938949585,
  5.752015089988708,
  5.38360013961792,
  7.702769136428833,
  6.270129585266114,
  5.957297468185425,
  5.78797287940979,
  6.573450517654419,
  6.6606817722320555,
  5.652288770675659,
  6.038947057723999,
  6.832077836990356,
  6.597845268249512,
  6.2784895420074465,
  7.243807315826416,
  6.0229291915893555,
  6.245186185836792,
  5.169253802299499,
  6.317964649200439,
  4.909299206733704,
  5.594918990135193,
  6.524342489242554,
  7.971635246276856,
  8.263182353973388,
  5.837405157089234,
  6.134846305847168,
  6.8851581573486325,
  5.681737208366394,
  6.365463542938232,
  4.946884

In [45]:
# Encode the test and dev sequences with the trained skip-gram model.
# The embedding comes from the model itself (`model.emb_layer`, the layer it
# was constructed and trained with) rather than from the notebook-global
# `emb_layer`, which may have been re-assigned by another cell.
model.eval()  # once is enough; the mode does not change between batches

batches_test = it.RestartableBatchIterator(list(idx_test), batch_size*4)
batches_test = it.RestartableMapIterator(batches_test, lambda batch: T(batch).long().transpose(0, 1))
with torch.no_grad():
    embs_test = torch.vstack([model.encoder(model.emb_layer(batch)) for batch in batches_test])
# Formerly a bare expression whose value was silently discarded mid-cell.
assert embs_test.shape[0] == len(simple_targets_test)

batches_dev = it.RestartableBatchIterator(list(idx_dev), batch_size*4)
batches_dev = it.RestartableMapIterator(batches_dev, lambda batch: T(batch).long().transpose(0, 1))
with torch.no_grad():
    embs_dev = torch.vstack([model.encoder(model.emb_layer(batch)) for batch in batches_dev])
assert embs_dev.shape[0] == len(simple_targets_dev)
embs_dev.shape, len(simple_targets_dev)

(torch.Size([23702, 8]), 23702)

In [51]:
# Fit every enabled classifier on the first `n_samples` dev encodings and
# score it on the full test split; only accuracy is recorded.
result = {}
for clf_name, clf_init in classifiers.items():
    clf = clf_init()
    clf.fit(embs_dev[:n_samples], simple_targets_dev[:n_samples])
    preds = clf.predict(embs_test)
    acc = accuracy_score(simple_targets_test, preds)
    # The F1 variants are computed but currently not stored in the result.
    f1_micro, f1_macro = (
        f1_score(simple_targets_test, preds, average=avg)
        for avg in ('micro', 'macro')
    )
    result.update({f'{clf_name}_acc': acc})
result

{'lrcv10_acc': 0.6535037758933468, 'tree10_acc': 0.674851284647513}

### Autoencoding: the same sequence model, trained to reconstruct its own input

In [66]:
batch_size=32

def duplicate(x):
    """Pair an item with itself, so the model's target equals its input."""
    return (x, x)

# Training stream: every padded train sequence as an (input, target) pair.
batches_train = it.RestartableBatchIterator(
    it.RestartableMapIterator(idx_train, duplicate),
    batch_size=batch_size,
)

# Dev stream: the first 2000 dev sequences, in batches four times as large.
batches_dev = it.RestartableBatchIterator(
    it.RestartableMapIterator(idx_dev[:2000], duplicate),
    batch_size=batch_size*4,
)
#print(*zip(*next(iter(batches_dev))))

In [67]:
# Same dimensions and LSTM settings as the skip-gram model, but with a fresh
# embedding layer and a fresh model instance for the autoencoding objective.
dim_latent = dim_token = 8
emb_layer = module.init_embedding(len(ctest), dim_token)
hyperparams = dict(
    bidirectional=True,
    hidden_size=64,
    num_layers=2,
    dropout=.1,
)
model_ae = SequenceSG(dim_token, dim_latent, hyperparams, emb_layer)

In [68]:
# Train the autoencoding variant with the same schedule as the skip-gram run;
# the call's return value is the cell output.
# NOTE(review): `epochs=-1` presumably means "no epoch limit", with training
# ended by the patience-based early stopping — confirm against
# module.ReconstructionModel.train_batches.
model_ae.train_batches(
    batches_train=batches_train,
    batches_test=batches_dev,
    epochs=-1, 
    lr=3e-3, 
    print_every=10,
    test_every=30,
    patience=20,
    min_improvement=.0
)

using a test set of 16 batches (BS=128)
Epoch 00	Batch 10	0.1582s/batch	train_loss = 10.9084
Epoch 00	Batch 20	0.1540s/batch	train_loss = 7.9610
Epoch 00	Batch 30	0.1561s/batch	train_loss = 7.4731
Epoch 00	Batch 30	test_loss = 6.2067
new best loss (inf -> 6.206695)
Epoch 00	Batch 40	0.1537s/batch	train_loss = 6.4377
Epoch 00	Batch 50	0.1543s/batch	train_loss = 5.8636
Epoch 00	Batch 60	0.1522s/batch	train_loss = 5.6789
Epoch 00	Batch 60	test_loss = 5.8271
new best loss (6.206695 -> 5.827077)
Epoch 00	Batch 70	0.1526s/batch	train_loss = 5.7004
Epoch 00	Batch 80	0.1533s/batch	train_loss = 5.6049
Epoch 00	Batch 90	0.1550s/batch	train_loss = 5.6420
Epoch 00	Batch 90	test_loss = 5.7064
new best loss (5.827077 -> 5.706404)
Epoch 00	Batch 100	0.1528s/batch	train_loss = 5.0152
Epoch 00	Batch 110	0.1555s/batch	train_loss = 6.4473
Epoch 00	Batch 120	0.1534s/batch	train_loss = 5.4879
Epoch 00	Batch 120	test_loss = 5.6296
new best loss (5.706404 -> 5.629607)
Epoch 00	Batch 130	0.1538s/batch	train_l

([10.908362197875977,
  7.960996103286743,
  7.473121881484985,
  6.43770432472229,
  5.8635783195495605,
  5.67886176109314,
  5.700396394729614,
  5.604948711395264,
  5.64202241897583,
  5.015197682380676,
  6.447308301925659,
  5.487941932678223,
  4.994870853424072,
  5.2878772735595705,
  5.577517032623291,
  5.978104734420777,
  5.560517168045044,
  4.919181227684021,
  5.108117723464966,
  6.589050149917602,
  5.573911523818969,
  4.809597206115723,
  5.549636125564575,
  5.71639997959137,
  4.929488158226013,
  4.116895723342895,
  5.124610590934753,
  5.227909851074219,
  4.887757086753846,
  5.389058804512024,
  5.828646898269653,
  4.889252758026123,
  4.511975026130676,
  5.610156583786011,
  5.362890672683716,
  4.957969427108765,
  6.647210216522216,
  5.781266880035401,
  4.842542123794556,
  5.532192373275757,
  5.650252103805542,
  5.113083553314209,
  4.639243340492248,
  5.073835611343384,
  4.904958629608155,
  4.416230010986328,
  5.251512670516968,
  4.9937451839

In [69]:
# Encode the test and dev sequences with the trained autoencoder.
# The embedding comes from the model itself (`model_ae.emb_layer`, the layer
# it was constructed and trained with) rather than from the notebook-global
# `emb_layer`, which is re-assigned by several cells.
model_ae.eval()  # once is enough; the mode does not change between batches

batches_test = it.RestartableBatchIterator(list(idx_test), batch_size*4)
batches_test = it.RestartableMapIterator(batches_test, lambda batch: T(batch).long().transpose(0, 1))
with torch.no_grad():
    embs_test = torch.vstack([model_ae.encoder(model_ae.emb_layer(batch)) for batch in batches_test])
# Formerly a bare expression whose value was silently discarded mid-cell.
assert embs_test.shape[0] == len(simple_targets_test)

batches_dev = it.RestartableBatchIterator(list(idx_dev), batch_size*4)
batches_dev = it.RestartableMapIterator(batches_dev, lambda batch: T(batch).long().transpose(0, 1))
with torch.no_grad():
    embs_dev = torch.vstack([model_ae.encoder(model_ae.emb_layer(batch)) for batch in batches_dev])
# Formerly a bare expression whose value was silently discarded mid-cell.
assert embs_dev.shape[0] == len(simple_targets_dev)

# Fit every enabled classifier on the first `n_samples` dev encodings and
# score it on the full test split; only accuracy is recorded.
result = {}
for clf_name, clf_init in classifiers.items():
    clf = clf_init()
    clf.fit(embs_dev[:n_samples], simple_targets_dev[:n_samples])
    preds = clf.predict(embs_test)
    acc = accuracy_score(simple_targets_test, preds)
    # The F1 variants are computed but currently not stored in the result.
    f1_micro, f1_macro = (
        f1_score(simple_targets_test, preds, average=avg)
        for avg in ('micro', 'macro')
    )
    result.update({f'{clf_name}_acc': acc})
result

{'lrcv10_acc': 0.6932877694806564, 'tree10_acc': 0.6740496983504197}