In [1]:
# IMPORTS

import os
import warnings
from transformers.utils import logging as hf_logging

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=r".*torch\.utils\._pytree\._register_pytree_node.*"
)
os.environ["PYTHONWARNINGS"] = "ignore"
hf_logging.set_verbosity_error()

import glob
import pandas as pd
import numpy as np
import librosa
import librosa.effects
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification, get_linear_schedule_with_warmup, AutoProcessor
from sklearn.metrics import accuracy_score, f1_score, classification_report, roc_auc_score
from collections import defaultdict
import copy
from scipy.signal import resample
import random
from audiomentations import Compose, AddGaussianNoise, PitchShift, TimeStretch, Gain, HighPassFilter
import noisereduce as nr

In [2]:
# PARAMETERS

CHUNK_DURATION = 30
CHUNK_STRIDE = 0.5
SAMPLE_RATE = 16000
BATCH_SIZE = 4
NUM_EPOCHS = 60
LEARNING_RATE = 1e-6
PATIENCE = 30
MIN_DELTA = 0.001
N_TRAIN_CHUNKS = 10
CLASS_WEIGHTS = [1.0, 2.7]
DROPOUT=0.2

In [3]:
# Seeder

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
# Load CSVs and build label dicts

train_file = pd.read_csv('00_dataset_daicwoz/train_split_Depression_AVEC2017.csv')
val_file = pd.read_csv('00_dataset_daicwoz/dev_split_Depression_AVEC2017.csv')
test_file = pd.read_csv('00_dataset_daicwoz/full_test_split.csv')

train_label_dict = train_file.set_index('Participant_ID')['PHQ8_Binary'].to_dict()
val_label_dict = val_file.set_index('Participant_ID')['PHQ8_Binary'].to_dict()
test_label_dict = test_file.set_index('Participant_ID')['PHQ_Binary'].to_dict()

print("Train label distribution:\n", pd.Series(list(train_label_dict.values())).value_counts().to_string(index=True))
print("\nVal label distribution:\n", pd.Series(list(val_label_dict.values())).value_counts().to_string(index=True))
print("\nTest label distribution:\n", pd.Series(list(test_label_dict.values())).value_counts().to_string(index=True))

Train label distribution:
 0    77
1    30

Val label distribution:
 0    23
1    12

Test label distribution:
 0    33
1    14


In [5]:
# Match audio files to train/val splits

all_files = glob.glob(os.path.join("01_audio", '*'))
train_files, val_files, test_files = [], [], []

for file in all_files:
    basename = os.path.basename(file)
    participant_id_str = basename.split('_')[0]
    try:
        participant_id = int(participant_id_str)
    except ValueError:
        continue
    if participant_id in train_label_dict:
        train_files.append(file)
    elif participant_id in val_label_dict:
        val_files.append(file)
    elif participant_id in test_label_dict:
        test_files.append(file)

print(f"Train files: {len(train_files)}, Val files: {len(val_files)}, Test files: {len(test_files)}")

Train files: 106, Val files: 35, Test files: 46


In [6]:
# Dataset class with chunk diversity and augmentation

class AudioChunkDataset(Dataset):
    def __init__(self, file_list, label_dict, chunk_duration, sample_rate, mode='train', n_chunks=1):
        self.file_list = file_list
        self.label_dict = label_dict
        self.chunk_duration = chunk_duration
        self.sample_rate = sample_rate
        self.mode = mode
        self.n_chunks = n_chunks
        self.index_map = []

        chunk_size = int(chunk_duration * sample_rate)
        stride = int(chunk_size * CHUNK_STRIDE)
        
        if self.mode == 'train':
            self.augmenter = Compose([
                AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.005, p=0.2),
                Gain(min_gain_db=-5.0, max_gain_db=10.0, p=0.3),
                PitchShift(min_semitones=-1, max_semitones=1, p=0.3),
                TimeStretch(min_rate=0.90, max_rate=1.1, p=0.3),
                HighPassFilter(min_cutoff_freq=100, max_cutoff_freq=300, p=0.4),
            ])

        for file_path in self.file_list:
            y, sr = librosa.load(file_path, sr=sample_rate)
            total_samples = len(y)

            if self.mode == 'train':
                for _ in range(n_chunks):
                    self.index_map.append((file_path, None))
            else:
                starts = list(range(0, total_samples - chunk_size + 1, stride))
                if not starts:
                    starts = [0]
                for i, _ in enumerate(starts):
                    self.index_map.append((file_path, i))

    def __len__(self):
        return len(self.index_map)

    def __getitem__(self, idx):
        file_path, chunk_idx = self.index_map[idx]
        participant_id = int(os.path.basename(file_path).split('_')[0])
        label = self.label_dict[participant_id]

        y, sr = librosa.load(file_path, sr=self.sample_rate)
        chunk_samples = int(self.chunk_duration * sr)

        if len(y) < chunk_samples:
            y = np.pad(y, (0, chunk_samples - len(y)))
            start = 0
        else:
            if self.mode == 'train':
                max_start = len(y) - chunk_samples
                start = np.random.randint(0, max_start + 1)
            else:
                stride = int(chunk_samples * CHUNK_STRIDE)
                start = chunk_idx * stride
                start = min(start, len(y) - chunk_samples)

        y = y[start:start + chunk_samples]
        if self.mode != 'train':
            y = nr.reduce_noise(y=y, sr=self.sample_rate)
        y = self.normalize_volume(y)

        if self.mode == 'train':
            y = self.augmenter(samples=y, sample_rate=self.sample_rate)

        return torch.tensor(y, dtype=torch.float32), label, participant_id
    
    def normalize_volume(self, y, target_dBFS=-20):
        rms = np.sqrt(np.mean(y**2))
        scalar = 10 ** (target_dBFS / 20) / (rms + 1e-6)
        y = y * scalar
        return np.clip(y, -1.0, 1.0)

In [7]:
# Model class for inference

class Wav2Vec2Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = Wav2Vec2ForSequenceClassification.from_pretrained(
            "facebook/wav2vec2-base",
            num_labels=2,
            problem_type="single_label_classification",
            classifier_dropout=DROPOUT,
            ignore_mismatched_sizes=True
        )

    def forward(self, input_values, attention_mask=None):
        return self.model(input_values=input_values, attention_mask=attention_mask).logits

In [8]:
# Collate function

feature_extractor = AutoProcessor.from_pretrained("facebook/wav2vec2-base")

def collate_fn(batch):
    audios, labels, participant_ids = zip(*batch)
    audios = [a.numpy() for a in audios]
    inputs = feature_extractor(
        audios,
        sampling_rate=SAMPLE_RATE,
        padding=True,
        return_attention_mask=True,
        return_tensors="pt"
    )
    labels = torch.tensor(labels, dtype=torch.long)
    participant_ids = torch.tensor(participant_ids, dtype=torch.long)
    return {**inputs, "labels": labels, "participant_ids": participant_ids}

In [9]:
# DataLoaders

train_dataset = AudioChunkDataset(train_files, train_label_dict, CHUNK_DURATION, SAMPLE_RATE, mode='train', n_chunks=N_TRAIN_CHUNKS)
val_dataset = AudioChunkDataset(val_files, val_label_dict, CHUNK_DURATION, SAMPLE_RATE, mode='val')
test_dataset = AudioChunkDataset(test_files, test_label_dict, CHUNK_DURATION, SAMPLE_RATE, mode='test')

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, collate_fn=collate_fn)

In [10]:
# Early Stopping

class EarlyStopping:
    def __init__(self, patience=PATIENCE, min_delta=MIN_DELTA):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_value = None
        self.early_stop = False

    def __call__(self, value):
        if self.best_value is None:
            self.best_value = value
        elif self.best_value - value > self.min_delta:
            self.best_value = value
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

In [11]:
# Model

model = Wav2Vec2ForSequenceClassification.from_pretrained(
    "facebook/wav2vec2-base",
    num_labels=2,
    problem_type="single_label_classification",
    ignore_mismatched_sizes=True
);
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"));

In [12]:
# Values

class_weights = torch.tensor(CLASS_WEIGHTS, dtype=torch.float32).to(model.device)
loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
total_steps = len(train_loader) * NUM_EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

In [13]:
# Check chunk count

def count_chunks(file_path, chunk_duration, sample_rate, stride_fraction):
    y, sr = librosa.load(file_path, sr=sample_rate)
    total_samples = len(y)
    chunk_samples = int(chunk_duration * sample_rate)
    stride_samples = int(chunk_samples * stride_fraction)

    if total_samples < chunk_samples:
        return 1
    
    return 1 + (total_samples - chunk_samples) // stride_samples

chunk_counts = [
    count_chunks(f, CHUNK_DURATION, SAMPLE_RATE, CHUNK_STRIDE)
    for f in tqdm(train_files, desc="Calculate chunk count")
]

print(f"Min chunk count: {min(chunk_counts)}")
print(f"Max chunk count: {max(chunk_counts)}")
print(f"average chunk count: {sum(chunk_counts) / len(chunk_counts):.2f}")

Calculate chunk count: 100%|██████████████████████████████████████████████████████████| 106/106 [00:18<00:00,  5.65it/s]

Min chunk count: 26
Max chunk count: 130
average chunk count: 58.78





In [13]:
# Best Values

best_model_state = None
best_macro_f1 = 0
best_epoch = 0
best_val_probs = None
best_val_labels = None
best_val_part_ids = None
best_val_auroc = None
best_threshold = 0.5

filename_best = "01_best_audio_model.pth"
try:    
    checkpoint = torch.load(filename_best, map_location="cuda", weights_only=False)

    model.load_state_dict(checkpoint['model_state_dict'])

    best_model_state = checkpoint['model_state_dict']
    best_macro_f1 = checkpoint['best_macro_f1']
    best_threshold = checkpoint['best_threshold']
    best_epoch = checkpoint['best_epoch']
    best_val_probs = checkpoint['best_val_probs']
    best_val_labels = checkpoint['best_val_labels']
    best_val_part_ids = checkpoint['best_val_part_ids']
    best_val_auroc = checkpoint['best_val_auroc']
    
    print(f"Best Model loaded from: {filename_best}")
except FileNotFoundError:
    print(f"Warning: {filename_best} not found. Starting without preloaded model.")

Best Model loaded from: 01_best_audio_model.pth


In [14]:
# Training code

early_stopping = EarlyStopping()

for epoch in range(NUM_EPOCHS):
    # Training
    model.train()
    train_loss = 0.0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        batch = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
        outputs = model(
            input_values=batch["input_values"],
            attention_mask=batch["attention_mask"],
            labels=None
        )
        loss = loss_fn(outputs.logits, batch["labels"])
        train_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
    avg_train_loss = train_loss / len(train_loader)
    print(f"Epoch {epoch+1} train loss: {avg_train_loss:.4f}")

    # Validation
    model.eval()
    val_loss = 0.0
    all_probs, all_preds, all_labels, all_part_ids = [], [], [], []
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
            outputs = model(
                input_values=batch["input_values"],
                attention_mask=batch["attention_mask"],
                labels=None
            )
            loss = loss_fn(outputs.logits, batch["labels"])
            val_loss += loss.item()
            logits = outputs.logits
            probs = torch.softmax(logits, dim=-1)[:, 1].cpu().numpy()
            preds = torch.argmax(logits, dim=-1).cpu().numpy()
            labels = batch["labels"].cpu().numpy()
            part_ids = batch["participant_ids"].cpu().numpy()
            all_probs.extend(probs)
            all_preds.extend(preds)
            all_labels.extend(labels)
            all_part_ids.extend(part_ids)
    avg_val_loss = val_loss / len(val_loader)

    try:
        auroc = roc_auc_score(all_labels, all_probs)
    except ValueError:
        auroc = float('nan')
    print(f"Validation AUROC: {auroc:.4f}")

    # Checking for best threshold
    best_macro_f1_epoch = 0
    best_thresh_epoch = 0.5
    for thresh in np.arange(0.2, 0.81, 0.01):
        tuned_preds = (np.array(all_probs) >= thresh).astype(int)
        macro_f1 = f1_score(all_labels, tuned_preds, average='macro')
        if macro_f1 > best_macro_f1_epoch:
            best_macro_f1_epoch = macro_f1
            best_thresh_epoch = thresh

    tuned_preds = (np.array(all_probs) >= best_thresh_epoch).astype(int)
    acc = accuracy_score(all_labels, tuned_preds)
    macro_f1 = f1_score(all_labels, tuned_preds, average='macro')
    print(f"Validation loss: {avg_val_loss:.4f} (Macro F1={best_macro_f1_epoch:.4f}, threshold={best_thresh_epoch:.2f})")
    print("Validation classification report:\n", classification_report(all_labels, tuned_preds, digits=4))

    # Checking if its the new best Model
    if best_macro_f1_epoch > best_macro_f1:
        best_model_state = copy.deepcopy(model.state_dict())
        
        best_macro_f1 = best_macro_f1_epoch
        best_threshold = best_thresh_epoch
        best_epoch = epoch + 1
        best_val_probs = np.array(all_probs)
        best_val_labels = np.array(all_labels)
        best_val_part_ids = np.array(all_part_ids)
        best_val_auroc = auroc
        
        checkpoint = {
            'model_state_dict': best_model_state,
            'best_macro_f1': best_macro_f1,
            'best_threshold': best_threshold,
            'best_epoch': best_epoch,
            'best_val_probs': best_val_probs,
            'best_val_labels': best_val_labels,
            'best_val_part_ids': best_val_part_ids,
            'best_val_auroc': best_val_auroc
        }

        torch.save(checkpoint, filename_best)

        print("\nNew best Model saved!\n")

    early_stopping(avg_val_loss)
    if early_stopping.early_stop:
        print(f"Early stopping triggered at epoch {epoch+1}")
        break

Epoch 1: 100%|████████████████████████████████████████████████████████████████████████| 265/265 [02:36<00:00,  1.69it/s]

Epoch 1 train loss: 0.5574





Validation AUROC: 0.6037
Validation loss: 0.8547 (Macro F1=0.5813, threshold=0.73)
Validation classification report:
               precision    recall  f1-score   support

           0     0.6679    0.6580    0.6629      1415
           1     0.4943    0.5053    0.4997       936

    accuracy                         0.5972      2351
   macro avg     0.5811    0.5816    0.5813      2351
weighted avg     0.5987    0.5972    0.5979      2351


New best Model saved!



Epoch 2: 100%|████████████████████████████████████████████████████████████████████████| 265/265 [02:37<00:00,  1.68it/s]

Epoch 2 train loss: 0.5624





Validation AUROC: 0.5927
Validation loss: 0.9561 (Macro F1=0.5606, threshold=0.78)
Validation classification report:
               precision    recall  f1-score   support

           0     0.6470    0.6890    0.6674      1415
           1     0.4787    0.4316    0.4539       936

    accuracy                         0.5866      2351
   macro avg     0.5628    0.5603    0.5606      2351
weighted avg     0.5800    0.5866    0.5824      2351



Epoch 3: 100%|████████████████████████████████████████████████████████████████████████| 265/265 [02:40<00:00,  1.66it/s]

Epoch 3 train loss: 0.5230





Validation AUROC: 0.5778
Validation loss: 1.0334 (Macro F1=0.5447, threshold=0.81)
Validation classification report:
               precision    recall  f1-score   support

           0     0.6447    0.5746    0.6076      1415
           1     0.4477    0.5214    0.4817       936

    accuracy                         0.5534      2351
   macro avg     0.5462    0.5480    0.5447      2351
weighted avg     0.5663    0.5534    0.5575      2351



Epoch 4: 100%|████████████████████████████████████████████████████████████████████████| 265/265 [02:41<00:00,  1.65it/s]

Epoch 4 train loss: 0.5330





Validation AUROC: 0.5712
Validation loss: 1.1044 (Macro F1=0.4845, threshold=0.81)
Validation classification report:
               precision    recall  f1-score   support

           0     0.7089    0.2806    0.4020      1415
           1     0.4316    0.8259    0.5669       936

    accuracy                         0.4977      2351
   macro avg     0.5703    0.5532    0.4845      2351
weighted avg     0.5985    0.4977    0.4677      2351



Epoch 5:  43%|██████████████████████████████▉                                         | 114/265 [01:08<01:30,  1.66it/s]


KeyboardInterrupt: 

In [15]:
# Reloading Best Model

print(f"Reloading best model from epoch {best_epoch} (Macro F1={best_macro_f1:.4f}, threshold={best_threshold:.2f}) for final evaluation...")
print(f"\n[Best Model] Validation AUROC: {best_val_auroc:.4f}")

participant_pred_chunks = defaultdict(list)
participant_true_label = {}

for prob, label, part_id in zip(best_val_probs, best_val_labels, best_val_part_ids):
    participant_pred_chunks[part_id].append(prob)
    participant_true_label[part_id] = label

agg_preds, agg_labels, agg_probs = [], [], []
for part_id in participant_pred_chunks:
    chunk_probs = participant_pred_chunks[part_id]
    mean_prob = np.mean(chunk_probs)
    maj_pred = int(mean_prob >= best_threshold)
    agg_preds.append(maj_pred)
    agg_labels.append(participant_true_label[part_id])
    agg_probs.append(mean_prob)

agg_acc = accuracy_score(agg_labels, agg_preds)
agg_macro_f1 = f1_score(agg_labels, agg_preds, average='macro')
try:
    agg_auroc = roc_auc_score(agg_labels, agg_probs)
except ValueError:
    agg_auroc = float('nan')

print(f"[Best Model] Participant AUROC: {agg_auroc:.4f}")
print("\n[Best Model] Participant-level classification report:\n\n", classification_report(agg_labels, agg_preds, digits=4))

Reloading best model from epoch 1 (Macro F1=0.5813, threshold=0.73) for final evaluation...

[Best Model] Validation AUROC: 0.6037
[Best Model] Participant AUROC: 0.6848

[Best Model] Participant-level classification report:

               precision    recall  f1-score   support

           0     0.7727    0.7391    0.7556        23
           1     0.5385    0.5833    0.5600        12

    accuracy                         0.6857        35
   macro avg     0.6556    0.6612    0.6578        35
weighted avg     0.6924    0.6857    0.6885        35



In [None]:
# Reloading best model from epoch 11 (Macro F1=0.5615, threshold=0.31) for final evaluation...

# [Best Model] Validation AUROC: 0.5611
# [Best Model] Participant AUROC: 0.6667

# [Best Model] Participant-level classification report:

#                precision    recall  f1-score   support

#            0     0.8235    0.6087    0.7000        23
#            1     0.5000    0.7500    0.6000        12

#     accuracy                         0.6571        35
#    macro avg     0.6618    0.6793    0.6500        35
# weighted avg     0.7126    0.6571    0.6657        35

In [None]:
# Reloading best model from epoch 8 (Macro F1=0.5802, threshold=0.47) for final evaluation...

# [Best Model] Validation AUROC: 0.5797
# [Best Model] Participant AUROC: 0.6667

# [Best Model] Participant-level classification report:

#                precision    recall  f1-score   support

#            0     0.7826    0.7826    0.7826        23
#            1     0.5833    0.5833    0.5833        12

#     accuracy                         0.7143        35
#    macro avg     0.6830    0.6830    0.6830        35
# weighted avg     0.7143    0.7143    0.7143        35

In [16]:
print("Evaluating on test set with best model...")

model.eval()
test_probs, test_preds, test_labels, test_part_ids = [], [], [], []

with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
        outputs = model(input_values=batch["input_values"], attention_mask=batch["attention_mask"])
        logits = outputs.logits
        probs = torch.softmax(logits, dim=-1)[:, 1].cpu().numpy()
        preds = torch.argmax(logits, dim=-1).cpu().numpy()
        labels = batch["labels"].cpu().numpy()
        part_ids = batch["participant_ids"].cpu().numpy()

        test_probs.extend(probs)
        test_preds.extend(preds)
        test_labels.extend(labels)
        test_part_ids.extend(part_ids)

# Aggregate like in validation
participant_pred_chunks = defaultdict(list)
participant_true_label = {}

for prob, label, part_id in zip(test_probs, test_labels, test_part_ids):
    participant_pred_chunks[part_id].append(prob)
    participant_true_label[part_id] = label

agg_preds, agg_labels, agg_probs = [], [], []
for part_id in participant_pred_chunks:
    chunk_probs = participant_pred_chunks[part_id]
    mean_prob = np.mean(chunk_probs)
    maj_pred = int(mean_prob >= best_threshold)  # Use same threshold as validation
    agg_preds.append(maj_pred)
    agg_labels.append(participant_true_label[part_id])
    agg_probs.append(mean_prob)

# Metrics
agg_acc = accuracy_score(agg_labels, agg_preds)
agg_macro_f1 = f1_score(agg_labels, agg_preds, average='macro')
try:
    agg_auroc = roc_auc_score(agg_labels, agg_probs)
except ValueError:
    agg_auroc = float('nan')

print(f"\n[Test Set] Participant AUROC: {agg_auroc:.4f}")
print(f"[Test Set] Participant Accuracy: {agg_acc:.4f}")
print(f"[Test Set] Participant Macro F1: {agg_macro_f1:.4f}")
print("\n[Test Set] Participant-level classification report:\n")
print(classification_report(agg_labels, agg_preds, digits=4))

Evaluating on test set with best model...

[Test Set] Participant AUROC: 0.4241
[Test Set] Participant Accuracy: 0.3261
[Test Set] Participant Macro F1: 0.2676

[Test Set] Participant-level classification report:

              precision    recall  f1-score   support

           0     1.0000    0.0312    0.0606        32
           1     0.3111    1.0000    0.4746        14

    accuracy                         0.3261        46
   macro avg     0.6556    0.5156    0.2676        46
weighted avg     0.7903    0.3261    0.1866        46



In [None]:
# Evaluating on test set with best model...

# [Test Set] Participant AUROC: 0.6004
# [Test Set] Participant Accuracy: 0.6087
# [Test Set] Participant Macro F1: 0.5893

# [Test Set] Participant-level classification report:

#               precision    recall  f1-score   support

#            0     0.7917    0.5938    0.6786        32
#            1     0.4091    0.6429    0.5000        14

#     accuracy                         0.6087        46
#    macro avg     0.6004    0.6183    0.5893        46
# weighted avg     0.6752    0.6087    0.6242        46

In [None]:
# Evaluating on test set with best model...

# [Test Set] Participant AUROC: 0.6094
# [Test Set] Participant Accuracy: 0.6304
# [Test Set] Participant Macro F1: 0.6080

# [Test Set] Participant-level classification report:

#               precision    recall  f1-score   support

#            0     0.8000    0.6250    0.7018        32
#            1     0.4286    0.6429    0.5143        14

#     accuracy                         0.6304        46
#    macro avg     0.6143    0.6339    0.6080        46
# weighted avg     0.6870    0.6304    0.6447        46