In [3]:
import sys
sys.path.append(r'C:/Program Files (zk)/PythonFiles/AClassification/SoundDL-CoughVID')
import os
import yaml
import time
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchaudio
from pretrained.wav2vec import Wav2Vec
from models.conv_vae import ConvVAE, vae_loss
from models.classifiers import LSTM_Classifier, LSTM_Attn_Classifier
from modules.loss import FocalLoss
from readers.coughvid_reader import CoughVID_Class, CoughVID_Dataset
from readers.featurizer import Wave2Mel
from readers.collate_fn import collate_fn
from tools.plotter import calc_accuracy, plot_heatmap

In [4]:
import pandas as pd
src_data = pd.read_csv("./datasets/waveinfo_labedfine_forcls.csv", header=0, index_col=0, delimiter=',')
print("原始数据：", src_data.shape)
print(src_data.iloc[:, [0, 6]].groupby("status_full").count())

原始数据： (6341, 7)
             filename
status_full          
0                2114
1                3288
2                 939


In [6]:
device = torch.device("cuda") if torch.cuda.is_available() else "cpu"

encoder = Wav2Vec(pretrained=True).to(device)
print("Load Pretrained model Wav2Vec...")

criterion = FocalLoss(class_num=3)
print("Create CrossEntropyLoss...")

print("All model and loss are on device:", device)

model = LSTM_Classifier(inp_size=298, hidden_size=64, n_classes=3).to(device)

# model loss_function optimizer scheduler
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-4, max_lr=1e-1, step_size_up=10)
print("Create TDNN, Adam with lr=1e-3, CosineAnnealingLR Shceduler")


Load Pretrained model Wav2Vec...
Create CrossEntropyLoss...
All model and loss are on device: cuda
Create TDNN, Adam with lr=1e-3, CosineAnnealingLR Shceduler


In [7]:
configs = {
    "run_save_dir": "./runs/wav2vec_coughvid/",
    "model":{
        "num_class": 3,
        "input_length": 94,
        "wav_length": 48000,
        "input_dim": 512,
        "n_mels": 128,
        },
    "fit":{
        "batch_size": 32,
        "epochs" : 23,
        "start_scheduler_epoch": 6
        },
}

num_epoch = configs["fit"]["epochs"]
# klw = 0.00025
# istrain: 如果是评估环节，设为False，读取测试集，并且不创建optimizer
# isdemo: 如果只是测试一下，设为True，仅读取32条数据方便快速测试是否有bug
# istrain, isdemo = True, False

In [8]:
from torch.utils.data import DataLoader
train_x, train_y, test_x, test_y = CoughVID_Class(isdemo=False)

tic = time.time()
cough_dataset = CoughVID_Dataset(path_list=train_x, label_list=train_y)
toc = time.time()
print("Train Dataset Creat Completely, cost time:", toc-tic)

tic = time.time()
valid_dataset = CoughVID_Dataset(path_list=test_x, label_list=test_y)
toc = time.time()
print("Valid Dataset Creat Completely, cost time:", toc-tic)

num of trainingset:  6044 6044
num of testingset: 297 297


  samples, sample_rate = librosa.load(file)  # , dtype='float32')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  samples, sample_rate = librosa.load(file)  # , dtype='float32')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  samples, sample_rate = librosa.load(file)  # , dtype='float32')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
Loading: 100%|█████████████████████████████████████████████████████████████████████| 6044/6044 [06:55<00:00, 14.54it/s]


Train Dataset Creat Completely, cost time: 415.5861785411835


Loading: 100%|███████████████████████████████████████████████████████████████████████| 297/297 [00:21<00:00, 13.94it/s]

Valid Dataset Creat Completely, cost time: 21.309322834014893





In [9]:

train_loader = DataLoader(cough_dataset, batch_size=32, shuffle=True,
                          collate_fn=collate_fn)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=True,
                          collate_fn=collate_fn)
# for i, (x_wav, y_label, max_len_rate) in enumerate(train_loader):
#     print(x_wav.shape)
#     print(y_label)
#     print(max_len_rate)
#     x_mel = w2m(x_wav)
#     print(x_mel[0])
#     break
print("Create Training Loader and Valid Loader.")

# w2m = Wave2Mel(sr=16000)

Create Training Loader and Valid Loader.


In [20]:
for i, (x_wav, y_label, max_len_rate) in enumerate(valid_loader):
    # print(x_wav.shape)
    print(y_label)
    # print(max_len_rate)
    # x_mel = w2m(x_wav)
    print(x_mel[0])
    if i>3:
        break

tensor([0, 0, 0, 0, 1, 0, 2, 0, 1, 2, 1, 1, 1, 1, 2, 1, 0, 1, 0, 0, 1, 1, 2, 2,
        2, 0, 0, 2, 1, 2, 2, 1])
tensor([[0.0305, 0.0000, 0.0000,  ..., 0.0246, 0.0000, 0.0000],
        [0.0454, 0.0000, 0.0000,  ..., 0.0314, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0833, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0588, 0.0000, 0.0000],
        [0.0000, 0.0059, 0.0000,  ..., 0.0290, 0.0000, 0.0000]],
       device='cuda:0', grad_fn=<SelectBackward0>)
tensor([1, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 2, 1, 2, 0, 0, 0, 2, 0, 2, 2, 2, 0,
        1, 2, 0, 2, 1, 0, 1, 0])
tensor([[0.0305, 0.0000, 0.0000,  ..., 0.0246, 0.0000, 0.0000],
        [0.0454, 0.0000, 0.0000,  ..., 0.0314, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0833, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0588, 0.0000, 0

In [10]:
timestr = time.strftime("%Y%m%d%H%M", time.localtime())
run_save_dir = configs["run_save_dir"] + timestr + f'_tdnn_focalloss/'
os.makedirs(run_save_dir, exist_ok=True)
print("创建运行保存文件", run_save_dir)
with open("setting.txt", 'w') as fout:
    fout.write("")

创建运行保存文件 ./runs/wav2vec_coughvid/202404301024_tdnn_focalloss/


In [11]:
history1 = []
for epoch_id in range(configs["fit"]["epochs"]):
    # ---------------------------
    # -----------TRAIN-----------
    # ---------------------------
    model.train()
    for x_idx, (x_wav, y_label, _) in enumerate(tqdm(train_loader, desc="Training")):
        x_wav = x_wav.to(device)
        x_mel = encoder(x_wav).transpose(1,2)
        y_label = torch.tensor(y_label, device=device)
        # print("shape of x_mel:", x_mel.shape)
        
        optimizer.zero_grad()
        y_hat = model(x_mel)
        pred_loss = criterion(y_hat, y_label)
        pred_loss.backward()
        optimizer.step()

        if x_idx > 2:
            history1.append(pred_loss.item())
        if x_idx % 60 == 0:
            print(f"Epoch[{epoch_id}], mtid pred loss:{pred_loss.item():.4f}")
    if epoch_id >= configs["fit"]["start_scheduler_epoch"]:
        scheduler.step()

    # ---------------------------
    # -----------SAVE------------
    # ---------------------------
    plt.figure(0)
    plt.plot(range(len(history1)), history1, c="green", alpha=0.7)
    plt.savefig(run_save_dir + f'cls_loss_iter_{epoch_id}.png')
    plt.close()
    # if epoch > 6 and epoch % 2 == 0:
    os.makedirs(run_save_dir + f"model_epoch_{epoch_id}/", exist_ok=True)
    tmp_model_path = "{model}model_{epoch}.pth".format(
        model=run_save_dir + f"model_epoch_{epoch_id}/",
        epoch=epoch_id)
    torch.save(model.state_dict(), tmp_model_path)
    # ---------------------------
    # -----------TEST------------
    # ---------------------------
    model.eval()
    heatmap_input = None
    labels = None
    for x_idx, (x_wav, y_label, _) in enumerate(tqdm(valid_loader, desc="Validate")):
        x_wav = x_wav.to(device)
        x_mel = encoder(x_wav).transpose(1,2)
        print(x_mel.shape)
        y_label = torch.tensor(y_label, device=device)
        
        y_pred = model(x_mel)
        pred_loss = criterion(y_pred, y_label)
        
        if x_idx == 0:
            heatmap_input, labels = y_pred, y_label
        else:
            heatmap_input = torch.concat((heatmap_input, y_pred), dim=0)
            labels = torch.concat((labels, y_label), dim=0)
        # if x_idx * configs["fit"]["batch_size"] > 800:
        #     break
    print("heatmap_input shape:", heatmap_input.shape)
    print("lables shape:", labels.shape)
    # if epoch > 3:
    #     self.plot_reduction(resume_path="", load_epoch=epoch, reducers=["heatmap"])
    heatmap_input = heatmap_input.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()
    calc_accuracy(pred_matrix=heatmap_input, label_vec=labels,
                  save_path=run_save_dir + f"/accuracy_epoch_{epoch_id}.txt")
    plot_heatmap(pred_matrix=heatmap_input, label_vec=labels,
                 ticks=["healthy", "symptomatic", "COVID-19"],
                 save_path=run_save_dir + f"/heatmap_epoch_{epoch_id}.png")
print("============== END TRAINING ==============")

  y_label = torch.tensor(y_label, device=device)
Training:   2%|█▌                                                                      | 4/189 [00:01<01:11,  2.59it/s]

Epoch[0], mtid pred loss:0.4710


Training:  33%|███████████████████████▎                                               | 62/189 [00:06<00:09, 13.40it/s]

Epoch[0], mtid pred loss:0.4593


Training:  65%|█████████████████████████████████████████████▏                        | 122/189 [00:10<00:04, 13.64it/s]

Epoch[0], mtid pred loss:0.4791


Training:  96%|███████████████████████████████████████████████████████████████████▍  | 182/189 [00:14<00:00, 13.94it/s]

Epoch[0], mtid pred loss:0.4651


Training: 100%|██████████████████████████████████████████████████████████████████████| 189/189 [00:15<00:00, 12.24it/s]
  y_label = torch.tensor(y_label, device=device)
Validate:  30%|█████████████████████▉                                                   | 3/10 [00:00<00:00, 19.41it/s]

torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])


Validate:  50%|████████████████████████████████████▌                                    | 5/10 [00:00<00:00, 17.00it/s]

torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])


Validate: 100%|████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 17.34it/s]
  prec = tp_vec / cfm.sum(axis=0)


torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([9, 298, 512])
heatmap_input shape: torch.Size([297, 3])
lables shape: torch.Size([297])
(297, 3)
acc: 0.3333333333333333
precision: ['nan', '0.3333', 'nan']
recall: ['0.0000', '1.0000', '0.0000']


  y_label = torch.tensor(y_label, device=device)
Training:   1%|▊                                                                       | 2/189 [00:00<00:10, 18.02it/s]

Epoch[1], mtid pred loss:0.4415


Training:  33%|███████████████████████▎                                               | 62/189 [00:04<00:08, 14.15it/s]

Epoch[1], mtid pred loss:0.4510


Training:  65%|█████████████████████████████████████████████▏                        | 122/189 [00:08<00:04, 13.75it/s]

Epoch[1], mtid pred loss:0.4436


Training:  96%|███████████████████████████████████████████████████████████████████▍  | 182/189 [00:13<00:00, 14.07it/s]

Epoch[1], mtid pred loss:0.4424


Training: 100%|██████████████████████████████████████████████████████████████████████| 189/189 [00:13<00:00, 13.80it/s]
  y_label = torch.tensor(y_label, device=device)
Validate:  40%|█████████████████████████████▏                                           | 4/10 [00:00<00:00, 32.99it/s]

torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])


Validate: 100%|████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 34.25it/s]
  prec = tp_vec / cfm.sum(axis=0)


torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([9, 298, 512])
heatmap_input shape: torch.Size([297, 3])
lables shape: torch.Size([297])
(297, 3)
acc: 0.3333333333333333
precision: ['nan', '0.3333', 'nan']
recall: ['0.0000', '1.0000', '0.0000']


  y_label = torch.tensor(y_label, device=device)
Training:   1%|▊                                                                       | 2/189 [00:00<00:09, 19.23it/s]

Epoch[2], mtid pred loss:0.4530


Training:  33%|███████████████████████▎                                               | 62/189 [00:04<00:09, 13.29it/s]

Epoch[2], mtid pred loss:0.4525


Training:  65%|█████████████████████████████████████████████▏                        | 122/189 [00:08<00:04, 13.55it/s]

Epoch[2], mtid pred loss:0.4237


Training:  97%|████████████████████████████████████████████████████████████████████▏ | 184/189 [00:13<00:00, 14.15it/s]

Epoch[2], mtid pred loss:0.4216


Training: 100%|██████████████████████████████████████████████████████████████████████| 189/189 [00:13<00:00, 13.99it/s]
  y_label = torch.tensor(y_label, device=device)
Validate:  40%|█████████████████████████████▏                                           | 4/10 [00:00<00:00, 30.89it/s]

torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])


Validate: 100%|████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 33.79it/s]
  prec = tp_vec / cfm.sum(axis=0)


torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([9, 298, 512])
heatmap_input shape: torch.Size([297, 3])
lables shape: torch.Size([297])
(297, 3)
acc: 0.3333333333333333
precision: ['nan', '0.3333', 'nan']
recall: ['0.0000', '1.0000', '0.0000']


  y_label = torch.tensor(y_label, device=device)
Training:   1%|▊                                                                       | 2/189 [00:00<00:10, 18.02it/s]

Epoch[3], mtid pred loss:0.4525


Training:  33%|███████████████████████▎                                               | 62/189 [00:04<00:09, 13.64it/s]

Epoch[3], mtid pred loss:0.4431


Training:  65%|█████████████████████████████████████████████▏                        | 122/189 [00:08<00:04, 13.53it/s]

Epoch[3], mtid pred loss:0.4219


Training:  96%|███████████████████████████████████████████████████████████████████▍  | 182/189 [00:13<00:00, 13.63it/s]

Epoch[3], mtid pred loss:0.4467


Training: 100%|██████████████████████████████████████████████████████████████████████| 189/189 [00:13<00:00, 13.69it/s]
  y_label = torch.tensor(y_label, device=device)
Validate:  40%|█████████████████████████████▏                                           | 4/10 [00:00<00:00, 31.25it/s]

torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([32, 298, 512])


Validate: 100%|████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 33.24it/s]
  prec = tp_vec / cfm.sum(axis=0)


torch.Size([32, 298, 512])
torch.Size([32, 298, 512])
torch.Size([9, 298, 512])
heatmap_input shape: torch.Size([297, 3])
lables shape: torch.Size([297])
(297, 3)
acc: 0.3333333333333333
precision: ['nan', '0.3333', 'nan']
recall: ['0.0000', '1.0000', '0.0000']


  y_label = torch.tensor(y_label, device=device)
Training:   1%|▊                                                                       | 2/189 [00:00<00:10, 17.15it/s]

Epoch[4], mtid pred loss:0.4392


Training:  30%|█████████████████████▍                                                 | 57/189 [00:04<00:09, 13.29it/s]


KeyboardInterrupt: 