In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchaudio
from load_data import *
from transformers import Wav2Vec2Processor
from transformers import CamembertTokenizer

raw_data = load_all_ipus("Dataset/transcr")
filepath = "Dataset/audio/2_channels/"

wave2vec_name = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(wave2vec_name)

bert_name = 'camembert-base'
tokenizer = CamembertTokenizer.from_pretrained(bert_name)

def get_audio(i, raw_data, filepath):
    audio_file_path =  filepath + raw_data["dyad"][i].replace("transcr\\","") + ".wav"
    audio_tensor, sampling_rate = torchaudio.load(audio_file_path)
    audio_tensor = processor(audio_tensor, return_tensors="pt", sampling_rate = sampling_rate).input_values.squeeze(0)
    stop = int(sampling_rate*raw_data["stop"][i])
    start = stop-sampling_rate*1
    if start < 0:
        sample_tensor = audio_tensor[:,0:stop]
        sample_tensor = torch.cat((torch.zeros((2,abs(start))),sample_tensor),dim=1)
    else:
        sample_tensor = audio_tensor[:,start:stop]
    return sample_tensor

def get_text(i, raw_data):
    text = raw_data["text"][i]
    text_tokenized = tokenizer(text, return_tensors="pt")['input_ids']
    text_tokenized = torch.cat((torch.tensor([[1]*20]),text_tokenized),dim=1)
    text_tokenized = text_tokenized[:,-20:]
    return text_tokenized.squeeze(0)

def get_label(i, raw_data):
    return raw_data["turn_after"].astype("float32")[i]

class DataGenerator(Dataset):
    
    def __init__(self, raw_data, filepath):
        self.raw_data = raw_data
        self.filepath = filepath
    
    def __getitem__(self, i):
        return {"audio" : get_audio(i, self.raw_data, self.filepath),
                "text" : get_text(i, self.raw_data),
                "label" : get_label(i, self.raw_data)}
        
    def __len__(self):
        return len(self.raw_data)
    
def create_dataloader(generator):
    
    dataloader = DataLoader(generator,
                            batch_size=64,
                            shuffle=True,
                            drop_last=True,)
    
    return dataloader

(16400, 12)
is_main_speaker      0.739207
turn_at_start        0.184756
turn_after           0.184756
turn_start_word     10.250372
yield_at_end         0.188415
request_at_start     0.198537
dtype: float64


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[:2048], filepath)
model = Model(27904, 16, 2, device)

# dataloader = create_dataloader(generator)
# model.evaluate(dataloader)

print("Nombre de paramètres du model:", model.parameters_number())

model.train_loop(generator, 5)

(16400, 12)
is_main_speaker      0.739207
turn_at_start        0.184756
turn_after           0.184756
turn_start_word     10.250372
yield_at_end         0.188415
request_at_start     0.198537
dtype: float64


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Nombre de paramètres du model: 446514

[32mEPOCH 1:[0m


Training:   0%|          | 0/25 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
ic| loss: tensor([1.0399], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 1/25 [00:29<11:49, 29.56s/it]ic| loss: tensor([1.1900], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 2/25 [00:51<09:41, 25.29s/it]ic| loss: tensor([1.1519], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 3/25 [01:13<08:42, 23.76s/it]ic| loss: tensor([1.1701], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█▌        | 4/25 [01:35<08:02, 22.97s/it]ic| loss: tensor([0.8514], device='cuda:0', grad_fn=<NegBackward0>)
Training:  20%|██        | 5/25 [02:07<08:44, 26.24s/it]ic| loss: tensor([1.2737], device='cuda:0', grad_fn=<NegBackward0>)
Training:  24%|██▍       | 6/25 [02:31<08:03, 25.45s/it]ic| los

[36mValidation :[0m


Validating: 100%|██████████| 2/2 [01:00<00:00, 30.06s/it]


Classe [31m0[0m | Precision: [32m88.31%[0m, Recall: [32m65.38%[0m, F1 Score: [32m75.14%[0m
Classe [31m1[0m | Precision: [32m29.41%[0m, Recall: [32m62.50%[0m, F1 Score: [32m36.76%[0m
Score : 0.43671923875808716
[33mTest :[0m


Validating: 100%|██████████| 3/3 [01:14<00:00, 24.90s/it]


Classe [31m0[0m | Precision: [32m84.26%[0m, Recall: [32m58.33%[0m, F1 Score: [32m68.94%[0m
Classe [31m1[0m | Precision: [32m22.62%[0m, Recall: [32m52.78%[0m, F1 Score: [32m23.88%[0m
Score : 0.31987133622169495

[32mEPOCH 2:[0m


Training:   0%|          | 0/25 [00:00<?, ?it/s]ic| loss: tensor([0.8456], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 1/25 [00:21<08:34, 21.44s/it]ic| loss: tensor([0.8885], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 2/25 [00:43<08:16, 21.59s/it]ic| loss: tensor([1.0998], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 3/25 [01:04<07:50, 21.41s/it]ic| loss: tensor([0.9241], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█▌        | 4/25 [01:25<07:26, 21.27s/it]ic| loss: tensor([1.1541], device='cuda:0', grad_fn=<NegBackward0>)
Training:  20%|██        | 5/25 [01:46<07:04, 21.23s/it]ic| loss: tensor([0.5877], device='cuda:0', grad_fn=<NegBackward0>)
Training:  24%|██▍       | 6/25 [02:07<06:41, 21.13s/it]ic| loss: tensor([0.9522], device='cuda:0', grad_fn=<NegBackward0>)
Training:  28%|██▊       | 7/25 [02:31<06:35, 21.95s/it]ic| loss: tensor([0.7250], device='cuda:0', grad_fn=<NegBackward0>)
Training:  32%|█

[36mValidation :[0m


Validating: 100%|██████████| 2/2 [00:57<00:00, 28.86s/it]


Classe [31m0[0m | Precision: [32m88.89%[0m, Recall: [32m61.54%[0m, F1 Score: [32m72.73%[0m
Classe [31m1[0m | Precision: [32m28.57%[0m, Recall: [32m66.67%[0m, F1 Score: [32m38.10%[0m
Score : 0.4432900547981262
[33mTest :[0m


Validating: 100%|██████████| 3/3 [01:04<00:00, 21.39s/it]


Classe [31m0[0m | Precision: [32m85.42%[0m, Recall: [32m52.23%[0m, F1 Score: [32m64.82%[0m
Classe [31m1[0m | Precision: [32m21.88%[0m, Recall: [32m60.00%[0m, F1 Score: [32m26.25%[0m
Score : 0.33192986249923706

[32mEPOCH 3:[0m


Training:   0%|          | 0/25 [00:00<?, ?it/s]ic| loss: tensor([0.9439], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 1/25 [00:21<08:34, 21.44s/it]ic| loss: tensor([1.2937], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 2/25 [00:42<08:13, 21.44s/it]ic| loss: tensor([0.8635], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 3/25 [01:04<07:51, 21.44s/it]ic| loss: tensor([0.7720], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█▌        | 4/25 [01:25<07:29, 21.41s/it]ic| loss: tensor([0.8483], device='cuda:0', grad_fn=<NegBackward0>)
Training:  20%|██        | 5/25 [01:47<07:08, 21.40s/it]ic| loss: tensor([0.8074], device='cuda:0', grad_fn=<NegBackward0>)
Training:  24%|██▍       | 6/25 [02:08<06:47, 21.44s/it]ic| loss: tensor([0.9003], device='cuda:0', grad_fn=<NegBackward0>)
Training:  28%|██▊       | 7/25 [02:29<06:24, 21.38s/it]ic| loss: tensor([0.6518], device='cuda:0', grad_fn=<NegBackward0>)
Training:  32%|█

[36mValidation :[0m


Validating: 100%|██████████| 2/2 [00:59<00:00, 29.61s/it]


Classe [31m0[0m | Precision: [32m88.00%[0m, Recall: [32m64.08%[0m, F1 Score: [32m74.16%[0m
Classe [31m1[0m | Precision: [32m30.19%[0m, Recall: [32m64.00%[0m, F1 Score: [32m38.64%[0m
Score : 0.450343519449234
[33mTest :[0m


Validating: 100%|██████████| 3/3 [01:28<00:00, 29.35s/it]


Classe [31m0[0m | Precision: [32m86.14%[0m, Recall: [32m56.13%[0m, F1 Score: [32m67.97%[0m
Classe [31m1[0m | Precision: [32m25.27%[0m, Recall: [32m62.16%[0m, F1 Score: [32m31.42%[0m
Score : 0.38000932335853577

[32mEPOCH 4:[0m


Training:   0%|          | 0/25 [00:00<?, ?it/s]ic| loss: tensor([0.7116], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 1/25 [00:29<11:49, 29.55s/it]ic| loss: tensor([0.7996], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 2/25 [00:54<10:23, 27.09s/it]ic| loss: tensor([0.7394], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 3/25 [01:16<08:57, 24.44s/it]ic| loss: tensor([0.9625], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█▌        | 4/25 [01:37<08:05, 23.14s/it]ic| loss: tensor([1.1238], device='cuda:0', grad_fn=<NegBackward0>)
Training:  20%|██        | 5/25 [01:58<07:31, 22.58s/it]ic| loss: tensor([0.8755], device='cuda:0', grad_fn=<NegBackward0>)
Training:  24%|██▍       | 6/25 [02:24<07:26, 23.48s/it]ic| loss: tensor([0.8405], device='cuda:0', grad_fn=<NegBackward0>)
Training:  28%|██▊       | 7/25 [02:48<07:08, 23.83s/it]ic| loss: tensor([0.6571], device='cuda:0', grad_fn=<NegBackward0>)
Training:  32%|█

[36mValidation :[0m


Validating: 100%|██████████| 2/2 [00:45<00:00, 22.75s/it]


Classe [31m0[0m | Precision: [32m87.36%[0m, Recall: [32m72.38%[0m, F1 Score: [32m79.17%[0m
Classe [31m1[0m | Precision: [32m29.27%[0m, Recall: [32m52.17%[0m, F1 Score: [32m30.54%[0m
Score : 0.39293479919433594
[33mTest :[0m


Validating: 100%|██████████| 3/3 [01:05<00:00, 21.68s/it]


Classe [31m0[0m | Precision: [32m85.04%[0m, Recall: [32m69.23%[0m, F1 Score: [32m76.33%[0m
Classe [31m1[0m | Precision: [32m26.15%[0m, Recall: [32m47.22%[0m, F1 Score: [32m24.70%[0m
Score : 0.3399321734905243

[32mEPOCH 5:[0m


Training:   0%|          | 0/25 [00:00<?, ?it/s]ic| loss: tensor([0.5867], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 1/25 [00:23<09:18, 23.27s/it]ic| loss: tensor([0.7087], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 2/25 [00:53<10:25, 27.18s/it]ic| loss: tensor([0.8408], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 3/25 [01:23<10:26, 28.50s/it]ic| loss: tensor([0.7092], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█▌        | 4/25 [01:54<10:17, 29.39s/it]ic| loss: tensor([0.8783], device='cuda:0', grad_fn=<NegBackward0>)
Training:  20%|██        | 5/25 [02:24<09:55, 29.80s/it]ic| loss: tensor([0.6615], device='cuda:0', grad_fn=<NegBackward0>)
Training:  24%|██▍       | 6/25 [02:54<09:26, 29.82s/it]ic| loss: tensor([0.7652], device='cuda:0', grad_fn=<NegBackward0>)
Training:  28%|██▊       | 7/25 [03:24<08:57, 29.86s/it]ic| loss: tensor([0.7234], device='cuda:0', grad_fn=<NegBackward0>)
Training:  32%|█

[36mValidation :[0m


Validating: 100%|██████████| 2/2 [01:00<00:00, 30.19s/it]


Classe [31m0[0m | Precision: [32m84.69%[0m, Recall: [32m79.05%[0m, F1 Score: [32m81.77%[0m
Classe [31m1[0m | Precision: [32m26.67%[0m, Recall: [32m34.78%[0m, F1 Score: [32m18.55%[0m
Score : 0.29930806159973145
[33mTest :[0m


Validating: 100%|██████████| 3/3 [01:20<00:00, 26.97s/it]

Classe [31m0[0m | Precision: [32m84.89%[0m, Recall: [32m75.64%[0m, F1 Score: [32m80.00%[0m
Classe [31m1[0m | Precision: [32m28.30%[0m, Recall: [32m41.67%[0m, F1 Score: [32m23.58%[0m
Score : 0.33739620447158813





In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchaudio
from load_data import *
from transformers import Wav2Vec2Processor
from transformers import CamembertTokenizer

raw_data = load_all_ipus("Dataset/transcr")
filepath = "Dataset/audio/2_channels/"

wave2vec_name = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(wave2vec_name)

bert_name = 'camembert-base'
tokenizer = CamembertTokenizer.from_pretrained(bert_name)

def get_audio(i, raw_data, filepath):
    audio_file_path =  filepath + raw_data["dyad"][i].replace("transcr\\","") + ".wav"
    audio_tensor, sampling_rate = torchaudio.load(audio_file_path)
    audio_tensor = processor(audio_tensor, return_tensors="pt", sampling_rate = sampling_rate).input_values.squeeze(0)
    stop = int(sampling_rate*raw_data["stop"][i])
    start = stop-sampling_rate*1
    if start < 0:
        sample_tensor = audio_tensor[:,0:stop]
        sample_tensor = torch.cat((torch.zeros((2,abs(start))),sample_tensor),dim=1)
    else:
        sample_tensor = audio_tensor[:,start:stop]
    return sample_tensor

def get_text(i, raw_data):
    text = raw_data["text"][i]
    text_tokenized = tokenizer(text, return_tensors="pt")['input_ids']
    text_tokenized = torch.cat((torch.tensor([[1]*20]),text_tokenized),dim=1)
    text_tokenized = text_tokenized[:,-20:]
    return text_tokenized.squeeze(0)

def get_label(i, raw_data):
    return raw_data["turn_after"].astype("float32")[i]

class DataGenerator(Dataset):
    
    def __init__(self, raw_data, filepath):
        self.raw_data = raw_data
        self.filepath = filepath
    
    def __getitem__(self, i):
        return {"audio" : get_audio(i, self.raw_data, self.filepath),
                "text" : get_text(i, self.raw_data),
                "label" : get_label(i, self.raw_data)}
        
    def __len__(self):
        return len(self.raw_data)
    
def create_dataloader(generator):
    
    dataloader = DataLoader(generator,
                            batch_size=64,
                            shuffle=True,
                            drop_last=True,)
    
    return dataloader

(16400, 12)
is_main_speaker      0.739207
turn_at_start        0.184756
turn_after           0.184756
turn_start_word     10.250372
yield_at_end         0.188415
request_at_start     0.198537
dtype: float64


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchaudio
from load_data import *
from transformers import Wav2Vec2Processor
from transformers import CamembertTokenizer

raw_data = load_all_ipus("Dataset/transcr")
filepath = "Dataset/audio/2_channels/"

wave2vec_name = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(wave2vec_name)

bert_name = 'camembert-base'
tokenizer = CamembertTokenizer.from_pretrained(bert_name)

def get_audio(i, raw_data, filepath):
    audio_file_path =  filepath + raw_data["dyad"][i].replace("transcr\\","") + ".wav"
    audio_tensor, sampling_rate = torchaudio.load(audio_file_path)
    audio_tensor = processor(audio_tensor, return_tensors="pt", sampling_rate = sampling_rate).input_values.squeeze(0)
    stop = int(sampling_rate*raw_data["stop"][i])
    start = stop-sampling_rate*1
    if start < 0:
        sample_tensor = audio_tensor[:,0:stop]
        sample_tensor = torch.cat((torch.zeros((2,abs(start))),sample_tensor),dim=1)
    else:
        sample_tensor = audio_tensor[:,start:stop]
    return sample_tensor

def get_text(i, raw_data):
    text = raw_data["text"][i]
    text_tokenized = tokenizer(text, return_tensors="pt")['input_ids']
    text_tokenized = torch.cat((torch.tensor([[1]*20]),text_tokenized),dim=1)
    text_tokenized = text_tokenized[:,-20:]
    return text_tokenized.squeeze(0)

def get_label(i, raw_data):
    return raw_data["turn_after"].astype("float32")[i]

class DataGenerator(Dataset):
    
    def __init__(self, raw_data, filepath):
        self.raw_data = raw_data
        self.filepath = filepath
    
    def __getitem__(self, i):
        return {"audio" : get_audio(i, self.raw_data, self.filepath),
                "text" : get_text(i, self.raw_data),
                "label" : get_label(i, self.raw_data)}
        
    def __len__(self):
        return len(self.raw_data)
    
def create_dataloader(generator):
    
    dataloader = DataLoader(generator,
                            batch_size=64,
                            shuffle=True,
                            drop_last=True,)
    
    return dataloader

(16400, 12)
is_main_speaker      0.739207
turn_at_start        0.184756
turn_after           0.184756
turn_start_word     10.250372
yield_at_end         0.188415
request_at_start     0.198537
dtype: float64


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
generator = DataGenerator(raw_data.iloc[2048:2048*3], filepath)

In [6]:
model.train_loop(generator, 5)


[32mEPOCH 1:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]ic| loss: tensor([0.9636], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:30<25:03, 30.07s/it]ic| loss: tensor([1.2293], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [01:05<27:12, 33.31s/it]ic| loss: tensor([0.8569], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [01:28<23:00, 28.76s/it]ic| loss: tensor([1.0114], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [01:55<21:53, 27.94s/it]ic| loss: tensor([1.2257], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [02:36<23:58, 31.28s/it]


An error occurred: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
[36mValidation :[0m


Validating: 100%|██████████| 5/5 [02:41<00:00, 32.21s/it]


Classe [31m0[0m | Precision: [32m84.49%[0m, Recall: [32m80.23%[0m, F1 Score: [32m82.31%[0m
Classe [31m1[0m | Precision: [32m32.00%[0m, Recall: [32m38.71%[0m, F1 Score: [32m24.77%[0m
Score : 0.3512994647026062
[33mTest :[0m


Validating: 100%|██████████| 6/6 [03:00<00:00, 30.06s/it]


Classe [31m0[0m | Precision: [32m85.37%[0m, Recall: [32m78.44%[0m, F1 Score: [32m81.76%[0m
Classe [31m1[0m | Precision: [32m23.33%[0m, Recall: [32m32.81%[0m, F1 Score: [32m15.31%[0m
Score : 0.2727286219596863

[32mEPOCH 2:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]ic| loss: tensor([0.7594], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:23<19:59, 23.99s/it]ic| loss: tensor([0.9254], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [00:50<20:40, 25.32s/it]ic| loss: tensor([0.8800], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [01:20<22:06, 27.63s/it]ic| loss: tensor([1.0161], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [01:44<20:36, 26.31s/it]ic| loss: tensor([0.9283], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [02:09<19:38, 25.63s/it]ic| loss: tensor([0.9517], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 6/51 [02:33<18:48, 25.09s/it]ic| loss: tensor([0.9378], device='cuda:0', grad_fn=<NegBackward0>)
Training:  14%|█▎        | 7/51 [03:01<19:07, 26.09s/it]ic| loss: tensor([0.7265], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█

An error occurred: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
[36mValidation :[0m


Validating: 100%|██████████| 5/5 [02:05<00:00, 25.15s/it]


Classe [31m0[0m | Precision: [32m84.33%[0m, Recall: [32m87.60%[0m, F1 Score: [32m85.93%[0m
Classe [31m1[0m | Precision: [32m38.46%[0m, Recall: [32m32.26%[0m, F1 Score: [32m24.81%[0m
Score : 0.35815075039863586
[33mTest :[0m


Validating: 100%|██████████| 6/6 [02:57<00:00, 29.66s/it]


Classe [31m0[0m | Precision: [32m85.62%[0m, Recall: [32m85.09%[0m, F1 Score: [32m85.36%[0m
Classe [31m1[0m | Precision: [32m25.00%[0m, Recall: [32m25.81%[0m, F1 Score: [32m12.90%[0m
Score : 0.259451299905777

[32mEPOCH 3:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]ic| loss: tensor([0.8643], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:39<33:02, 39.65s/it]ic| loss: tensor([0.9456], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [01:08<27:10, 33.27s/it]ic| loss: tensor([0.8322], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [01:39<25:45, 32.20s/it]ic| loss: tensor([0.7710], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [02:12<25:25, 32.47s/it]ic| loss: tensor([1.0474], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [02:43<24:25, 31.85s/it]ic| loss: tensor([1.3760], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 6/51 [03:12<23:19, 31.11s/it]ic| loss: tensor([1.0645], device='cuda:0', grad_fn=<NegBackward0>)
Training:  14%|█▎        | 7/51 [03:42<22:32, 30.75s/it]ic| loss: tensor([1.0095], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█

An error occurred: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
[36mValidation :[0m


Validating: 100%|██████████| 5/5 [03:21<00:00, 40.25s/it]


Classe [31m0[0m | Precision: [32m87.36%[0m, Recall: [32m58.69%[0m, F1 Score: [32m70.21%[0m
Classe [31m1[0m | Precision: [32m26.71%[0m, Recall: [32m63.93%[0m, F1 Score: [32m34.16%[0m
Score : 0.4064594507217407
[33mTest :[0m


Validating: 100%|██████████| 6/6 [03:11<00:00, 31.90s/it]


Classe [31m0[0m | Precision: [32m88.18%[0m, Recall: [32m55.76%[0m, F1 Score: [32m68.32%[0m
Classe [31m1[0m | Precision: [32m21.55%[0m, Recall: [32m61.90%[0m, F1 Score: [32m26.68%[0m
Score : 0.3417300581932068

[32mEPOCH 4:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]ic| loss: tensor([0.8782], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:35<29:25, 35.31s/it]ic| loss: tensor([0.9013], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [01:10<28:34, 34.99s/it]ic| loss: tensor([0.7270], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [01:44<27:50, 34.81s/it]ic| loss: tensor([0.6767], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [02:18<27:03, 34.54s/it]ic| loss: tensor([0.9790], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [02:53<26:25, 34.47s/it]ic| loss: tensor([0.6718], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 6/51 [03:27<25:47, 34.39s/it]ic| loss: tensor([0.8697], device='cuda:0', grad_fn=<NegBackward0>)
Training:  14%|█▎        | 7/51 [04:01<25:15, 34.44s/it]ic| loss: tensor([0.9754], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█

An error occurred: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
[36mValidation :[0m


Validating: 100%|██████████| 5/5 [02:00<00:00, 24.07s/it]


Classe [31m0[0m | Precision: [32m84.72%[0m, Recall: [32m74.62%[0m, F1 Score: [32m79.35%[0m
Classe [31m1[0m | Precision: [32m27.47%[0m, Recall: [32m41.67%[0m, F1 Score: [32m22.89%[0m
Score : 0.33055102825164795
[33mTest :[0m


Validating:   0%|          | 0/6 [00:09<?, ?it/s]


KeyboardInterrupt: 

In [7]:
torch.save(model, "checkpoint1.pth")

In [8]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[2048:2048*3], filepath)
model = Model(27904, 16, 2, device)

# dataloader = create_dataloader(generator)
# model.evaluate(dataloader)

print("Nombre de paramètres du model:", model.parameters_number())

model.train_loop(generator, 5)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Nombre de paramètres du model: 446514

[32mEPOCH 1:[0m


Training:   0%|          | 0/51 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [9]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[2048:2048*3], filepath)
model = Model(27904, 16, 2, device)

dataloader = create_dataloader(generator)
model.evaluate(dataloader)

# print("Nombre de paramètres du model:", model.parameters_number())

# model.train_loop(generator, 5)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Validating:   0%|          | 0/64 [00:00<?, ?it/s]


KeyError: 1460

In [10]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[:64], filepath)
model = Model(27904, 16, 2, device)

dataloader = create_dataloader(generator)
model.evaluate(dataloader)

# print("Nombre de paramètres du model:", model.parameters_number())

# model.train_loop(generator, 5)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Validating:   0%|          | 0/1 [00:27<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [11]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        
        self.to(self.device)
        
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[:64], filepath)
model = Model(27904, 16, 2, device)

dataloader = create_dataloader(generator)
model.evaluate(dataloader)

# print("Nombre de paramètres du model:", model.parameters_number())

# model.train_loop(generator, 5)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Validating: 100%|██████████| 1/1 [00:23<00:00, 23.04s/it]

Classe [31m0[0m | Precision: [32m0.00%[0m, Recall: [32m0.00%[0m, F1 Score: [32m0.00%[0m
Classe [31m1[0m | Precision: [32m15.62%[0m, Recall: [32m100.00%[0m, F1 Score: [32m27.03%[0m
Score : 0.22162160277366638





In [12]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        
        self.to(self.device)
        
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[:64*4], filepath)
model = Model(27904, 16, 2, device)

dataloader = create_dataloader(generator)
model.evaluate(dataloader)

# print("Nombre de paramètres du model:", model.parameters_number())

# model.train_loop(generator, 5)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Validating: 100%|██████████| 4/4 [01:49<00:00, 27.47s/it]

Classe [31m0[0m | Precision: [32m82.81%[0m, Recall: [32m100.00%[0m, F1 Score: [32m90.60%[0m
Classe [31m1[0m | Precision: [32m0.00%[0m, Recall: [32m0.00%[0m, F1 Score: [32m0.00%[0m
Score : 0.16307693719863892





In [13]:
model = torch.load("checkpoint1.pth")

In [14]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        
        self.to(self.device)
        
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[4096:8192], filepath)

# model = Model(27904, 16, 2, device)
# dataloader = create_dataloader(generator)
# model.evaluate(dataloader)

# print("Nombre de paramètres du model:", model.parameters_number())

# model.train_loop(generator, 5)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
model.train_loop(generator, 5)


[32mEPOCH 1:[0m


Training:   0%|          | 0/51 [00:10<?, ?it/s]


An error occurred: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.
[36mValidation :[0m


Validating:   0%|          | 0/5 [00:00<?, ?it/s]

In [1]:
model = torch.load("checkpoint1")

NameError: name 'torch' is not defined

In [2]:
import torch
from tqdm import tqdm
from colorama import Fore, Style
from data_processing import *
from transformers import Wav2Vec2Model
from transformers import CamembertModel
from icecream import ic

########################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wave2vec_name = "facebook/wav2vec2-base-960h"
wave2vec_model = Wav2Vec2Model.from_pretrained(wave2vec_name)
wave2vec_model = wave2vec_model.to(device)

bert_name = 'camembert-base'
bert_model = CamembertModel.from_pretrained(bert_name)
bert_model = bert_model.to(device)

for param in wave2vec_model.parameters():
    param.requires_grad = False

for param in bert_model.parameters():
    param.requires_grad = False
    
########################################################################

class Model(torch.nn.Module):
    """
    Model using covolutional neural net architecture.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, device):
        
        super(Model, self).__init__()

        self.device = device
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Define model components
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        
###################
        
    def parameters_number(self):
        return(sum(p.numel() for p in self.parameters() if p.requires_grad))
        
    def forward(self, input_data):
        # Define the forward pass using the model components
        
        input_data['audio'] = input_data['audio'].to(self.device)
        input_data['text'] = input_data['text'].to(self.device)
        input_data['label'] = input_data['label'].to(self.device)

        channel0 = input_data['audio'][:,0,:]
        channel1 = input_data['audio'][:,1,:]
        
        wave2vec_output0 = wave2vec_model(channel0)
        wave2vec_output1 = wave2vec_model(channel1)
        
        wave2vec_output0 = wave2vec_output0.last_hidden_state
        wave2vec_output1 = wave2vec_output1.last_hidden_state
        
        wave2vec_output0 = torch.nn.functional.max_pool1d(wave2vec_output0, kernel_size=6)
        wave2vec_output1 = torch.nn.functional.max_pool1d(wave2vec_output1, kernel_size=6)
        
        bert_output = bert_model(input_data['text'])
        bert_output = bert_output.last_hidden_state
        
        wave2vec_output0 = torch.flatten(wave2vec_output0, start_dim=1)
        wave2vec_output1 = torch.flatten(wave2vec_output1, start_dim=1)
        bert_output = torch.flatten(bert_output, start_dim=1)

        # Concatenate or combine the outputs as needed
        combined_output = torch.cat((wave2vec_output0, wave2vec_output1, bert_output), dim=1)

        # Apply linear layers
        linear1_output = torch.relu(self.linear1(combined_output))
        final_output = self.linear2(self.dropout(linear1_output))

        return torch.softmax(final_output, dim=1)
    
    def evaluate(self, dataloader):
        
        self.to(self.device)
        
        self.eval()

        # Define the loss function
        loss_function = torch.nn.CrossEntropyLoss()

        total_loss = 0.0
        true_positive_1 = torch.tensor(0).to(self.device)
        false_positive_1 = torch.tensor(0).to(self.device)
        false_negative_1 = torch.tensor(0).to(self.device)
        
        true_positive_0 = torch.tensor(0).to(self.device)
        false_positive_0 = torch.tensor(0).to(self.device)
        false_negative_0 = torch.tensor(0).to(self.device)

        with torch.no_grad():  # Disable gradient computation during validation
            for _, batch in enumerate(tqdm(dataloader, desc="Validating")):
                
                output = self.forward(batch)
                labels = batch["label"].long()

                loss = loss_function(output, labels)
                total_loss += loss.item()

                _, predicted = torch.max(output, 1)
                
                true_positive_1 += torch.sum((predicted == labels) * (labels == 1))
                false_positive_1 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 1))
                false_negative_1 += torch.sum(((1-predicted) == labels) * (labels == 1))
                
                true_positive_0 += torch.sum((predicted == labels) * (labels == 0))
                false_positive_0 += torch.sum((predicted == (1 - labels)) * ((1 - labels) == 0))
                false_negative_0 += torch.sum(((1-predicted) == labels) * (labels == 0))
                
        precision_1 = true_positive_1 / max((true_positive_1 + false_positive_1), 1)
        recall_1 = true_positive_1 / max((true_positive_1 + false_negative_1), 1) 
        
        precision_0 = true_positive_0 / max((true_positive_0 + false_positive_0), 1)
        recall_0 = true_positive_0 / max((true_positive_0 + false_negative_0), 1) 
        
        f1_1 = 2 * (precision_1 * recall_1) / max((precision_1 + recall_1), 1)
        f1_0 = 2 * (precision_0 * recall_0) / max((precision_0 + recall_0), 1)
        
        print(f"Classe {Fore.RED}0{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_0 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_0 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_0 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Classe {Fore.RED}1{Style.RESET_ALL} | Precision: {Fore.GREEN}{precision_1 * 100:.2f}%{Style.RESET_ALL}, Recall: {Fore.GREEN}{recall_1 * 100:.2f}%{Style.RESET_ALL}, F1 Score: {Fore.GREEN}{f1_1 * 100:.2f}%{Style.RESET_ALL}")
        print(f"Score : {(f1_0*0.18 + f1_1*(1-0.18))}")
                        
    def train_one_epoch(self, dataloader):
        
        self.train(True)
        
        optimizer = torch.optim.Adam(self.parameters())
        # loss_function = torch.nn.CrossEntropyLoss()
        
        try:
            for _, batch in enumerate(tqdm(dataloader, desc="Training")):
                
                try:
                
                    optimizer.zero_grad()
                    
                    output = self.forward(batch)
                    labels = batch["label"].long()
                    _, predicted = torch.max(output, 1)
                    
                    true_positive_0 = (output * (1 - labels).unsqueeze(1))[:, 0].sum()
                    false_positive_0 = ((1 - output) * labels.unsqueeze(1))[:, 0].sum()
                    false_negative_0 = ((1 - output) * (1 - labels).unsqueeze(1))[:, 0].sum()

                    true_positive_1 = (output * labels.unsqueeze(1))[:, 1].sum()
                    false_positive_1 = (output * (1 - labels).unsqueeze(1))[:, 1].sum()
                    false_negative_1 = ((1 - output) * labels.unsqueeze(1))[:, 1].sum()

                    epsilon = torch.tensor([1e-7]).to(self.device) # 1e-7
                    
                    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0 + epsilon)
                    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0 + epsilon)
                    f1_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0 + epsilon)

                    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1 + epsilon)
                    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1 + epsilon)
                    f1_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1 + epsilon)
                
                    loss = -torch.log((f1_0*0.18 + f1_1*(1-0.18)) + epsilon)
                    
                    # ic(output, labels)
                    # ic(true_positive_0, false_positive_0, false_negative_0, true_positive_1, false_positive_1, false_negative_1)
                    # ic(precision_0, recall_0, f1_0, precision_1, recall_1, f1_1)
                    # print("Successfully trained a batch")
                    
                    ic(loss)
                    loss.backward()
                    
                    # labels = batch["label"].long()
                    # loss = loss_function(output, labels)
                    # loss.backward()
                    
                    optimizer.step()
                    
                except StopIteration as e:
                    print(f"An error occurred: {e}")   
                    # Catch StopIteration and continue to the next batch
                    pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"An error occurred: {e}")      

    def train_loop(self, generator, nb_epoch):
        
        self.to(self.device)

        data = generator.raw_data

        train_data = data.sample(frac=0.9,random_state=200)
        test_data = data.drop(train_data.index)

        test_data.reset_index(drop=True, inplace=True)
        train_data.reset_index(drop=True, inplace=True)

        test_generator = DataGenerator(test_data, filepath)
        test_loader = create_dataloader(test_generator)
        
        for epoch_number in range(nb_epoch):

            train_subdata = train_data.sample(frac=0.9,random_state=200)
            val_subdata = train_data.drop(train_subdata.index)
            
            train_subdata.reset_index(drop=True, inplace=True)
            val_subdata.reset_index(drop=True, inplace=True)
            
            train_subgenerator = DataGenerator(train_subdata, filepath)
            val_subgenerator = DataGenerator(val_subdata, filepath)

            train_loader = create_dataloader(train_subgenerator)
            val_loader = create_dataloader(val_subgenerator)

            print("")    
            print(f'{Fore.GREEN}EPOCH {epoch_number + 1}:{Style.RESET_ALL}')
            
            # Train for one epoch
            self.train_one_epoch(train_loader)

            # Validate on the validation subset
            print(f'{Fore.CYAN}Validation :{Style.RESET_ALL}')
            self.evaluate(val_loader)
            print(f'{Fore.YELLOW}Test :{Style.RESET_ALL}')
            self.evaluate(test_loader)

########################################################################

generator = DataGenerator(raw_data.iloc[8192:8192+4096], filepath)

# model = Model(27904, 16, 2, device)
# dataloader = create_dataloader(generator)
# model.evaluate(dataloader)

# print("Nombre de paramètres du model:", model.parameters_number())

# model.train_loop(generator, 5)

(16400, 12)
is_main_speaker      0.739207
turn_at_start        0.184756
turn_after           0.184756
turn_start_word     10.250372
yield_at_end         0.188415
request_at_start     0.198537
dtype: float64


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
model = torch.load("checkpoint1")

FileNotFoundError: [Errno 2] No such file or directory: 'checkpoint1'

In [4]:
model = torch.load("checkpoint1.pth")

In [5]:
model.train_loop(generator, 5)


[32mEPOCH 1:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
ic| loss: tensor([1.0002], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:47<39:53, 47.88s/it]ic| loss: tensor([0.6875], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [01:31<37:14, 45.60s/it]ic| loss: tensor([1.3246], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [02:13<34:58, 43.71s/it]ic| loss: tensor([1.1298], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [02:52<32:56, 42.06s/it]ic| loss: tensor([0.7041], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [03:31<31:16, 40.79s/it]ic| loss: tensor([1.1198], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 6/51 [04:13<30:57, 41.27s/it]ic| los

An error occurred: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
[36mValidation :[0m


Validating: 100%|██████████| 5/5 [03:30<00:00, 42.04s/it]


Classe [31m0[0m | Precision: [32m81.47%[0m, Recall: [32m84.40%[0m, F1 Score: [32m82.91%[0m
Classe [31m1[0m | Precision: [32m36.07%[0m, Recall: [32m31.43%[0m, F1 Score: [32m22.67%[0m
Score : 0.33512604236602783
[33mTest :[0m


Validating: 100%|██████████| 6/6 [03:58<00:00, 39.79s/it]


Classe [31m0[0m | Precision: [32m87.91%[0m, Recall: [32m83.02%[0m, F1 Score: [32m85.40%[0m
Classe [31m1[0m | Precision: [32m29.49%[0m, Recall: [32m38.33%[0m, F1 Score: [32m22.61%[0m
Score : 0.33909034729003906

[32mEPOCH 2:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]ic| loss: tensor([0.6685], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:40<33:34, 40.29s/it]ic| loss: tensor([0.8052], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [01:18<31:57, 39.12s/it]ic| loss: tensor([0.9618], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [01:59<31:55, 39.91s/it]ic| loss: tensor([0.7577], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [02:43<32:40, 41.70s/it]ic| loss: tensor([0.8855], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [03:26<32:18, 42.13s/it]ic| loss: tensor([0.6924], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 6/51 [04:09<31:40, 42.23s/it]ic| loss: tensor([0.8990], device='cuda:0', grad_fn=<NegBackward0>)
Training:  14%|█▎        | 7/51 [04:52<31:14, 42.61s/it]ic| loss: tensor([1.0291], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█

An error occurred: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
[36mValidation :[0m


Validating: 100%|██████████| 5/5 [02:49<00:00, 33.84s/it]


Classe [31m0[0m | Precision: [32m82.95%[0m, Recall: [32m72.87%[0m, F1 Score: [32m77.59%[0m
Classe [31m1[0m | Precision: [32m34.95%[0m, Recall: [32m49.32%[0m, F1 Score: [32m34.47%[0m
Score : 0.4223310947418213
[33mTest :[0m


Validating: 100%|██████████| 6/6 [03:52<00:00, 38.75s/it]


Classe [31m0[0m | Precision: [32m89.52%[0m, Recall: [32m68.73%[0m, F1 Score: [32m77.76%[0m
Classe [31m1[0m | Precision: [32m25.74%[0m, Recall: [32m57.38%[0m, F1 Score: [32m29.53%[0m
Score : 0.38212987780570984

[32mEPOCH 3:[0m


Training:   0%|          | 0/51 [00:00<?, ?it/s]ic| loss: tensor([0.7296], device='cuda:0', grad_fn=<NegBackward0>)
Training:   2%|▏         | 1/51 [00:41<34:50, 41.81s/it]ic| loss: tensor([0.7643], device='cuda:0', grad_fn=<NegBackward0>)
Training:   4%|▍         | 2/51 [01:22<33:36, 41.15s/it]ic| loss: tensor([0.9059], device='cuda:0', grad_fn=<NegBackward0>)
Training:   6%|▌         | 3/51 [02:02<32:33, 40.70s/it]ic| loss: tensor([0.7504], device='cuda:0', grad_fn=<NegBackward0>)
Training:   8%|▊         | 4/51 [02:49<33:42, 43.02s/it]ic| loss: tensor([0.6704], device='cuda:0', grad_fn=<NegBackward0>)
Training:  10%|▉         | 5/51 [03:25<31:02, 40.49s/it]ic| loss: tensor([0.8376], device='cuda:0', grad_fn=<NegBackward0>)
Training:  12%|█▏        | 6/51 [03:54<27:25, 36.56s/it]ic| loss: tensor([0.9075], device='cuda:0', grad_fn=<NegBackward0>)
Training:  14%|█▎        | 7/51 [04:36<28:05, 38.30s/it]ic| loss: tensor([1.1585], device='cuda:0', grad_fn=<NegBackward0>)
Training:  16%|█

KeyboardInterrupt: 

In [6]:
torch.save(model, "checkpoint2.pth")