In [1]:
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet101
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import numpy as np

In [2]:
from dataloader.dataloader import H5Dataset

In [3]:
# Caminho para os arquivos h5
h5_files = ['h5_files/audio_features.h5', 'h5_files/vsd_clipped_features.h5']

# Cria o dataset
dataset = H5Dataset(h5_files)


In [4]:
# Divide os dados em k folds
k = 5
skf = StratifiedKFold(n_splits=k)
folds = list(skf.split(np.zeros(len(dataset)), [label for _, label in dataset]))


In [5]:
# Processa cada fold
for fold, (train_idx, test_idx) in enumerate(folds):
    train_dataset = Subset(dataset, train_idx)
    test_dataset = Subset(dataset, test_idx)

    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
from torchvision.models.resnet import ResNet101_Weights
from tqdm import tqdm

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device escolhido:', device)

# Define the number of classes
num_classes = 6

for fold, (train_idx, test_idx) in enumerate(folds):
    train_dataset = Subset(dataset, train_idx)
    test_dataset = Subset(dataset, test_idx)

    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Load the pretrained ResNet model and adjust the last layer
    model = resnet101(weights=ResNet101_Weights.IMAGENET1K_V1)

    
    # Adjust the first convolutional layer
    model.conv1 = nn.Conv2d(1, model.conv1.out_channels, 
                            kernel_size=model.conv1.kernel_size[0], 
                            stride=model.conv1.stride[0], 
                            padding=model.conv1.padding[0])
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model = model.to(device)

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    # Train the model
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        pbar = tqdm(enumerate(train_dataloader), total=len(train_dataloader), leave=False)
        for i, (inputs, labels) in pbar:
            # Assuming `inputs` is your input tensor
            inputs = inputs.unsqueeze(1)  # Add channel dimension
            labels = labels.type(torch.LongTensor)



            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # Reset the gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update the weights

            # Update the progress bar
            pbar.set_description(f"Fold {fold+1}/{k}, Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

        # Evaluate the model
        model.eval()  # Set the model to evaluation mode
        true_labels = []
        pred_labels = []
        with torch.no_grad():  # Disable gradient computation
            for inputs, labels in test_dataloader:
                # Assuming `inputs` is your input tensor
                inputs = inputs.unsqueeze(1)  # Add channel dimension
                labels = labels.type(torch.LongTensor)
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)  # Forward pass
                _, preds = torch.max(outputs, 1)  # Get the predicted labels

                true_labels.extend(labels.cpu().numpy())
                pred_labels.extend(preds.cpu().numpy())

        # Print the classification report
        print(f"Fold {fold+1}/{k}, Epoch {epoch+1}/{num_epochs}")
        print(classification_report(true_labels, pred_labels))


Device escolhido: cuda


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 1/5, Epoch 1/10
              precision    recall  f1-score   support

           0       0.61      0.99      0.76     11044
           1       0.00      0.00      0.00       599
           2       0.94      0.16      0.27      7016
           3       0.99      1.00      0.99       451
           4       0.00      0.00      0.00        88
           5       0.71      0.02      0.03       311

    accuracy                           0.64     19509
   macro avg       0.54      0.36      0.34     19509
weighted avg       0.72      0.64      0.55     19509



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 1/5, Epoch 2/10
              precision    recall  f1-score   support

           0       0.72      0.24      0.36     11044
           1       0.00      0.00      0.00       599
           2       0.45      1.00      0.62      7016
           3       0.98      1.00      0.99       451
           4       0.00      0.00      0.00        88
           5       0.00      0.00      0.00       311

    accuracy                           0.52     19509
   macro avg       0.36      0.37      0.33     19509
weighted avg       0.59      0.52      0.45     19509



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 1/5, Epoch 3/10
              precision    recall  f1-score   support

           0       0.89      0.86      0.87     11044
           1       0.00      0.00      0.00       599
           2       0.82      0.96      0.88      7016
           3       1.00      0.99      1.00       451
           4       0.50      0.06      0.10        88
           5       0.58      0.12      0.19       311

    accuracy                           0.86     19509
   macro avg       0.63      0.50      0.51     19509
weighted avg       0.83      0.86      0.84     19509



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 1/5, Epoch 4/10
              precision    recall  f1-score   support

           0       0.86      0.89      0.88     11044
           1       0.00      0.00      0.00       599
           2       0.85      0.94      0.89      7016
           3       1.00      0.57      0.73       451
           4       0.00      0.00      0.00        88
           5       0.52      0.05      0.09       311

    accuracy                           0.86     19509
   macro avg       0.54      0.41      0.43     19509
weighted avg       0.82      0.86      0.84     19509



                                                                                                   

KeyboardInterrupt: 

In [None]:
import datetime

# Get the current date to use in the filename
current_date = datetime.datetime.now().strftime('%Y-%m-%d')

# Save the model parameters
torch.save(model.state_dict(), f'model_weights_{current_date}.pth')

