In [6]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/iemocap-trainingdata/data_test.pt
/kaggle/input/iemocap-trainingdata/label_test.pt
/kaggle/input/iemocap-trainingdata/label_train.pt
/kaggle/input/iemocap-trainingdata/data_train.pt


In [7]:
import random
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# **1. LoadData to DataLoader**

In [8]:
random.seed(42)
torch.manual_seed(42)

if torch.cuda.is_available():
  torch.cuda.manual_seed_all(42)

In [18]:
# Đường dẫn đến các file
data_train_iemocap_path = "/kaggle/input/iemocap-trainingdata/data_train.pt"  
label_train_iemocap_path = "/kaggle/input/iemocap-trainingdata/label_train.pt"
data_test_iemocap_path = "/kaggle/input/iemocap-trainingdata/data_test.pt"
label_test_iemocap_path = "/kaggle/input/iemocap-trainingdata/label_test.pt"

# Load dữ liệu từ các file
data_train_iemocap = torch.load(data_train_iemocap_path)
label_train_iemocap = torch.load(label_train_iemocap_path) 
data_test_iemocap = torch.load(data_test_iemocap_path)
label_test_iemocap = torch.load(label_test_iemocap_path)

# Chuẩn bị DataLoader cho dữ liệu huấn luyện và kiểm tra
batch_size = 32  

# Chuẩn bị DataLoader cho dữ liệu huấn luyện và kiểm tra
train_dataset = TensorDataset(data_train_iemocap, label_train_iemocap)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(data_test_iemocap, label_test_iemocap)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define a custom dataset
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = {'data': self.data[idx], 'label': self.labels[idx]}
        return sample

# Create DataLoader for training and testing using the custom dataset
train_dataset_custom = CustomDataset(data_train, label_train)
train_loader_custom = DataLoader(train_dataset_custom, batch_size=batch_size, shuffle=True)

test_dataset_custom = CustomDataset(data_test, label_test)
test_loader_custom = DataLoader(test_dataset_custom, batch_size=batch_size, shuffle=False)


def check_data(data, label):
  print(f'Shape data: {data.shape}') 
  print(f'Dtype data: {data.dtype}')
  print(f'Shape label: {label.shape}')
  print(f'Length: {len(data)}')


# Print information about the loaded data
print("Information about IEMOCAP training data:")
check_data(data_train_iemocap, label_train_iemocap)

print("\nInformation about IEMOCAP test data:")
check_data(data_test_iemocap, label_test_iemocap)

Information about IEMOCAP training data:
Shape data: torch.Size([6023, 40, 1500])
Dtype data: torch.float32
Shape label: torch.Size([6023, 3])
Length: 6023

Information about IEMOCAP test data:
Shape data: torch.Size([1506, 40, 1500])
Dtype data: torch.float32
Shape label: torch.Size([1506, 3])
Length: 1506


# 2. Define Neural Network

In [13]:
#Adjust hidden nodes and one hidden layer
import torch
import torch.nn as nn 
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
# Thêm các imports cần thiết
from sklearn.metrics import f1_score, accuracy_score

# import

# import

class SpeechClassifier(nn.Module):

  def __init__(self, cnn_input_channels, cnn_output_channels,  
               cnn_kernel_size, cnn_stride,  
               cnn_pool_kernel_size, cnn_pool_stride,   
               hidden_size1, hidden_size2, output_size):  
    
    super().__init__()

    # CNN block
    self.cnn = nn.Conv1d(cnn_input_channels, cnn_output_channels,   
                        cnn_kernel_size, stride=cnn_stride)
                        
    self.pool = nn.MaxPool1d(cnn_pool_kernel_size,   
                             stride=cnn_pool_stride)
                             
    
    # Tính toán kích thước đầu vào
    conv_out_length = (1500 - cnn_kernel_size) // cnn_pool_kernel_size + 1  
    self.input_size = cnn_output_channels * conv_out_length     

    self.hidden1 = nn.Linear(self.input_size, hidden_size1)
    self.hidden2 = nn.Linear(hidden_size1, hidden_size2) 

    self.output = nn.Linear(hidden_size2, output_size)

  def forward(self, x):
    x = self.cnn(x)
    x = self.pool(x)
    x = x.view(x.size(0), -1)  
    x = F.relu(self.hidden1(x))
    x = F.relu(self.hidden2(x))
    x = self.output(x)
    return F.softmax(x, dim=1)

# Define model parameters
cnn_input_channels = 40  
cnn_output_channels = 16   
cnn_kernel_size = 5
cnn_stride = 1
cnn_pool_kernel_size = 2  
cnn_pool_stride = 2
hidden_size1 = 256 
hidden_size2 = 512
output_size = 3

# Create an instance of the model
model = SpeechClassifier(cnn_input_channels, cnn_output_channels, cnn_kernel_size, cnn_stride,  
                         cnn_pool_kernel_size, cnn_pool_stride, 
                         hidden_size1, hidden_size2, output_size)
                         
# Print the model architecture   
print(model)




SpeechClassifier(
  (cnn): Conv1d(40, 16, kernel_size=(5,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (hidden1): Linear(in_features=11968, out_features=256, bias=True)
  (hidden2): Linear(in_features=256, out_features=512, bias=True)
  (output): Linear(in_features=512, out_features=3, bias=True)
)


In [5]:
# #main
# import torch
# import torch.nn as nn
# import torch.nn.functional as F

# class ComplexNN(nn.Module):
#     def __init__(self, cnn_input_channels, cnn_output_channels, cnn_kernel_size, cnn_stride, 
#                  cnn_pool_kernel_size, cnn_pool_stride, hidden_size, output_size):
#         super(ComplexNN, self).__init__()
        
#         self.cnn = nn.Conv1d(cnn_input_channels, cnn_output_channels, cnn_kernel_size, stride=cnn_stride)
#         self.pool = nn.MaxPool1d(cnn_pool_kernel_size, stride=cnn_pool_stride)
        
#         self.input_size = cnn_output_channels * ((1500 - cnn_kernel_size) // cnn_pool_kernel_size + 1)
#         self.hidden = nn.Linear(self.input_size, hidden_size)
#         self.relu = nn.ReLU()
        
#         self.output = nn.Linear(hidden_size, output_size)

#     def forward(self, x):
#         x = self.cnn(x)
#         x = self.pool(x)
#         x = x.view(x.size(0), -1)
#         x = self.hidden(x)
#         x = self.relu(x)
#         x = self.output(x)
#         x = F.softmax(x, dim=1)
#         return x

# # Define model parameters
# cnn_input_channels = 40
# cnn_output_channels = 16
# cnn_kernel_size = 5
# cnn_stride = 1
# cnn_pool_kernel_size = 2
# cnn_pool_stride = 2
# hidden_size = 256
# output_size = 3

# # Create an instance of the ComplexNN model
# model = ComplexNN(cnn_input_channels, cnn_output_channels, cnn_kernel_size, cnn_stride,
#                    cnn_pool_kernel_size, cnn_pool_stride, hidden_size, output_size)

# # Print the model architecture
# print(model)


ComplexNN(
  (cnn): Conv1d(40, 16, kernel_size=(5,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (hidden): Linear(in_features=11968, out_features=128, bias=True)
  (relu): ReLU()
  (output): Linear(in_features=128, out_features=3, bias=True)
)


In [15]:
#ADD
import torch
import torch.nn as nn 
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
# Thêm các imports cần thiết
from sklearn.metrics import f1_score, accuracy_score

# import

# import

class SpeechClassifier(nn.Module):

  def __init__(self, cnn_input_channels, cnn_output_channels, 
               cnn_kernel_size, cnn_stride, 
               cnn_pool_kernel_size, cnn_pool_stride, 
               hidden_size, output_size):
    
    super().__init__()
    
    # CNN block
    self.cnn = nn.Conv1d(cnn_input_channels, cnn_output_channels,  
                        cnn_kernel_size, stride=cnn_stride)
                        
    self.pool = nn.MaxPool1d(cnn_pool_kernel_size,  
                             stride=cnn_pool_stride)
                             
    
    # Tính toán kích thước đầu vào 
    conv_out_length = (1500 - cnn_kernel_size) // cnn_pool_kernel_size + 1
    self.input_size = cnn_output_channels * conv_out_length     

    self.hidden = nn.Linear(self.input_size, hidden_size)
    
    self.output = nn.Linear(hidden_size, output_size)

  def forward(self, x):

    x = self.cnn(x)
    x = self.pool(x)
    
    x = x.view(x.size(0), -1)
    
    x = self.hidden(x)
    x = F.relu(x)
    
    x = self.output(x)
    
    return F.softmax(x, dim=1)

# Define model parameters
cnn_input_channels = 40
cnn_output_channels = 16
cnn_kernel_size = 5
cnn_stride = 1
cnn_pool_kernel_size = 2
cnn_pool_stride = 2
hidden_size = 128
output_size = 3

# Create an instance of the ComplexNN model
model = SpeechClassifier(cnn_input_channels, cnn_output_channels, cnn_kernel_size, cnn_stride,
                   cnn_pool_kernel_size, cnn_pool_stride, hidden_size, output_size)

# Print the model architecture
print(model)


SpeechClassifier(
  (cnn): Conv1d(40, 16, kernel_size=(5,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (hidden): Linear(in_features=11968, out_features=128, bias=True)
  (output): Linear(in_features=128, out_features=3, bias=True)
)


# 3. Training Model

In [None]:

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Move the model to the appropriate device
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)   # Adjust the learning rate as needed
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
# Training loop
num_epochs = 100  # Adjust the number of epochs as needed

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    
    for batch_data, batch_labels in train_loader:
        # Move the data to the appropriate device
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_data)
        loss = criterion(outputs, torch.argmax(batch_labels, dim=1))
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        # Update running loss
        running_loss += loss.item()
    
    # Print the average loss for the epoch
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

print('Finished Training')


In [16]:
#Adđ F1-Score & Accuracy after Training

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Move the model to the appropriate device
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)   # Adjust the learning rate as needed
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
# Training loop
num_epochs = 10  # Adjust the number of epochs as needed

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    
    for batch_data, batch_labels in train_loader:
        # Move the data to the appropriate device
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_data)
        loss = criterion(outputs, torch.argmax(batch_labels, dim=1))
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        # Update running loss
        running_loss += loss.item()
    
    # Print the average loss for the epoch
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

print('Finished Training')

# Evaluate on test set
print('Evaluating on test set')

model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for batch_data, batch_labels in test_loader:
        
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
        
        outputs = model(batch_data)
        loss = criterion(outputs, torch.argmax(batch_labels, dim=1))
        
        test_loss += loss.item() * batch_labels.size(0)
        
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == torch.argmax(batch_labels, dim=1)).sum().item()
        
test_loss /= len(test_loader.dataset)

print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {100*correct/len(test_loader.dataset):.2f}%')

# Lưu model và kết quả
torch.save(model.state_dict(), 'best_model.pt')

Using device: cuda
Epoch 1, Loss: 1.2561025979026916
Epoch 2, Loss: 1.2536841164190302
Epoch 3, Loss: 1.2560461728030412
Epoch 4, Loss: 1.2560461731184096
Epoch 5, Loss: 1.2560461728030412
Epoch 6, Loss: 1.2554556591800912
Epoch 7, Loss: 1.254865144926404
Epoch 8, Loss: 1.2554556591800912
Epoch 9, Loss: 1.2554556591800912
Epoch 10, Loss: 1.2560461731184096
Finished Training
Evaluating on test set
Test Loss: 1.2552961495768027
Test Accuracy: 29.61%
