In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import os, pathlib, glob, random
import numpy as np
import matplotlib.pyplot as plt 

from sklearn.metrics import confusion_matrix,f1_score,jaccard_score,matthews_corrcoef,hamming_loss
import scipy

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [4]:
batch_size = 32
output_nodes = 5
learning_rate = 0.01
train_data_path = r"/kaggle/input/lfrcc-order16-5200t/order_16-split/train"
test_data_path = r"/kaggle/input/lfrcc-order16-5200t/order_16-split/test"

In [5]:
class PtDataset(Dataset):
    def __init__(self, directory):
        self.directory = directory
        self.classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir())
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}
        self.files = []
        for c in self.classes:
            c_dir = os.path.join(directory, c)
            c_files = [(os.path.join(c_dir, f), self.class_to_idx[c]) for f in os.listdir(c_dir)]
            self.files.extend(c_files)
        random.shuffle(self.files)
        
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        filepath, label = self.files[idx]
        try:
            mat_vals = scipy.io.loadmat(filepath)
            data = mat_vals['final']
            max_len=1332
            if (max_len > data.shape[0]):
                pad_width = max_len - data.shape[0]
                data = np.pad(data, pad_width=((0, pad_width),(0,0)), mode='constant')
            else:
                data = data[:max_len, :]
        except Exception as e:
            print(f"Error loading file {filepath}: {str(e)}")
            return None
        return data, label
    
train_dataset = PtDataset(train_data_path)
test_dataset = PtDataset(test_data_path)

In [6]:
class PtDataLoader(DataLoader):
    def __init__(self, directory, batch_size, shuffle=True):
        dataset = PtDataset(directory)
        super().__init__(dataset, batch_size=batch_size, shuffle=shuffle)
        
train_dataloader = PtDataLoader(directory=train_data_path, batch_size=batch_size)
test_dataloader = PtDataLoader(directory=test_data_path, batch_size=batch_size)

train_count = len(train_dataset) 
test_count = len(test_dataset)

In [7]:
class TDNN(nn.Module):
    
    def __init__(
                    self, 
                    input_dim, 
                    output_dim,
                    context_size=5,
                    stride=1,
                    dilation=1,
                    batch_norm=False,
                    dropout_p=0.2
                ):
        super(TDNN, self).__init__()
        self.context_size = context_size
        self.stride = stride
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dilation = dilation
        self.dropout_p = dropout_p
        self.batch_norm = batch_norm
      
        self.kernel = nn.Linear(input_dim*context_size, output_dim)
        self.nonlinearity = nn.ReLU()
        if self.batch_norm:
            self.bn = nn.BatchNorm1d(output_dim)
        if self.dropout_p:
            self.drop = nn.Dropout(p=self.dropout_p)
        
    def forward(self, x):
        '''
        input: size (batch, seq_len, input_features)
        outpu: size (batch, new_seq_len, output_features)
        '''
        
        _, _, d = x.shape
        assert (d == self.input_dim), 'Input dimension was wrong. Expected ({}), got ({})'.format(self.input_dim, d)
        x = x.unsqueeze(1)

        # Unfold input into smaller temporal contexts
        x = F.unfold(
                        x, 
                        (self.context_size, self.input_dim), 
                        stride=(1,self.input_dim), 
                        dilation=(self.dilation,1)
                    )

        # N, output_dim*context_size, new_t = x.shape
        x = x.transpose(1,2)
        x = self.kernel(x.float())
        x = self.nonlinearity(x)
        
        if self.dropout_p:
            x = self.drop(x)

        if self.batch_norm:
            x = x.transpose(1,2)
            x = self.bn(x)
            x = x.transpose(1,2)

        return x

In [8]:
class Conv2DLayer(nn.Module):
    """Convolutional 2D subsampling (to 1/4 length).
    Module accepts an input tensor with size (B, T1, I_DIM),
    then subsamples it into tensor with size (B, T2, O_DIM)
    in which T2 is about T1 / 4.
    :param i_dim: Dimension of input feature
    :type i_dim: int
    :param o_dim: Dimension of output feature
    :type o_dim: int
    :param dropout_rate: Dropout rate
    :type o_dim: float
    """
    def __init__(
        self,
        i_dim: int,
        o_dim: int,
    ) -> None:
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, o_dim, 3, 2),
            nn.ReLU(),
            nn.Conv2d(o_dim, o_dim, 3, 2),
            nn.ReLU(),
        )
        # after convolution, tensor size should be (b, o_dim, t, f_dim)
        f_dim = (i_dim - 3) // 2 + 1 ## operation to determine the shape of the output after the 1st convolution layer
        f_dim = (f_dim - 3) // 2 + 1 ## operation to determine the shape of the output after the 2nd convolution layer
        self.out = nn.Linear(o_dim * f_dim, o_dim)

    def forward(self, x):
        x=x.float()
        x = x.unsqueeze(1)  # (b, t, i_dim) -> (b, 1, t, i_dim)
        x = self.conv(x)  # (b, 1, t, i_dim) -> (b, o_dim, t/4, f_dim)
#         print(x.shape)
        b, c, t, f = x.size()
        x = x.transpose(1, 2).contiguous().view(b, t, c * f)
#         print(x.shape)
        x = self.out(x)
        return x

In [9]:
# model_test = Conv2DLayer(39,39).to(device)
# y = torch.tensor(np.random.rand(32,1332,39)).to(device)
# p=model_test(y)

In [10]:
import torch
from torch import nn
from torch.nn import Parameter
import torch.nn.functional as F

class Classic_Attention(nn.Module):
    def __init__(self,input_dim, embed_dim, attn_dropout=0.0):
        super().__init__()
        self.embed_dim = embed_dim
        self.attn_dropout = attn_dropout
        self.lin_proj = nn.Linear(input_dim,embed_dim)
        self.v = torch.nn.Parameter(torch.randn(embed_dim))
    
    def forward(self,inputs):
        lin_out = self.lin_proj(inputs)
        v_view = self.v.unsqueeze(0).expand(lin_out.size(0), len(self.v)).unsqueeze(2)
        attention_weights = torch.tanh(lin_out.bmm(v_view).squeeze(2))
        attention_weights_normalized = F.softmax(attention_weights,1)
        return attention_weights_normalized

In [11]:
class Convnet(nn.Module):
    def __init__(self, input_dim = 39, num_classes=5):
        super(Convnet, self).__init__()
        ## Frame level feature processing
        self.conv_subsample = Conv2DLayer(39,39)
        self.tdnn1 = TDNN(input_dim=39, output_dim=512, context_size=5, dilation=1,dropout_p=0.2)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=5, dilation=2,dropout_p=0.2)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=7, dilation=3,dropout_p=0.2)
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1,dropout_p=0.2)
        self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1,dropout_p=0.2)
        ### Statistics attentive pooling
        self.attention = Classic_Attention(512,512)
        #### Frame levelPooling
        self.segment6 = nn.Linear(1024, 512)
        self.segment7 = nn.Linear(512,512 )
        self.output = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax(dim=1)
    
    def weighted_sd(self,inputs,attention_weights, mean):
        el_mat_prod = torch.mul(inputs,attention_weights.unsqueeze(2).expand(-1,-1,inputs.shape[-1]))
        hadmard_prod = torch.mul(inputs,el_mat_prod)
        variance = torch.sum(hadmard_prod,1) - torch.mul(mean,mean)
        return variance
    
    
    def stat_attn_pool(self,inputs,attention_weights):
        el_mat_prod = torch.mul(inputs,attention_weights.unsqueeze(2).expand(-1,-1,inputs.shape[-1]))
        mean = torch.mean(el_mat_prod,1)
        variance = self.weighted_sd(inputs,attention_weights,mean)
        stat_pooling = torch.cat((mean,variance),1)
        return stat_pooling
    
    
    def forward(self, inputs):
        inputs=self.conv_subsample(inputs)
#         print(inputs.shape)
        tdnn1_out = self.tdnn1(inputs)
        tdnn2_out = self.tdnn2(tdnn1_out)
        tdnn3_out = self.tdnn3(tdnn2_out)
        tdnn4_out = self.tdnn4(tdnn3_out)
        tdnn5_out = self.tdnn5(tdnn4_out)
        ### Stat Pool
        attn_weights = self.attention(tdnn5_out)
        stat_pool_out = self.stat_attn_pool(tdnn5_out,attn_weights)
        segment6_out = self.segment6(stat_pool_out)
        x_vec = self.segment7(segment6_out)
        predictions = self.output(x_vec)
        return predictions,x_vec

In [12]:
model = Convnet().to(device)

In [13]:
# y = torch.tensor(np.random.rand(32,1332,39)).to(device)
# p=model(y)

In [14]:
# p[0].shape

In [15]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [16]:
# learning_rate=0.001
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
%pip install tqdm
from tqdm import tqdm
import scipy
from scipy import io
print(model)

[0mNote: you may need to restart the kernel to use updated packages.
Convnet(
  (conv_subsample): Conv2DLayer(
    (conv): Sequential(
      (0): Conv2d(1, 39, kernel_size=(3, 3), stride=(2, 2))
      (1): ReLU()
      (2): Conv2d(39, 39, kernel_size=(3, 3), stride=(2, 2))
      (3): ReLU()
    )
    (out): Linear(in_features=351, out_features=39, bias=True)
  )
  (tdnn1): TDNN(
    (kernel): Linear(in_features=195, out_features=512, bias=True)
    (nonlinearity): ReLU()
    (drop): Dropout(p=0.2, inplace=False)
  )
  (tdnn2): TDNN(
    (kernel): Linear(in_features=2560, out_features=512, bias=True)
    (nonlinearity): ReLU()
    (drop): Dropout(p=0.2, inplace=False)
  )
  (tdnn3): TDNN(
    (kernel): Linear(in_features=3584, out_features=512, bias=True)
    (nonlinearity): ReLU()
    (drop): Dropout(p=0.2, inplace=False)
  )
  (tdnn4): TDNN(
    (kernel): Linear(in_features=512, out_features=512, bias=True)
    (nonlinearity): ReLU()
    (drop): Dropout(p=0.2, inplace=False)
  )
  (t

In [18]:
#Model training and testing 
n_total_steps = len(train_dataloader) # n_total_steps * batch size will give total number of training files (consider that last batch may not be fully filled)
train_accuracy_list = []
train_loss_list = []
test_accuracy_list = []
pred_labels =[]
act_labels = []
max_acc=0
num_epochs = 10
for epoch in tqdm(range(num_epochs)):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for batch_idx, (images,labels) in enumerate(train_dataloader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        ##images = images.unsqueeze(1)
        optimizer.zero_grad()
        outputs=model(images)
        outputs=outputs[0]
#         labels = labels.float()
#         labels=labels.unsqueeze(1)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    train_accuracy_list.append(train_accuracy)
    train_loss_list.append(train_loss)

    
    # Evaluation on testing dataset
    model.eval()
    test_accuracy=0.0
    pred = []
    lab = []
    for i, (images,labels) in enumerate(test_dataloader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        ##images = images.unsqueeze(1) 
#         print(i,images.shape)
        outputs=model(images)
        outputs=outputs[0]
#         labels = labels.float()
#         labels=labels.unsqueeze(1)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
        pred.extend(prediction.tolist())
        lab.extend(labels.tolist())
    test_accuracy=test_accuracy/test_count
    test_accuracy_list.append(test_accuracy)
    if max_acc < test_accuracy:
        max_acc = test_accuracy
        pred_labels = pred
        actual_labels = lab
        torch.save(model,'best_save_lfrcc_tdnn_conv-sample-1332.pth')
    
    print('Epoch : '+str(epoch+1)+'/'+str(num_epochs)+'   Train Loss : '+str(train_loss)+'   Train Accuracy : '+str(train_accuracy)+'   Test Accuracy : '+str(test_accuracy))
print(max_acc)    
print('Finished Training and Testing')

 10%|█         | 1/10 [04:07<37:04, 247.21s/it]

Epoch : 1/10   Train Loss : tensor(1.6106)   Train Accuracy : 0.20009640877319837   Test Accuracy : 0.2004626060138782


 20%|██        | 2/10 [06:38<25:27, 190.99s/it]

Epoch : 2/10   Train Loss : tensor(1.6104)   Train Accuracy : 0.2   Test Accuracy : 0.1981495759444873


 30%|███       | 3/10 [09:10<20:12, 173.20s/it]

Epoch : 3/10   Train Loss : tensor(1.6103)   Train Accuracy : 0.19956616052060738   Test Accuracy : 0.2004626060138782


 40%|████      | 4/10 [11:41<16:25, 164.32s/it]

Epoch : 4/10   Train Loss : tensor(1.6105)   Train Accuracy : 0.1990359122680164   Test Accuracy : 0.2004626060138782


 50%|█████     | 5/10 [14:12<13:18, 159.66s/it]

Epoch : 5/10   Train Loss : tensor(1.6106)   Train Accuracy : 0.1975415762834418   Test Accuracy : 0.1981495759444873


 60%|██████    | 6/10 [16:45<10:28, 157.17s/it]

Epoch : 6/10   Train Loss : tensor(1.6105)   Train Accuracy : 0.1977825982164377   Test Accuracy : 0.2004626060138782


 70%|███████   | 7/10 [19:15<07:44, 154.92s/it]

Epoch : 7/10   Train Loss : tensor(1.6105)   Train Accuracy : 0.1970113280308508   Test Accuracy : 0.2004626060138782


 80%|████████  | 8/10 [21:46<05:06, 153.49s/it]

Epoch : 8/10   Train Loss : tensor(1.6104)   Train Accuracy : 0.19734875873704508   Test Accuracy : 0.2004626060138782


 90%|█████████ | 9/10 [24:16<02:32, 152.58s/it]

Epoch : 9/10   Train Loss : tensor(1.6104)   Train Accuracy : 0.19865027717522293   Test Accuracy : 0.2004626060138782


100%|██████████| 10/10 [26:47<00:00, 160.70s/it]

Epoch : 10/10   Train Loss : tensor(1.6099)   Train Accuracy : 0.20086767895878524   Test Accuracy : 0.2004626060138782
0.2004626060138782
Finished Training and Testing



