In [1]:
!pwd

/dli/task/ComParE2022_VecNet/notebooks/DK


In [2]:
import os
import pandas as pd
import sys
sys.path.insert(0, os.path.abspath('../../src'))
sys.path.insert(0, os.path.abspath('ComParE2022_VecNet/src'))
import config,config_pytorch
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score ,confusion_matrix, classification_report

import math
import pickle

from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch
import torch.optim as optim
from datetime import datetime
import time


import matplotlib
import matplotlib.pyplot as plt



from tqdm.notebook import tqdm

import random
import torchaudio
import torchaudio.transforms as AT
import torchvision.transforms as VT
from torch.cuda.amp import autocast, GradScaler
#from timm.scheduler.cosine_lr import CosineLRScheduler
import timm
import timm.optim
from timm.models import model_parameters
from glob import glob
## nnAudio
from nnAudio import features
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader
import argparse
## DDp Import
import torch.distributed as dist
import warnings
warnings.filterwarnings("ignore")

import argparse
#import deepspeed
from torch.utils.tensorboard import SummaryWriter 
import torch.profiler
from contextlib import ExitStack
DEBUG = False

In [3]:
class Expert(nn.Module):
    def __init__(self, input_size =8, hidden_size =8, output_size=8):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class Gate(nn.Module):
    def __init__(self, input_size=8, num_experts = 8):
        super().__init__()
        self.fc1 = nn.Linear(input_size, num_experts)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.softmax(x, dim=1)
        return x


In [7]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, is_last=False):
        super(BasicBlock, self).__init__()
        self.is_last = is_last
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        preact = out
        out = F.relu(out)
        if self.is_last:
            return out, preact
        else:
            return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, is_last=False):
        super(Bottleneck, self).__init__()
        self.is_last = is_last
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        preact = out
        out = F.relu(out)
        if self.is_last:
            return out, preact
        else:
            return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, in_channel=1, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(in_channel, 64, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves
        # like an identity. This improves the model by 0.2~0.3% according to:
        # https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for i in range(num_blocks):
            stride = strides[i]
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, layer=100):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return out


def resnet18(**kwargs):
    return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)


def resnet34(**kwargs):
    return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)


def resnet50(**kwargs):
    return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)


def resnet101(**kwargs):
    return ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)


model_dict = {
    'resnet18': [resnet18, 512],
    'resnet34': [resnet34, 512],
    'resnet50': [resnet50, 2048],
    'resnet101': [resnet101, 2048],
}



class SupConResNet(nn.Module):
    """backbone + projection head"""
    def __init__(self, name='resnet50', head='mlp', feat_dim=128):
        super(SupConResNet, self).__init__()
        model_fun, dim_in = model_dict[name]
        self.encoder = model_fun()
        if head == 'linear':
            self.head = nn.Linear(dim_in, feat_dim)
        elif head == 'mlp':
            self.head = nn.Sequential(
                nn.Linear(dim_in, dim_in),
                nn.ReLU(inplace=True),
                nn.Linear(dim_in, feat_dim)
            )
        else:
            raise NotImplementedError(
                'head not supported: {}'.format(head))

    def forward(self, x):
        feat = self.encoder(x)
        feat = F.normalize(self.head(feat), dim=1)
        return feat

In [8]:
class MyModel(nn.Module):
    def __init__(self, model_name, image_size = 224):
        super().__init__()
        # num_classes=0 removes the pretrained head
        #self.backbone = timm.create_model(model_name, pretrained=True, num_classes=8, in_chans=1, drop_path_rate=0.2, global_pool='max', drop_rate=0.25)
        #####  This section is model specific
        #### It freezes some fo the layers by name
        #### you'll have to inspect the model to see the names
                #### end layer freezing
        #self.out = nn.Linear(self.backbone.num_features, 1)
        self.sizer = VT.Resize((image_size,image_size),antialias = True)
        self.encoder = SupConResNet()
        self.batch_norm = nn.BatchNorm2d(num_features= 1)
        self.spec_layer = features.STFT(n_fft=int(config.NFFT), freq_bins=None, hop_length=int(config.n_hop),
                              window='hann', freq_scale='linear', center=True, pad_mode='reflect',
                           sr=config.rate, output_format="Magnitude", trainable=False,verbose = False).to('cuda')
        #self.augment_layer = augment_audio(trainable = True, sample_rate = config.rate)
        
    def forward(self, x,train = True):
        # first compute spectrogram
        spec_gram = self.spec_layer(x)
        output = {}
        #print("post spec gram shape = ",spec_gram.shape)
        spec_gram = self.batch_norm(spec_gram.unsqueeze(dim = 1))
        #print("post norm shape = ",spec_gram.shape)
        spec_gram_nan_check = torch.isnan(spec_gram).any().item()
        assert not (spec_gram_nan_check) ,"Tensor contains NaN values after spec gram creation."
        
        with torch.no_grad():
            if train == True:
                #generate a random number and if condition is met apply aug
                ta_transformations_rndm_choice = VT.RandomChoice([AT.FrequencyMasking(freq_mask_param=100),AT.TimeMasking(time_mask_param=50)], p=[.4, .4])
                ta_transformations_rndm_apply = VT.RandomApply([AT.FrequencyMasking(freq_mask_param=50),AT.TimeMasking(time_mask_param=25)],p = .2)
                spec_gram = ta_transformations_rndm_choice(spec_gram)
                spec_gram = ta_transformations_rndm_apply(spec_gram)
                spec_gram_nan_check = torch.isnan(spec_gram).any().item()
                assert not (spec_gram_nan_check) ,"Tensor contains NaN values after augmentations  "
                aug_bat = [ta_transformations_rndm_choice(spec_gram),ta_transformations_rndm_choice(spec_gram)]
                aug_bat = torch.cat(aug_bat , dim = 0)
                #print("shape of augmented batch = ",aug_bat.shape)
                #output['feat'] = aug_bat
                
        
        encoder = self.encoder.to('cuda')
        features = encoder(aug_bat)
        #print("output of encoder shape = ",features.shape)
        bsz = x.shape[0]
        f1, f2 = torch.split(features, [bsz, bsz], dim=0)
        features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1)
        #loss = criterion(features, y)
        output['feat'] = features
        #x = self.sizer(spec_gram.squeeze(dim = 1))
        #print("post sizer shape = ",x.shape)
        #x = x.unsqueeze(dim = 1)
        #print("post unsqueeze shape = ",x.shape)
        
        # then repeat channels
        del spec_gram,aug_bat
        #backbone_op_nan_check = torch.isnan(x).any().item()
        #assert not (backbone_op_nan_check) ,"Tensor contains NaN values in the backbone OP "
        #print("x shape = " + str(x.shape))
        #print("x = " +str(x))
        #pred = nn.Softmax(x)
        #pred = x
        #print(np.argmax(pred.detach().cpu().numpy()))
        #print(pred)
        #output["prediction"]=  pred 
        #print(output)
        return output


In [9]:
x = torch.rand(1,15360,device = 'cuda')

In [10]:
model_b =MyModel('convnext_xlarge_in22k',224)

In [7]:
ta_transformations_rndm_choice = VT.RandomChoice([AT.FrequencyMasking(freq_mask_param=100),AT.TimeMasking(time_mask_param=50)], p=[.4, .4])
ta_transformations_rndm_apply = VT.RandomApply([AT.FrequencyMasking(freq_mask_param=50),AT.TimeMasking(time_mask_param=25)],p = .15)

In [12]:
model_b.to('cuda')

MyModel(
  (sizer): Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
  (encoder): SupConResNet(
    (encoder): ResNet(
      (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (shortcut): Sequential(
            (0): Conv2d(64,

TypeError: forward() missing 1 required positional argument: 'specgram'

<__main__.TwoCropTransform object at 0x7fb3077d5f70>


In [13]:
o = model_b(x)

In [65]:
print(o)

tensor([[[-0.0905],
         [-0.1756],
         [-0.0890],
         [ 0.0043],
         [-0.0442],
         [ 0.0535],
         [ 0.1331],
         [-0.0163]]], grad_fn=<BmmBackward0>)


In [15]:
print(o['feat'].shape)

torch.Size([1, 2, 128])


In [39]:

class MoE(nn.Module):
    def __init__(self, input_size, hidden_size , num_classes , num_experts):
        super().__init__()
        self.experts = nn.ModuleList([Expert(input_size, hidden_size, num_classes) for _ in range(num_experts)])
        self.gate = Gate(input_size, num_experts)
        self.input = MyModel('convnext_xlarge_in22k',224)
        

    def forward(self, x):
        x = self.input(x)
        print("shape post cnn = ",x.shape)
        expert_outputs = [expert(x) for expert in self.experts]
        print("expert_outputs = ",expert_outputs)
        expert_outputs = torch.stack(expert_outputs, dim=1)
        print("expert_outputs post stack = ",expert_outputs)
        print("post stack shape  = ",expert_outputs.shape)
        
        gate_outputs = self.gate(x)
        print(" gate_outputs= ",gate_outputs)
        print(" gate_outputs shape = ",gate_outputs.shape)
        
        gate_outputs = gate_outputs.unsqueeze(2)
        print("post unsqueeze gate_outputs =  ",gate_outputs.shape)
        weighted_sum = torch.bmm(expert_outputs, gate_outputs)
        print("weighted_sum = ",weighted_sum)
        
        return weighted_sum.squeeze(2)

# Example usage



In [42]:
model = MoE(input_size=8, hidden_size=8, num_classes=8, num_experts=8)
input_data = torch.randn(1, 15360)
output = model(input_data)
print("output = ",output)  # (32, 5)
pred = torch.argmax(output, dim = 1)


post spec gram shape =  torch.Size([1, 1025, 121])
post norm shape =  torch.Size([1, 1, 1025, 121])
post sizer shape =  torch.Size([1, 224, 224])
post unsqueeze shape =  torch.Size([1, 1, 224, 224])
x shape = torch.Size([1, 8])
shape post cnn =  torch.Size([1, 8])
expert_outputs =  [tensor([[ 0.2929,  0.0893, -0.2214, -0.0515,  0.0937,  0.0526,  0.2732, -0.0616]],
       grad_fn=<AddmmBackward0>), tensor([[ 0.1425, -0.2046,  0.1789,  0.2661,  0.3432,  0.1691,  0.1548, -0.0121]],
       grad_fn=<AddmmBackward0>), tensor([[ 0.4719, -0.1176, -0.2985, -0.2853,  0.0560, -0.2683,  0.6084, -0.0822]],
       grad_fn=<AddmmBackward0>), tensor([[-0.0644, -0.1544, -0.1961,  0.0087,  0.0336, -0.2573,  0.2496,  0.1691]],
       grad_fn=<AddmmBackward0>), tensor([[-0.4718,  0.4125, -0.2638, -0.1281,  0.3371, -0.2907,  0.1157,  0.3006]],
       grad_fn=<AddmmBackward0>), tensor([[-0.3383, -0.2543,  0.2033,  0.1455,  0.0376,  0.0285, -0.2432, -0.3004]],
       grad_fn=<AddmmBackward0>), tensor([[-0.32

In [43]:
print(pred)

tensor([1])


In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Expert(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Expert, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x

class Gate(nn.Module):
    def __init__(self, input_size, num_experts):
        super(Gate, self).__init__()
        self.linear1 = nn.Linear(input_size, num_experts)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.linear1(x)
        x = self.softmax(x)
        return x

class MixtureOfExperts(nn.Module):
    def __init__(self, input_size =8 , hidden_size = 8, num_experts = 8):
        super(MixtureOfExperts, self).__init__()
        self.experts = nn.ModuleList([Expert(input_size, hidden_size) for i in range(num_experts)])
        self.gate = Gate(input_size, num_experts)
        self.input = MyModel('convnext_xlarge_in22k',224)
        
    def forward(self, x):
        x = self.input(x)
        gates = self.gate(x)
        print("shape after gates is = ",gates.shape)
        print("output of gates = ",gates)
        expert_outputs = [expert(x) for expert in self.experts]
        print("expert_outputs = ",expert_outputs)
        output = torch.stack(expert_outputs, dim=1)
        print("output post stack  = ",output)
        print("post stack shape   = ",output.shape)
        output = torch.bmm(gates.unsqueeze(1), output).squeeze(1)
        print("Final output = ",output)
        return output


In [37]:
moe_model = MixtureOfExperts()

In [38]:
moe_model(x)

post spec gram shape =  torch.Size([1, 1025, 121])
post norm shape =  torch.Size([1, 1, 1025, 121])
post sizer shape =  torch.Size([1, 224, 224])
post unsqueeze shape =  torch.Size([1, 1, 224, 224])
x shape = torch.Size([1, 8])
shape after gates is =  torch.Size([1, 8])
output of gates =  tensor([[0.1725, 0.0945, 0.0841, 0.1015, 0.1362, 0.1459, 0.1654, 0.0999]],
       grad_fn=<SoftmaxBackward0>)
expert_outputs =  [tensor([[0.0326]], grad_fn=<AddmmBackward0>), tensor([[-0.1055]], grad_fn=<AddmmBackward0>), tensor([[0.4171]], grad_fn=<AddmmBackward0>), tensor([[-0.0510]], grad_fn=<AddmmBackward0>), tensor([[0.2768]], grad_fn=<AddmmBackward0>), tensor([[0.2766]], grad_fn=<AddmmBackward0>), tensor([[0.6636]], grad_fn=<AddmmBackward0>), tensor([[-0.1350]], grad_fn=<AddmmBackward0>)]
output post stack  =  tensor([[[ 0.0326],
         [-0.1055],
         [ 0.4171],
         [-0.0510],
         [ 0.2768],
         [ 0.2766],
         [ 0.6636],
         [-0.1350]]], grad_fn=<StackBackward0>)


tensor([[0.1999]], grad_fn=<SqueezeBackward1>)