In [1]:
# creating new noisy melspectrogram as new dataset

In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import DataLoader
from tqdm import tqdm

In [3]:
# Build the first teacher: a ResNet-50 created WITHOUT ImageNet weights.
# Its trained parameters are restored from a checkpoint in the following cell.
# `weights=None` is the current spelling of the deprecated `pretrained=False`.
resnet_model = models.resnet50(weights=None)

# Swap the classification head so the network emits 4 logits (one per class).
num_features = resnet_model.fc.in_features
resnet_model.fc = torch.nn.Linear(num_features, 4)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet_model = resnet_model.to(device)



In [4]:
# Restore the first teacher's trained parameters from its checkpoint file.
model_path = 'C:\\Users\\PC\\Desktop\\lisnen_research_files\\audio_research_crnn_files\\model_weights\\resnet_50.pth'
# map_location keeps the load working when the checkpoint holds CUDA tensors but
# only a CPU is present; weights_only=True restricts unpickling to tensors and
# plain containers instead of arbitrary Python objects.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
resnet_model.load_state_dict(state_dict)

  state_dict=torch.load(model_path)


<All keys matched successfully>

In [5]:
# Build the second teacher: a ResNet-50 created WITHOUT ImageNet weights.
# Its trained parameters are restored from a checkpoint in the following cell.
# `weights=None` is the current spelling of the deprecated `pretrained=False`.
noise_model = models.resnet50(weights=None)

# Swap the classification head so the network emits 4 logits, the same width
# as the first teacher's head.
num_features = noise_model.fc.in_features
noise_model.fc = torch.nn.Linear(num_features, 4)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
noise_model = noise_model.to(device)

In [6]:
# Restore the second teacher's trained parameters from its checkpoint file.
noise_model_path = 'C:\\Users\\PC\\Desktop\\sample_projects\\transfer_knowledge\\models\\resnet_50_noise.pth'
# map_location keeps the load working when the checkpoint holds CUDA tensors but
# only a CPU is present; weights_only=True restricts unpickling to tensors and
# plain containers instead of arbitrary Python objects.
state_dict = torch.load(noise_model_path, map_location=device, weights_only=True)
noise_model.load_state_dict(state_dict)

  state_dict=torch.load(noise_model_path)


<All keys matched successfully>

In [7]:
# Folder locations of the train / val / test splits, all under one data root.
_noise_data_root = "C:\\Users\\PC\\Desktop\\lisnen_data\\noise_validation_data"
train_dataset_path = _noise_data_root + "\\train"
val_dataset_path = _noise_data_root + "\\val"
test_dataset_path = _noise_data_root + "\\test"


In [8]:
# Define transformations for training and validation
def get_transforms(is_training=True):
    """Return the image preprocessing pipeline for a dataset split.

    Training and evaluation currently share the same pipeline, a single
    ``ToTensor`` step, so ``is_training`` does not change the result.

    Args:
        is_training: Whether the pipeline is for the training split.

    Returns:
        A ``transforms.Compose`` containing one ``ToTensor`` transform.
    """
    steps = [transforms.ToTensor()]
    return transforms.Compose(steps)


# One ImageFolder dataset per split; only the training split requests the
# training-mode transforms.
train_dataset, val_dataset, test_dataset = (
    datasets.ImageFolder(root=split_root, transform=get_transforms(is_training=training))
    for split_root, training in (
        (train_dataset_path, True),
        (val_dataset_path, False),
        (test_dataset_path, False),
    )
)

# One DataLoader per split; only the training loader reshuffles every epoch.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=32, shuffle=reshuffle, num_workers=4)
    for split, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)


In [46]:
class CNNModel(nn.Module):
    """Compact four-convolution image classifier producing 4 logits.

    Three conv + ReLU + 2x2 max-pool stages are followed by a fourth
    conv + ReLU, an adaptive average pool to 8x8, and a two-layer fully
    connected head. The first convolution takes 3 input channels.
    """

    def __init__(self):
        super().__init__()
        # Every convolution is 3x3 with stride 1 and padding 1.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv2d(3, 32, **conv_kwargs)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, **conv_kwargs)
        self.conv3 = nn.Conv2d(64, 128, **conv_kwargs)
        self.conv4 = nn.Conv2d(128, 128, **conv_kwargs)

        # Adaptive pooling fixes the spatial size fed to the linear head at 8x8.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((8, 8))

        self.fc1 = nn.Linear(128 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 4)

    def forward(self, x):
        """Map a batch of images to a batch of 4 class logits."""
        # The first three conv stages are each followed by ReLU and max-pooling.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(torch.relu(conv(x)))

        # The last conv stage skips max-pooling and goes to the adaptive pool.
        x = self.adaptive_pool(torch.relu(self.conv4(x)))

        # Flatten to (N, 8192) for the fully connected head.
        x = x.view(-1, self.fc1.in_features)
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)
    
# Instantiate the student network on the selected device and show its layers.
model = CNNModel().to(device)
print(model)


CNNModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (adaptive_pool): AdaptiveAvgPool2d(output_size=(8, 8))
  (fc1): Linear(in_features=8192, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=4, bias=True)
)


In [47]:
from torchsummary import summary

# Layer-by-layer output shapes and parameter counts for one 3x224x224 input.
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 224, 224]             896
         MaxPool2d-2         [-1, 32, 112, 112]               0
            Conv2d-3         [-1, 64, 112, 112]          18,496
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5          [-1, 128, 56, 56]          73,856
         MaxPool2d-6          [-1, 128, 28, 28]               0
            Conv2d-7          [-1, 128, 28, 28]         147,584
 AdaptiveAvgPool2d-8            [-1, 128, 8, 8]               0
            Linear-9                  [-1, 128]       1,048,704
           Linear-10                    [-1, 4]             516
Total params: 1,290,052
Trainable params: 1,290,052
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 27.63
Params size (MB): 4.92
Estima

In [48]:
from keras.optimizers import Adam

# Keras Adam optimizer; `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `opt` is not referenced by any other cell visible here (the
# PyTorch training function builds its own optimizer), so this cell may be
# removable. Confirm before deleting.
opt = Adam(learning_rate=0.0001)

  super(Adam, self).__init__(name, **kwargs)


In [49]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm

def train_2teacher_model(teacher1, teacher2, student, train_dataloader, T=1.25, epochs=25, learning_rate=0.001, device='cuda', soft_target_loss_weight=0.25, ce_loss_weight=0.75):
    """Train ``student`` by distilling the averaged predictions of two teachers.

    For every batch the loss is a weighted sum of:
      * the KL divergence between the mean of the two teachers'
        temperature-softened distributions and the student's
        temperature-softened distribution, scaled by ``T ** 2``; and
      * the cross-entropy between the student's logits and the true labels.

    Args:
        teacher1: First teacher module; put in eval mode and never updated.
        teacher2: Second teacher module; put in eval mode and never updated.
            Both teachers must emit logits with the same shape as the student.
        student: Module to optimise; put in train mode.
        train_dataloader: Iterable yielding ``(inputs, labels)`` batches.
        T: Softmax temperature applied to teacher and student logits.
        epochs: Number of passes over ``train_dataloader``.
        learning_rate: Learning rate of the Adam optimiser.
        device: Device the models and batches are moved to.
        soft_target_loss_weight: Weight of the distillation term.
        ce_loss_weight: Weight of the cross-entropy term.

    Returns:
        list[float]: Mean training loss of each epoch, in order.

    Raises:
        ValueError: If ``train_dataloader`` yields no batches in an epoch.
    """
    # Keep all three models on the device the batches are sent to.
    teacher1 = teacher1.to(device)
    teacher2 = teacher2.to(device)
    student = student.to(device)

    ce_loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(student.parameters(), lr=learning_rate)
    teacher1.eval()
    teacher2.eval()
    student.train()

    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Teachers are fixed: build the averaged soft targets without autograd.
            with torch.no_grad():
                soft_targets1 = F.softmax(teacher1(inputs) / T, dim=1)
                soft_targets2 = F.softmax(teacher2(inputs) / T, dim=1)
                soft_targets = (soft_targets1 + soft_targets2) / 2

            student_logits = student(inputs)
            soft_prob = F.log_softmax(student_logits / T, dim=1)

            # Distillation loss: KL(soft_targets || student), summed over classes
            # and divided by the batch size. F.kl_div treats a target
            # probability of exactly 0 as contributing 0, whereas the explicit
            # `p * (p.log() - q)` form yields NaN when a confident teacher's
            # softmax underflows to 0.
            distillation_loss = F.kl_div(soft_prob, soft_targets, reduction='batchmean') * (T ** 2)
            # True label loss
            label_loss = ce_loss(student_logits, labels)

            # Weighted sum of two losses
            loss = distillation_loss * soft_target_loss_weight + ce_loss_weight * label_loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_dataloader yielded no batches; nothing to train on")

        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}/{epochs}, Loss = {mean_loss}')

    return epoch_losses


In [50]:
def test(model,test_dataloader,device):
    model.to(device)
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [52]:
# Distil both ResNet-50 teachers into the small CNN student, then score the
# student on the held-out test split.
train_2teacher_model(
    teacher1=resnet_model,
    teacher2=noise_model,
    student=model,
    train_dataloader=train_loader,
    T=1.25,
    epochs=25,
    learning_rate=0.001,
    device=device,
    soft_target_loss_weight=0.25,
    ce_loss_weight=0.75,
)
test_accuracy_light_ce_kd = test(model=model, test_dataloader=test_loader, device=device)

Epoch 1/25: 100%|██████████| 48/48 [00:19<00:00,  2.51it/s]


Epoch 1/25, Loss = 0.6716961686809858


Epoch 2/25: 100%|██████████| 48/48 [00:18<00:00,  2.55it/s]


Epoch 2/25, Loss = 0.6496561306218306


Epoch 3/25: 100%|██████████| 48/48 [00:19<00:00,  2.48it/s]


Epoch 3/25, Loss = 0.6433433244625727


Epoch 4/25: 100%|██████████| 48/48 [00:18<00:00,  2.53it/s]


Epoch 4/25, Loss = 0.634464239080747


Epoch 5/25: 100%|██████████| 48/48 [00:19<00:00,  2.50it/s]


Epoch 5/25, Loss = 0.6229868642985821


Epoch 6/25: 100%|██████████| 48/48 [00:19<00:00,  2.52it/s]


Epoch 6/25, Loss = 0.618078970660766


Epoch 7/25: 100%|██████████| 48/48 [00:18<00:00,  2.54it/s]


Epoch 7/25, Loss = 0.6123548299074173


Epoch 8/25: 100%|██████████| 48/48 [00:19<00:00,  2.49it/s]


Epoch 8/25, Loss = 0.6122683274249235


Epoch 9/25: 100%|██████████| 48/48 [00:19<00:00,  2.50it/s]


Epoch 9/25, Loss = 0.6054237056523561


Epoch 10/25: 100%|██████████| 48/48 [00:19<00:00,  2.47it/s]


Epoch 10/25, Loss = 0.6115854332844416


Epoch 11/25: 100%|██████████| 48/48 [00:19<00:00,  2.51it/s]


Epoch 11/25, Loss = 0.5950079746544361


Epoch 12/25: 100%|██████████| 48/48 [00:19<00:00,  2.48it/s]


Epoch 12/25, Loss = 0.5948430995146433


Epoch 13/25: 100%|██████████| 48/48 [00:19<00:00,  2.51it/s]


Epoch 13/25, Loss = 0.587629825497667


Epoch 14/25: 100%|██████████| 48/48 [00:19<00:00,  2.51it/s]


Epoch 14/25, Loss = 0.6065860794236263


Epoch 15/25: 100%|██████████| 48/48 [00:18<00:00,  2.54it/s]


Epoch 15/25, Loss = 0.5906341249744097


Epoch 16/25: 100%|██████████| 48/48 [00:18<00:00,  2.60it/s]


Epoch 16/25, Loss = 0.5828350701679786


Epoch 17/25: 100%|██████████| 48/48 [00:18<00:00,  2.60it/s]


Epoch 17/25, Loss = 0.5881807518502077


Epoch 18/25: 100%|██████████| 48/48 [00:18<00:00,  2.58it/s]


Epoch 18/25, Loss = 0.5868325245877107


Epoch 19/25: 100%|██████████| 48/48 [00:18<00:00,  2.59it/s]


Epoch 19/25, Loss = 0.5842231400310993


Epoch 20/25: 100%|██████████| 48/48 [00:18<00:00,  2.58it/s]


Epoch 20/25, Loss = 0.5857556418826183


Epoch 21/25: 100%|██████████| 48/48 [00:18<00:00,  2.58it/s]


Epoch 21/25, Loss = 0.5869755900154511


Epoch 22/25: 100%|██████████| 48/48 [00:18<00:00,  2.62it/s]


Epoch 22/25, Loss = 0.5881171735624472


Epoch 23/25: 100%|██████████| 48/48 [00:18<00:00,  2.57it/s]


Epoch 23/25, Loss = 0.5765903604527315


Epoch 24/25: 100%|██████████| 48/48 [00:18<00:00,  2.56it/s]


Epoch 24/25, Loss = 0.5798723585903645


Epoch 25/25: 100%|██████████| 48/48 [00:18<00:00,  2.58it/s]


Epoch 25/25, Loss = 0.581745616470774
Test Accuracy: 90.16%
