<a href="https://colab.research.google.com/github/godofwar1007/Cynaptics-inductionn/blob/main/Task_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Libraries and modules required

In [None]:
# Install the audio-processing library and the Kaggle command-line client
# into the notebook runtime.
!pip install librosa
!pip install kaggle
# Colab-only: open a file-upload prompt. A later cell moves `kaggle.json`
# into ~/.kaggle, so that file presumably has to be uploaded here.
from google.colab import files
files.upload()

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import OneCycleLR
import torch.optim as optim
import pandas as pd
import librosa
import numpy as np
import os
import time
from pathlib import Path
from sklearn.model_selection import train_test_split

"""there are comments written by me ahead for my refrence and for better explanaition and tracking """



Kaggle data download and GPU helper utilities

In [None]:
# Move the uploaded Kaggle API token into ~/.kaggle (presumably where the
# Kaggle CLI looks for it) and make it readable by the owner only.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the competition data; the next cell unzips
# `the-frequency-quest.zip` into ./data.
!kaggle competitions download -c the-frequency-quest


In [None]:
!unzip the-frequency-quest.zip -d ./data

In [None]:
def get_default_device():
    """Return the CUDA device when a GPU is available, otherwise the CPU device."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)

def to_device(data, device):
    """Move ``data`` to ``device``.

    A list or tuple is handled recursively, element by element, and always
    comes back as a list. Anything else must provide a
    ``.to(device, non_blocking=True)`` method (tensors and modules do).
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Wrap a data loader so that every batch it yields is moved to ``device``."""

    def __init__(self, dl, device):
        self.device = device
        self.dl = dl

    def __len__(self):
        """Return the number of batches in the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield each batch of the wrapped loader after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

The audio dataset
(This also converts the audio into a mel-spectrogram image.)

In [None]:


class AudioDataset(Dataset):
    """Labelled audio files served as 3-channel mel-spectrogram tensors.

    ``df`` must provide a ``filepath`` column and a ``label`` column. Label
    values are handed directly to ``torch.tensor(..., dtype=torch.long)``, so
    they are expected to already be integer class indices.

    Each item is ``(image, label)`` where ``image`` is a float32 tensor of
    shape ``(3, n_mels, fixed_width)``.
    """

    def __init__(self, df, n_mels=128, fixed_width=400):
        self.df = df
        self.n_mels = n_mels
        self.fixed_width = fixed_width

        # Position of every distinct label in order of first appearance.
        # NOTE(review): __getitem__ never consults this mapping.
        self.label_map = {}
        for position, name in enumerate(df['label'].unique()):
            self.label_map[name] = position

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        record = self.df.iloc[index]
        filepath, target = record['filepath'], record['label']

        try:
            y, sr = librosa.load(filepath, sr=22050)
        except Exception as e:
            # Best effort: an unreadable file becomes an all-zero image with
            # class index 0 so that one bad file does not abort the epoch.
            print(f"Error loading {filepath}: {e}")
            return torch.zeros((3, self.n_mels, self.fixed_width)), torch.tensor(0)

        # Mel power spectrogram converted to dB relative to its own maximum.
        mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=self.n_mels)
        mel_db = librosa.power_to_db(mel_power, ref=np.max)

        # Force exactly `fixed_width` time frames: crop long clips, and pad
        # short ones on the right with np.pad's default constant (0).
        # NOTE(review): with ref=np.max, 0 dB is presumably the loudest value,
        # so the padding is not "silence" - confirm this is intended.
        frames = mel_db.shape[1]
        if frames > self.fixed_width:
            mel_db = mel_db[:, :self.fixed_width]
        else:
            mel_db = np.pad(mel_db, ((0, 0), (0, self.fixed_width - frames)), mode='constant')

        # Repeat the single-channel spectrogram three times along a new
        # leading axis to obtain a 3-channel image.
        stacked = np.stack([mel_db] * 3, axis=0)

        return (torch.tensor(stacked, dtype=torch.float32),
                torch.tensor(target, dtype=torch.long))

The ResNet-9 model for spectrogram images

In [None]:

def accuracy(outputs, labels):
    """Return, as a 0-dim tensor, the fraction of rows whose highest-scoring column equals the label."""
    preds = torch.max(outputs, dim=1).indices
    hits = (preds == labels).sum().item()
    return torch.tensor(hits / len(preds))

class ImageClassificationBase(nn.Module):
    """Base class adding cross-entropy training/validation helpers to a classifier.

    Subclasses implement ``forward``; every batch is an ``(images, labels)`` pair.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss for one batch, still attached to the graph."""
        images, labels = batch
        return F.cross_entropy(self(images), labels)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one validation batch."""
        images, labels = batch
        logits = self(images)
        return {
            'val_loss': F.cross_entropy(logits, labels).detach(),
            'val_acc': accuracy(logits, labels),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch results into plain-float epoch metrics.

        Every batch counts equally, regardless of how many samples it held.
        """
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch, using the last recorded learning rate."""
        template = "Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))

def conv_block(in_channels, out_channels, pool=False):
    """Build a 3x3 Conv2d (padding 1) -> BatchNorm2d -> in-place ReLU stack.

    With ``pool=True`` a ``MaxPool2d(2)`` is appended, halving height and width.
    Having this helper keeps the multi-layer model definition tidy.
    """
    stages = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]
    if pool:
        stages += [nn.MaxPool2d(2)]
    return nn.Sequential(*stages)

class AudioResNet9(ImageClassificationBase):
    """ResNet-9 style CNN: four conv blocks, two residual pairs and a pooled linear head.

    The shape comments below assume an ``in_channels x 128 x 300`` input
    (mel bins x time frames). The adaptive average pool in the head makes the
    network accept other spatial sizes as well.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Stem followed by the first residual pair.
        self.conv1 = conv_block(in_channels, 64)            # 64 x 128 x 300
        self.conv2 = conv_block(64, 128, pool=True)         # 128 x 64 x 150
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))

        # Deeper stage followed by the second residual pair.
        self.conv3 = conv_block(128, 256, pool=True)        # 256 x 32 x 75
        self.conv4 = conv_block(256, 512, pool=True)        # 512 x 16 x 37
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))

        head = [
            nn.AdaptiveAvgPool2d((1, 1)),   # 512 x 16 x 37 -> 512 x 1 x 1
            nn.Flatten(),                   # 512 x 1 x 1 -> 512
            nn.Linear(512, num_classes),    # 512 -> num_classes
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, xb):
        """Return class logits for a batch of spectrogram images."""
        features = self.conv2(self.conv1(xb))
        features = self.res1(features) + features    # first skip connection
        features = self.conv4(self.conv3(features))
        features = self.res2(features) + features    # second skip connection
        return self.classifier(features)

Training loop

In [None]:
@torch.no_grad()
def evaluate(model, val_loader):
    """Put ``model`` in eval mode and return its aggregated validation metrics.

    Gradients are disabled for the whole call. Each batch goes through
    ``model.validation_step`` and the collected results are reduced by
    ``model.validation_epoch_end``.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.Adam):
    """Train ``model`` with a one-cycle learning-rate schedule.

    Args:
        epochs: Number of passes over ``train_loader``.
        max_lr: Peak learning rate given to both the optimizer and ``OneCycleLR``.
        model: Module providing ``training_step`` and ``epoch_end``, plus the
            validation hooks used by ``evaluate``.
        train_loader: Iterable of training batches; must support ``len()``.
        val_loader: Iterable of validation batches.
        weight_decay: Weight decay forwarded to the optimizer.
        grad_clip: If truthy, every gradient element is clamped to
            ``[-grad_clip, grad_clip]`` before the optimizer step.
        opt_func: Optimizer constructor, called as
            ``opt_func(params, max_lr, weight_decay=...)``.

    Returns:
        A list with one dict per epoch containing ``val_loss``, ``val_acc``,
        ``train_loss`` and ``lrs`` (the learning rate used for each batch).
    """
    torch.cuda.empty_cache()
    history = []

    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)

    # The scheduler is stepped once per batch, so it needs the total step count.
    sched = OneCycleLR(optimizer, max_lr, epochs=epochs,
                       steps_per_epoch=len(train_loader))

    # Start the clock once, before the first epoch, so the final report covers
    # the whole run rather than only the last epoch.
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Only the value is needed for reporting; storing the detached
            # tensor avoids keeping autograd references alive all epoch.
            train_losses.append(loss.detach())

            loss.backward()

            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record the rate that was just applied, then advance the schedule.
            lrs.append(get_lr(optimizer))
            sched.step()

        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs

        model.epoch_end(epoch, result)
        history.append(result)

    end_time = time.time()
    print(f"Total training time: {(end_time-start_time):.2f}s")
    return history

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    return None if first_group is None else first_group['lr']

Running the model

In [None]:
!ls -l ./data


In [None]:
!ls -l ./data/train

In [None]:
print("="*30)
print("Starting Audio Classification Training...")

data_dir = Path("./data")
train_audio_path = data_dir / "train" / "train"
test_csv_path = data_dir / "sample_submission.csv"

device = get_default_device()
print(f"Using device: {device}")

# Every sub-folder of the training directory is one class; the folder name is
# the label. Directory and glob results come back in arbitrary order, so both
# are sorted to keep the file list (and the seeded split below) reproducible.
print("Scanning audio files and building dataset...")
filepaths = []
labels = []
for folder in sorted(train_audio_path.iterdir()):
    if folder.is_dir():
        label = folder.name
        for ext in ['*.wav', '*.mp3', '*.ogg']:
            for file in sorted(folder.glob(ext)):
                filepaths.append(file)
                labels.append(label)

df = pd.DataFrame({
    'filepath': filepaths,
    'label': labels
})

print(f"Found {len(df)} training files in {len(df['label'].unique())} classes.")

if df.empty:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise FileNotFoundError(f"No audio files found under {train_audio_path}")

# Map class names to integer indices. Sorting the names makes the mapping
# independent of directory order, so it can be rebuilt identically later.
label_map = {label: i for i, label in enumerate(sorted(df['label'].unique()))}
df['label'] = df['label'].map(label_map)
print(f"Label mapping: {label_map}")

# Hold out 10% for validation, keeping class proportions equal in both parts.
train_df, val_df = train_test_split(df, test_size=0.1, random_state=42, stratify=df['label'])

# Spectrogram geometry; inference has to use the same values.
N_MELS = 128
FIXED_WIDTH = 300
BATCH_SIZE = 64

train_ds = AudioDataset(train_df, n_mels=N_MELS, fixed_width=FIXED_WIDTH)
val_ds = AudioDataset(val_df, n_mels=N_MELS, fixed_width=FIXED_WIDTH)

train_dl = DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_dl = DataLoader(val_ds, BATCH_SIZE, num_workers=2, pin_memory=True)

# Wrap the loaders so every batch is moved to the selected device on the fly.
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
print("DataLoaders are ready.")

model = AudioResNet9(in_channels=3, num_classes=5)
model = to_device(model, device)

# Baseline metrics of the untrained model, kept as the first history entry.
history = [evaluate(model, val_dl)]
print("Initial random model performance:")
print(history[0])

epochs = 20
max_lr = 0.01
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam

print("Starting training!")
history += fit_one_cycle(epochs, max_lr, model, train_dl, val_dl,
                         weight_decay=weight_decay,
                         grad_clip=grad_clip,
                         opt_func=opt_func)

print("="*30)
print("TRAINING FINISHED!")

# Persist only the weights; the submission cell rebuilds the architecture.
torch.save(model.state_dict(), 'audio_resnet9_model.pth')
print("Model saved to audio_resnet9_model.pth")

Submission

In [None]:
import pandas as pd
from pathlib import Path

# Load the sample submission and show its first rows to check the expected
# column layout before generating predictions.
data_dir = Path("./data")
test_csv_path = data_dir.joinpath("sample_submission.csv")

test_df = pd.read_csv(test_csv_path)
print(test_df.head())

In [None]:

import pandas as pd
import librosa
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from pathlib import Path

# Visual separator marking the start of the submission step in the cell output.
print("="*30)
print("Starting submission file generation...")


class TestAudioDataset(Dataset):
    """Unlabelled audio files served as 3-channel mel-spectrogram tensors.

    ``df`` must provide an ``ID`` column holding file names relative to
    ``data_path``. Each item is a float32 tensor of shape
    ``(3, n_mels, fixed_width)``.
    """

    def __init__(self, df, data_path, n_mels=128, fixed_width=300):
        self.df = df
        self.data_path = data_path
        self.n_mels = n_mels
        self.fixed_width = fixed_width

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        row = self.df.iloc[index]
        filepath = self.data_path / row['ID']

        try:
            y, sr = librosa.load(filepath, sr=22050)
        except Exception as e:
            # Best effort: keep the row so predictions stay aligned with the
            # submission IDs. The previous fallback replaced `y` with silence
            # but left `sr` unassigned, so the spectrogram call below raised
            # instead of recovering. Return the all-zero image directly, the
            # same fallback AudioDataset uses.
            print(f"Error loading {filepath}: {e}")
            return torch.zeros((3, self.n_mels, self.fixed_width))

        # Mel power spectrogram converted to dB relative to its own maximum.
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=self.n_mels)
        S_db = librosa.power_to_db(S, ref=np.max)

        # Force exactly `fixed_width` time frames: crop long clips, and pad
        # short ones on the right with np.pad's default constant (0).
        if S_db.shape[1] > self.fixed_width:
            S_db = S_db[:, :self.fixed_width]
        else:
            pad_width = self.fixed_width - S_db.shape[1]
            S_db = np.pad(S_db, ((0, 0), (0, pad_width)), mode='constant')

        # Repeat the single channel three times to form a 3-channel image.
        S_db_3channel = np.stack([S_db, S_db, S_db], axis=0)

        return torch.tensor(S_db_3channel, dtype=torch.float32)


@torch.no_grad()
def predict_all(model, test_loader):
    """Return the predicted class index for every sample in ``test_loader``.

    The model is switched to eval mode and run without gradients. The result
    is a flat list of NumPy integer scalars in loader order.
    """
    model.eval()
    collected = []
    for batch in test_loader:
        logits = model(batch)
        winners = torch.max(logits, dim=1).indices
        collected.extend(winners.cpu().numpy())
    return collected

data_dir = Path("./data")

test_audio_path = data_dir / "test" / "test"

test_csv_path = data_dir / "sample_submission.csv"

# The sample submission lists the test file IDs, in submission order.
test_df = pd.read_csv(test_csv_path)
print(f"Found {len(test_df)} test files to predict.")

print("Loading saved model: audio_resnet9_model.pth")
device = get_default_device()
model = AudioResNet9(in_channels=3, num_classes=5)
# map_location lets weights saved from a GPU run load on a CPU-only runtime.
model.load_state_dict(torch.load('audio_resnet9_model.pth', map_location=device))
model = to_device(model, device)
print("Model loaded.")

# Spectrogram geometry must match training (128 mel bins x 300 frames);
# a different width feeds the model inputs unlike those it was trained on.
N_MELS = 128
FIXED_WIDTH = 300
BATCH_SIZE = 64

test_ds = TestAudioDataset(test_df, test_audio_path, n_mels=N_MELS, fixed_width=FIXED_WIDTH)
# No shuffling: predictions have to stay aligned with the rows of test_df.
test_dl = DataLoader(test_ds, BATCH_SIZE, num_workers=2, pin_memory=True)
test_dl = DeviceDataLoader(test_dl, device)
print("Test DataLoader is ready.")

print("Making predictions on test data...")
predictions = predict_all(model, test_dl)

# `label_map` (class name -> index) comes from the training cell, which must
# have been run in this session; invert it to turn indices back into names.
inv_label_map = {i: label for label, i in label_map.items()}

string_preds = [inv_label_map[p] for p in predictions]

submission_df = pd.DataFrame({
    'ID': test_df['ID'],
    'Class': string_preds
})

submission_df.to_csv('submission.csv', index=False)

print("="*30)
print("submission.csv created successfully!")
print("Here's a preview:")
print(submission_df.head())
print("="*30)