In [None]:
from torch.utils.data import Dataset, DataLoader
import torchaudio.transforms as T
import torch.nn.functional as F
import torchaudio
import torch
import json
import os

# Cap the number of CPU threads PyTorch uses for intra-op parallelism.
torch.set_num_threads(24)

# Batch size for the (currently commented-out) DataLoaders further down.
batch_size = 2048

# Shared waveform -> mel-spectrogram transform with 32 mel bins; every other
# MelSpectrogram parameter is left at its torchaudio default.
melspec = T.MelSpectrogram(n_mels=32)

# Output directories, one per dataset split, for the cached spectrogram files.
TEST_DIR = "/kaggle/working/test"
VALID_DIR = "/kaggle/working/valid"
TRAIN_DIR = "/kaggle/working/train"

# Create the directories up front; exist_ok makes re-running the cell harmless.
for _split_dir in (TEST_DIR, VALID_DIR, TRAIN_DIR):
    os.makedirs(_split_dir, exist_ok=True)

class NSynthDataset(Dataset):
    """Audio clips exposed as ``(log-mel spectrogram, instrument family)`` pairs.

    Each ``__getitem__`` call loads one ``.wav`` file, converts it to a
    log-mel spectrogram with the module-level ``melspec`` transform, and also
    writes the resulting pair to ``install_dir`` as ``spec_<idx>.pt``. Iterating
    over the whole dataset once therefore materialises an on-disk cache.

    Args:
        inputs_dir: Directory containing the ``<key>.wav`` audio files.
        labels_dir: Path to the JSON file mapping each key to its metadata
            dict (despite the name, this is opened as a file).
        install_dir: Directory the ``spec_<idx>.pt`` files are written to.
            It is created if it does not exist yet.
        transform: Stored on the instance as ``self.transform``.
            NOTE(review): it is not applied anywhere in ``__getitem__``;
            confirm whether it is still needed.

    Raises:
        OSError: If the labels file cannot be opened or ``install_dir``
            cannot be created.
        json.JSONDecodeError: If the labels file is not valid JSON.
    """

    # Every waveform is zero-padded or truncated to exactly this many samples.
    # NOTE(review): presumably 4 s clips at 16 kHz -- confirm against the data.
    NUM_SAMPLES = 64000

    # Small offset added before the logarithm so empty mel bins don't yield -inf.
    LOG_EPS = 1e-10

    def __init__(self, inputs_dir, labels_dir, install_dir, transform=None):
        super().__init__()
        self.inputs_dir = inputs_dir
        self.labels_dir = labels_dir
        self.install_dir = install_dir
        self.transform = transform

        # torch.save does not create missing parent directories, so make sure
        # the cache directory exists before any item is requested.
        os.makedirs(self.install_dir, exist_ok=True)

        with open(self.labels_dir, 'r', encoding='utf-8') as f:
            self.labels = json.load(f)

        # Materialise the items once so samples can be addressed by integer
        # index; order follows the key order of the JSON object.
        self.labels_list = list(self.labels.items())

    def __len__(self) -> int:
        """Return the number of labelled examples."""
        return len(self.labels_list)

    def __getitem__(self, idx):
        """Load example ``idx``, cache it to disk and return it.

        Args:
            idx: Integer position into the list of labelled examples.

        Returns:
            A ``(spectrogram, instrument)`` tuple: the log-mel spectrogram with
            a leading singleton dimension added, and the example's
            ``"instrument_family"`` metadata value.
        """
        filename, data = self.labels_list[idx]
        path = os.path.join(self.inputs_dir, filename + '.wav')

        # NOTE(review): the file's sample rate is not checked against the
        # rate ``melspec`` was configured with.
        waveform, _sample_rate = torchaudio.load(path)

        # Right-pad short clips with zeros, then cut long ones, so the last
        # dimension is always exactly NUM_SAMPLES.
        missing = max(0, self.NUM_SAMPLES - waveform.shape[-1])
        waveform = F.pad(waveform, (0, missing))[:, :self.NUM_SAMPLES]

        # Average over dim 0 (channels) to get a mono signal, then take the
        # log of the mel spectrogram.
        new_spec = melspec(waveform.mean(dim=0))
        new_spec = torch.log(new_spec + self.LOG_EPS)
        new_spec = new_spec.unsqueeze(0)

        instrument = data["instrument_family"]

        # Persist the pair so later runs can read the .pt file instead of
        # recomputing the spectrogram.
        torch.save(
            (new_spec, instrument),
            os.path.join(self.install_dir, f"spec_{idx}.pt"),
        )
        return new_spec, instrument


# test_dataloader= DataLoader(NSynthDataset('/kaggle/input/nsynth-train/nsynth-test.jsonwav/nsynth-test/audio', 
#                                      '/kaggle/input/nsynth-train/nsynth-test.jsonwav/nsynth-test/examples.json', TEST_DIR), 
#                        batch_size=batch_size,shuffle=False,num_workers=12,prefetch_factor=2,pin_memory=True,persistent_workers=True)

# valid_dataloader= DataLoader(NSynthDataset('/kaggle/input/nsynth-train/nsynth-valid.jsonwav/nsynth-valid/audio', 
#                                      '/kaggle/input/nsynth-train/nsynth-valid.jsonwav/nsynth-valid/examples.json', VALID_DIR), 
#                        batch_size=batch_size,shuffle=False,num_workers=12,prefetch_factor=2,pin_memory=True,persistent_workers=True)

# train_dataloader= DataLoader(NSynthDataset('/kaggle/input/nsynth-train/nsynth-train.jsonwav/nsynth-train/audio', 
#                                      '/kaggle/input/nsynth-train/nsynth-train.jsonwav/nsynth-train/examples.json', TRAIN_DIR), 
#                        batch_size=batch_size,shuffle=False,num_workers=12,prefetch_factor=2,pin_memory=True,persistent_workers=True)

# for spec, label in test_dataloader:
#     print("one more for test")
#     pass
        
# for spec, label in valid_dataloader:
#     print("one more for valid")
#     pass
        
# for spec, label in train_dataloader:
#     print("one more for train")
#     pass

        
# print("done")

In [None]:
# Delete everything in the Kaggle working directory (uncomment the line below to run).
# !rm -rf /kaggle/working/*

In [None]:
# Pack a directory into a zip archive so it can be downloaded from Kaggle.
import shutil

# Change base_name / root_dir to archive a different directory.
# make_archive appends the ".zip" extension to base_name itself.
shutil.make_archive(
    base_name='/kaggle/working/nsynthtrain',
    format='zip',
    root_dir='/kaggle/working/train',
)