In [1]:
import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision
from torchvision import datasets, models, transforms
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt




In [2]:
import os

# CUDA_LAUNCH_BLOCKING is an environment variable read by the CUDA runtime;
# binding a Python variable of the same name has no effect on CUDA at all.
# Export it so kernel launches run synchronously and device-side errors are
# reported at the call that caused them. This must run before the first CUDA
# call in the process (i.e. before any tensor or model is moved to the GPU).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Kept so the notebook-level name defined by the original cell still exists.
CUDA_LAUNCH_BLOCKING = 1

In [3]:


# ----------------------------
# Sound Dataset
# ----------------------------
class SoundDS(Dataset):
    """Dataset of pre-computed feature arrays listed in a DataFrame.

    Every row of ``df`` must provide a ``Path`` column (a file readable by
    ``np.load``) and a ``Class`` column (the label). Each array is loaded,
    transposed to ``(num_rows, sig_len)`` and zero-padded along the last axis
    to exactly ``max_len`` columns; the signal is placed at a random offset
    inside the padding.

    Args:
        df: DataFrame with ``Path`` and ``Class`` columns.
        max_len: Target length of the last axis. Shorter signals are padded,
            longer ones are truncated to their first ``max_len`` columns.
    """

    def __init__(self, df, max_len=1400):
        self.df = df
        #self.data_path = str(data_path)
        # The four attributes below are not read anywhere in this class; they
        # are kept only so code that inspects them keeps working.
        self.duration = 4000
        self.sr = 44100
        self.channel = 2
        self.shift_pct = 0.4
        # 1400 is the maximum length in our data; every item is brought to
        # this many columns.
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(features, class_id)`` for the row at position ``idx``.

        ``features`` is a float32 tensor of shape ``(num_rows, max_len)``.
        """
        # Positional lookup: samplers hand out 0..len-1, which only coincides
        # with the index labels when the frame has a default RangeIndex.
        audio_file = self.df['Path'].iloc[idx]
        class_id = self.df['Class'].iloc[idx]

        # Cast to float32 so the dtype matches the zero padding created below.
        aud = torch.from_numpy(np.load(audio_file).T).float()
        # Truncate over-long signals; otherwise the padding budget would be
        # negative and random.randint would raise ValueError.
        aud = aud[:, :self.max_len]
        num_rows, sig_len = aud.shape

        # Split the remaining room randomly between the beginning and the end.
        free_len = self.max_len - sig_len
        pad_begin_len = random.randint(0, free_len)
        pad_end_len = free_len - pad_begin_len

        # Pad with 0s
        pad_begin = torch.zeros((num_rows, pad_begin_len))
        pad_end = torch.zeros((num_rows, pad_end_len))

        aud = torch.cat((pad_begin, aud, pad_end), 1)

        return aud, class_id

In [4]:
# Training hyperparameters.
batch_size = 32
num_epochs = 10

# Locations of the CSV files describing the validation and training splits.
# NOTE(review): these are absolute, machine-specific paths.
data_path_val = r"C:\Users\boris\Documents\notebok\госзнак\val_dataset.csv"
data_path_train = r"C:\Users\boris\Documents\notebok\госзнак\train_dataset.csv"

In [5]:
# Read both CSV files, then wrap each resulting frame in a SoundDS dataset.
data_train = pd.read_csv(data_path_train)
data_val = pd.read_csv(data_path_val)
myds_train = SoundDS(data_train)
myds_val = SoundDS(data_val)

# Batch both splits; only the training loader shuffles its samples.
train_dl = DataLoader(dataset=myds_train, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(dataset=myds_val, batch_size=batch_size, shuffle=False)

In [6]:
# Run on the first GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-18 with every existing weight frozen.
model_conv = torchvision.models.resnet18(pretrained=True)
for frozen_param in model_conv.parameters():
    frozen_param.requires_grad = False

# Swap the final fully connected layer for a new 2-output one. Freshly
# constructed modules have requires_grad=True, so this head stays trainable.
model_conv.fc = nn.Linear(model_conv.fc.in_features, 2)

model_conv = model_conv.to(device)



In [10]:
# ----------------------------
# Training Loop
# ----------------------------
def _prepare_batch(inputs):
    """Standardise a ``(batch, rows, cols)`` tensor and copy it to 3 channels.

    Returns a ``(batch, 3, rows, cols)`` tensor in which the three channels
    are identical copies of the normalised input.
    """
    # Normalize with the statistics of the whole batch.
    inputs = (inputs - inputs.mean()) / inputs.std()
    # Add a channel axis and tile it. The previous
    # repeat_interleave(dim=1) + view(batch_size, 3, 80, 1400) combination
    # put a different third of the rows (each repeated three times) into each
    # channel, and it failed whenever a batch held fewer than batch_size items.
    return inputs.unsqueeze(1).repeat(1, 3, 1, 1)


def training(model, train_dl, val_dl, num_epochs, batch_size,
             unfreeze_epoch=5, save_path='./best_model.pth'):
    """Train ``model`` and save the checkpoint with the best validation accuracy.

    Args:
        model: Module to train. Batches are moved to the device of its first
            parameter.
        train_dl: Sized iterable of ``(inputs, labels)`` training batches.
        val_dl: Iterable of ``(inputs, labels)`` validation batches.
        num_epochs: Number of passes over ``train_dl``.
        batch_size: Unused; kept for backward compatibility. The batch size is
            taken from each batch, so a smaller final batch is handled.
        unfreeze_epoch: Epoch index at which every parameter of ``model`` is
            made trainable.
        save_path: Where the whole model object is written with ``torch.save``
            each time the validation accuracy improves.

    Returns:
        None. Per-epoch validation accuracy is printed.
    """
    # Follow the model instead of relying on a notebook-level global.
    device = next(model.parameters()).device

    # Loss Function, Optimizer and Scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                    steps_per_epoch=len(train_dl),
                                                    epochs=num_epochs,
                                                    anneal_strategy='linear')
    best_val_acc = None  # used to keep only the best model

    # Repeat for each epoch
    for epoch in range(num_epochs):
        if epoch == unfreeze_epoch:  # from this epoch on, train the whole model
            for param in model.parameters():
                param.requires_grad = True

        # Repeat for each batch in the training set
        model.train()
        for data in train_dl:
            # Get the input features and target labels on the model's device
            inputs = _prepare_batch(data[0].to(device))
            labels = data[1].to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

        # Validation: counters start from zero so the reported accuracy
        # covers validation data only.
        model.eval()
        correct_prediction = 0
        total_prediction = 0
        # No autograd graph is needed here; building one wastes GPU memory.
        with torch.no_grad():
            for data in val_dl:
                inputs = _prepare_batch(data[0].to(device))
                labels = data[1].to(device)
                outputs = model(inputs)

                # Get the predicted class with the highest score
                _, prediction = torch.max(outputs, 1)
                # Count of predictions that matched the target label
                correct_prediction += (prediction == labels).sum().item()
                total_prediction += prediction.shape[0]

        # An empty validation set yields 0.0 instead of an undefined value.
        acc = correct_prediction / total_prediction if total_prediction else 0.0

        # Print stats at the end of the epoch
        print(f'Epoch: {epoch}, Accuracy: {acc:.2f}')
        if best_val_acc is None or best_val_acc < acc:
            print(f'Best model yet, saving')
            best_val_acc = acc
            torch.save(model, save_path)

    print('Finished Training')

# Launch training with the model, loaders and hyperparameters defined above.
training(
    model=model_conv,
    train_dl=train_dl,
    val_dl=val_dl,
    num_epochs=num_epochs,
    batch_size=batch_size,
)

tensor([[ -9.9468,   8.2929],
        [  5.3264,  -4.6657],
        [-18.8211,  15.9605],
        [ -3.0482,   2.3595],
        [-12.0753,  10.2940],
        [  3.4404,  -3.0188],
        [  8.7958,  -7.6631],
        [ -0.6576,   0.6229],
        [  5.7739,  -5.0412],
        [  2.3946,  -2.0047],
        [ -9.8214,   8.4352],
        [-12.4061,  10.6966],
        [-16.3068,  13.9932],
        [  2.8135,  -2.4306],
        [ -8.5288,   7.1269],
        [-14.5871,  12.3820],
        [  5.9658,  -5.1394],
        [  2.8047,  -2.4483],
        [-16.4155,  13.9675],
        [-13.9907,  11.8833],
        [ -3.0283,   2.4633],
        [  4.7596,  -4.2334],
        [  2.7115,  -2.5957],
        [  5.0274,  -4.3411],
        [ -7.2423,   5.9487],
        [  4.1160,  -3.7774],
        [  3.5882,  -3.0955],
        [  1.5351,  -1.3841],
        [  4.3889,  -3.8403],
        [  4.6616,  -4.2670],
        [-11.8045,   9.9275],
        [  3.6197,  -3.2102]], device='cuda:0', grad_fn=<AddmmBackward

RuntimeError: CUDA out of memory. Tried to allocate 56.00 MiB (GPU 0; 6.00 GiB total capacity; 4.09 GiB already allocated; 0 bytes free; 4.29 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Load the object stored at './best_model.pth' back into memory.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints that come from a trusted source.
best_model = torch.load('./best_model.pth')

In [None]:
# Write the current model_conv object to './best_model.pth'.
# NOTE(review): this is the same path the training loop uses for its best
# checkpoint, so running this cell replaces that checkpoint with whatever
# state model_conv is in now — confirm this is intended.
torch.save(model_conv, './best_model.pth')
