In [112]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
import os

import librosa
import librosa.display
import IPython.display as ipd
from IPython.display import Audio

import soundfile

In [49]:
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(16 * 16, 120) # 16*5*5 input, 120 output
        self.fc2 = nn.Linear(120, 84) # 120 input, 84 output
        self.fc3 = nn.Linear(84, 16 * 16) # 84 input, 10 output

    def forward(self, x):
        x = x.flatten(1) # flatten all dimensions except batch dimension
        x = F.relu(self.fc1(x)) # 400 -> 120
        x = F.relu(self.fc2(x)) # 120 -> 84
        x = self.fc3(x) # 84 -> 10
        x = x.view(-1, 16, 16) # reshape to 4D tensor
        return x


In [51]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 128, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(128, 64, 3)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 32, 3)
        self.bn3 = nn.BatchNorm2d(32)
        self.fc1 = nn.Linear(32 * 5 * 5, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 32 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [52]:
#This can be used when we want to overlay multiple trained models
class prepModel(nn.Module):
    def __init__(self, input_shape):
        super(prepModel, self).__init__()
        self.mfcc_model = prepareMfccModel(input_shape) #
        self.bass_model = prepareBassModel()
        self.guitar_model = prepareGuitarModel()
        self.piano_model = preparePianoModel()
        self.drums_model = prepareDrumsModel()

    def forward(self, x):
        mfcc = self.mfcc_model(x)
        bass_output = self.bass_model(mfcc)
        guitar_output = self.guitar_model(mfcc)
        piano_output = self.piano_model(mfcc)
        drums_output = self.drums_model(mfcc)
        concat_output = torch.cat([bass_output, guitar_output, piano_output, drums_output], dim=1)
        return concat_output
    
class prepareBassModel(CNN):
    def __init__(self):
        super(prepareBassModel, self).__init__()

class prepareGuitarModel(CNN):
    def __init__(self):
        super(prepareGuitarModel, self).__init__()

class preparePianoModel(CNN):
    def __init__(self):
        super(preparePianoModel, self).__init__()

class prepareDrumsModel(CNN):
    def __init__(self):
        super(prepareDrumsModel, self).__init__()


In [28]:
class torchAgent:
    def __init__(self,model, loss_fn, data_path: str = None, valid_path: str = path, optimizer = None, device: str = None, epoch: int = 0, model_path = None, verbose: int = 2, track_amount: int = None, **kwargs):
        # device: cpu / gpu
        if device is None:
            self.device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu" # set device
            )
        else:
            self.device = device # set device
        self.model = model.to(self.device) # set model
        self.loss_fn = loss_fn # set loss function
        self.optimizer = optimizer # set optimizer
        self.scheduler = None # set scheduler
        self.epoch = epoch # set epoch
        self.verbose = verbose # set verbose
        if model_path is None:
            self.model_path = f'model_{datetime.now().strftime("%y_%m_%d_%H%M")}' # set model path
        else:
            self.model_path = model_path

        if data_path is not None:
            self.data_path  = 'data' #data path
        else:
            self.data_path = data_path

        if valid_path is None:
            self.valid_path = 'data' #validataion path
        else:
            self.valid_path = valid_path

        if track_amount is None:
            self.track_amount = len(os.listdir(self.data_path))
        else:
            self.track_amount = track_amount

    
    def add_loss_fn(self, loss_fn):
        self.loss_fn = loss_fn

    def add_optimizer(self, optimizer, **kwargs):
        self.optimizer = optimizer(self.model.parameters(), **kwargs)

    def add_scheduler(self, scheduler, **kwargs):
        self.scheduler = scheduler(self.optimizer, **kwargs)

    def load_data(self, path: str):
        data = torch.Tensor(np.random.rand(100, 16, 16)).to(self.device)
        labels = torch.Tensor(np.random.rand(100, 16, 16)).to(self.device)

        return data, labels

    def tracks(self, validate: bool = False):
        if validate:
            self.data_path = self.valid_path
        #find all tracks in data path folder
        for track in os.listdir(self.data_path):
            yield self.load_data(track)

    def train_one_epoch(self, **kwargs):
        self.model.train(True)
        running_loss = 0.

        for i, (data, labels) in enumerate(self.tracks()):
            # Zero your gradients for every batch!
            self.optimizer.zero_grad()

            # calculate loss
            loss = self.loss_fn(self.model(data), labels)

            # backpropagation
            loss.backward()

            # update parameters
            self.optimizer.step()

            # print statistics
            running_loss += loss.item()
            print(f'Batch: [{i+1}] loss: {loss.item():.3f}, loss: {running_loss:.3f}',end='\r')

            # free memory
            del data, labels, loss
            torch.cuda.empty_cache()

        self.model.train(False)
        return running_loss/self.track_amount
    
    def validate(self, **kwargs):
        self.model.train(False)
        running_loss = 0.

        for i, (data, labels) in enumerate(self.tracks(validate=True)):
            # calculate loss
            loss = self.loss_fn(self.model(data), labels)

            # print statistics
            running_loss += loss.item()
            print(f'\nBatch: [{i+1}] Loss: {loss.item():.3f}, Total loss: {running_loss:.3f}',end='\n')

            # free memory
            del data, labels, loss
            torch.cuda.empty_cache()

        return running_loss/self.track_amount

    def train(self, **kwargs):
        best_loss = np.inf
        for epoch in range(self.epoch):
            print(f'Epoch: [{epoch+1}/{self.epoch}]')
            epoch_loss = self.train_one_epoch(**kwargs)
            print(f'Epoch: [{epoch+1}/{self.epoch}] loss: {epoch_loss:.3f}')
            valid_loss = self.validate(**kwargs)
            if best_loss > valid_loss:
                print('Saving model...')
                self.save_model()
                best_loss = valid_loss
            if self.scheduler is not None:
                self.scheduler.step()
        print('Finished Training')

    def save_model(self):
        torch.save(self.model.state_dict(), self.model_path)
        print(f'Model saved at {self.model_path}')

    def load_model(self, model_path: str):
        self.model.load_state_dict(torch.load(model_path))
        print(f'Model loaded from {model_path}')        
    
    

In [14]:
torchAgent = torchAgent(Model(), nn.MSELoss(), epoch=10, verbose=2)
torchAgent.add_optimizer(optim.SGD, lr=0.001, momentum=0.9)
torchAgent.add_scheduler(optim.lr_scheduler.StepLR, step_size=5, gamma=0.1)
torchAgent.train()

Epoch: [1/10]
Epoch: [1/10] loss: 0.331oss: 2.978

Batch: [1] Loss: 0.333, Total loss: 0.333

Batch: [2] Loss: 0.334, Total loss: 0.667

Batch: [3] Loss: 0.329, Total loss: 0.996

Batch: [4] Loss: 0.331, Total loss: 1.327

Batch: [5] Loss: 0.334, Total loss: 1.661

Batch: [6] Loss: 0.329, Total loss: 1.990

Batch: [7] Loss: 0.329, Total loss: 2.320

Batch: [8] Loss: 0.330, Total loss: 2.650

Batch: [9] Loss: 0.330, Total loss: 2.980

Batch: [10] Loss: 0.329, Total loss: 3.309

Batch: [11] Loss: 0.333, Total loss: 3.642

Batch: [12] Loss: 0.331, Total loss: 3.973

Batch: [13] Loss: 0.333, Total loss: 4.307

Batch: [14] Loss: 0.332, Total loss: 4.638

Batch: [15] Loss: 0.332, Total loss: 4.971

Batch: [16] Loss: 0.330, Total loss: 5.301

Batch: [17] Loss: 0.337, Total loss: 5.638

Batch: [18] Loss: 0.332, Total loss: 5.970

Batch: [19] Loss: 0.328, Total loss: 6.298

Batch: [20] Loss: 0.334, Total loss: 6.631

Batch: [21] Loss: 0.330, Total loss: 6.961
Saving model...
Model saved at mode

In [117]:
def ExtractMfcc(audio_file): 
    #Function that extracts the Mfcc with librosa
    signal, sr = soundfile.read(audio_file)
    mfcc = librosa.feature.mfcc(y = signal, sr=sr, n_mfcc=40, hop_length=512, n_fft=1024, window='hamming')
    return mfcc

def DisplayMfcc(mfccs):
     #Function that displays the Mfcc with librosa
    plt.figure(figsize=(25, 10))
    librosa.display.specshow(mfccs, 
                         x_axis="time", 
                         sr=sr)
    plt.colorbar(format="%+2.f")
    plt.show()


In [62]:
path = '/Users/odysseaslazaridis/Documents/GroupProject/new_babyslack'

In [121]:
duration = 30  # Duration in seconds

# Load audio and extract the specified duration
mix_audio=[]
guitar_audio=[]
mix_path=[]
guitar_path=[]
i =0
for tr in os.listdir(path):
    if tr !=".DS_Store":
        track_path = os.path.join(path, tr)
        mix_path.append(os.path.join(track_path, 'mix.wav'))

        audio, sr = soundfile.read(mix_path[-1])
        print(sr)
        mix_audio.append(audio)



        guitar_path.append(os.path.join(track_path, 'Guitar.wav'))
        audio, sr = soundfile.read(guitar_path[-1])
        guitar_audio.append(audio)

        i+=1

16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000
16000


In [125]:

ExtractMfcc(mix_path[5]).shape


(40, 8569)

In [103]:
def apply_function_to_list(input_list, func):
    return [func(item) for item in input_list]

guitar_audio = apply_function_to_list(guitar_path,ExtractMfcc) #create a list of MFCC for guitar
mix_audio = apply_function_to_list(mix_path,ExtractMfcc)  #create a list of MFCC for the mix