In [1]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from model import EnvNet
from train import train_model
from data_preprocess import make_frames,make_frames_folder
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Conv1D,MaxPooling1D,Dense,Flatten,BatchNormalization,Dropout, Activation
from gammatone_init import GammatoneInit
from gammatone_init import generate_filters
from model_config import *
import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import ast
import wandb
import torch.optim as optim

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
!export CUDA_VISIBLE_DEVICES=2

In [4]:
frame_length = 16000
overlapping_fraction = 0.5
data = torch.load('./torch_dataset_16khz/all_audio_data.pt')
print(data.size())

torch.Size([54033, 16001])


In [5]:
def to_categorical(tensors, num_classes=10):
    return torch.eye(num_classes)[y.int()]
print(data.size())
tensor_size = (data.size())[0]
print(tensor_size)

torch.Size([54033, 16001])
54033


In [6]:
X_train = data[:,0:frame_length].clone()
test_portion = int(0.8*((data.size())[0]))
#print(test_portion)
X_train = data[:test_portion, 0:frame_length].clone()
#print(X_train.size())
Y_train = data[:test_portion,frame_length:].clone()

X_train = X_train.reshape(-1,16,1000)

#print(X_train.size())
#print(Y_train.size())
#print(Y_train)

In [7]:
Y_train = Y_train.type(torch.LongTensor)
Y_train_one_hot = F.one_hot(Y_train)
#print(Y_train_one_hot)
#print(Y_train)
audio_dataset = TensorDataset (X_train, Y_train)
audio_dataloader = DataLoader (audio_dataset, batch_size = 300, shuffle= True)

In [8]:
sample_rate = 16000
min_center_freq = 100
order = 2

In [9]:
class RMSLELoss(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = torch.nn.MSELoss()
        
    def forward(self, pred, actual):
        return self.mse(torch.log(pred.float() + 1), torch.log(actual.float() + 1))
 

In [10]:
loss_function = torch.nn.CrossEntropyLoss()
input = torch.randn(100, 10, requires_grad=True)
target = torch.empty(100, dtype=torch.long).random_(5)
print(input.size())
print(target.size())


torch.Size([100, 10])
torch.Size([100])


In [None]:
from featureblocks import FeatureBlock3
# The trinity of models
model = FeatureBlock3()
#model = FeatureBlockGT()
# This is the losss function
#loss_function = RMSLELoss()
loss_function = torch.nn.CrossEntropyLoss()
# This is what controls the gradient descent
#optimizer = torch.optim.Adadelta(model.parameters(),lr=0.00001)
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2000, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, threshold_mode='rel', cooldown=0,min_lr=0, eps=1e-08, verbose=False)
T_max = 50
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max)
iteration = 0
losses = []


#wandb.init(project='end2end1D')
#config = wandb.config

#wandb.watch(model)
#model.train()

for epoch in range(100):
    
    print('Epoch-{0} lr: {1}'.format(epoch, optimizer.param_groups[0]['lr']))
    
    for index,(x,y) in enumerate(audio_dataloader):
        optimizer.zero_grad()
        #print(x.size())
        x = model(x.float())
        #print(x.size())
        #print(y.size())
        # Use argmax to get class with max probability value from softmax
        #x = x.argmax(dim=-1) 
        x = x.float()
        y = y.squeeze(1)
        
        loss = loss_function(x,y)
        loss.backward()
        optimizer.step()
        
        print("iteration:{} loss:{} ".format(iteration, loss.item()))
        losses.append(loss)
        iteration += 1
    scheduler.step()
        
        #wandb.log({"loss": loss, "epoch": epoch})
        
        
#using wandb to visualize

Epoch-0 lr: 0.001
iteration:0 loss:2.3018529415130615 
iteration:1 loss:2.2982075214385986 
iteration:2 loss:2.3008296489715576 
iteration:3 loss:2.298231840133667 
iteration:4 loss:2.2983298301696777 
iteration:5 loss:2.2966344356536865 
iteration:6 loss:2.294978380203247 
iteration:7 loss:2.287743091583252 
iteration:8 loss:2.2920918464660645 
iteration:9 loss:2.2912333011627197 
iteration:10 loss:2.2845652103424072 
iteration:11 loss:2.278446674346924 
iteration:12 loss:2.2861390113830566 
iteration:13 loss:2.2782137393951416 
iteration:14 loss:2.277071237564087 
iteration:15 loss:2.2739617824554443 
iteration:16 loss:2.269221544265747 
iteration:17 loss:2.2611501216888428 
iteration:18 loss:2.2705183029174805 
iteration:19 loss:2.262052536010742 
iteration:20 loss:2.2374393939971924 
iteration:21 loss:2.247450590133667 
iteration:22 loss:2.2481741905212402 
iteration:23 loss:2.256747007369995 
iteration:24 loss:2.2415664196014404 
iteration:25 loss:2.242741584777832 
iteration:26 l

In [None]:
shape = [16,16,64]
filters = generate_filters(shape[2],shape[0],sample_rate,min_center_freq,order)
filters = filters.reshape(filters.shape[1],1,filters.shape[0])


In [1]:
import matplotlib.pyplot as plt

y = np.array(losses, dtype=float)
x = np.arange(len(losses))

plt.xlabel("iteration")
plt.ylabel("loss")
plt.suptitle("CrossEntropy loss f, Adam optimizer, lr=variable, epochs=100, batch-size=300", fontsize=9)

m, b = np.polyfit(x, y, 1)
plt.plot(x, y, 'o')
plt.plot(x, m*x+b)

NameError: name 'np' is not defined