In [1]:
import wandb
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn

from src.datasets.polynomial import PolynomialDataset
from src.models.TAE import TAE


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Start a Weights & Biases run; the returned handle is passed to PolynomialDataset below.
run = wandb.init(project='DTC', name='test')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtristanbester1[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Build the dataset from the active W&B run and two versioned reference strings.
# NOTE(review): presumably the first reference holds the inputs (X) and the second
# the targets (Y), both resolved through `run` — confirm against PolynomialDataset.
dataset = PolynomialDataset(
    run, 
    "tristanbester1/DTC/polynomial_dataset_X:v0",
    "tristanbester1/DTC/polynomial_dataset_Y:v0",
)

In [21]:
def incremental_average(ave, n_val, n):
    """Fold one new observation into a running average.

    Args:
        ave: Current average, or ``None`` when nothing has been observed yet.
        n_val: The new observation.
        n: Divisor applied to the correction term; for a true running mean
            this is the number of observations including ``n_val``.

    Returns:
        ``n_val`` unchanged when ``ave`` is ``None`` (``n`` is ignored in that
        case); otherwise ``ave + (n_val - ave) / n``.
    """
    return n_val if ave is None else ave + (n_val - ave) / float(n)

In [30]:
def train_one_epoch(model, optimizer, criterion, data_loader, device, scheduler):
    """Run one reconstruction-training pass over ``data_loader``.

    Each batch ``x`` is moved to ``device``, passed through ``model`` and scored
    with ``criterion(outputs, x)``; the second element of every batch is ignored.
    After the last batch the scheduler is stepped once with the epoch's average
    loss.

    Args:
        model: Module to train; switched to training mode.
        optimizer: Optimiser whose gradients are zeroed and stepped per batch.
        criterion: Callable ``criterion(outputs, x)`` returning a scalar loss
            tensor that supports ``backward()`` and ``item()``.
        data_loader: Sized iterable yielding ``(x, _)`` pairs.
        device: Target passed to ``x.to(...)``.
        scheduler: Object whose ``step(metric)`` accepts the average loss.

    Returns:
        The running mean of the per-batch loss values, or ``None`` when the
        loader yielded no batches (the scheduler is not stepped in that case).
    """
    model.train()

    ave_loss = None
    pbar = tqdm(data_loader, leave=True, total=len(data_loader))

    # Count batches from 1: every loss.item() is a single per-batch value, so the
    # running mean must be divided by the number of batches seen so far. The
    # previous divisor, (n + 1) * data_loader.batch_size, only gave a true mean
    # for batch_size == 1 and failed when batch_size was None.
    for batches_seen, (x, _) in enumerate(pbar, start=1):
        x = x.to(device)

        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, x)
        loss.backward()
        optimizer.step()

        ave_loss = incremental_average(ave_loss, loss.item(), batches_seen)
        pbar.set_description(f'loss - {ave_loss}')

    # An empty loader leaves ave_loss as None; there is no metric to report.
    if ave_loss is not None:
        scheduler.step(ave_loss)
    return ave_loss

In [31]:
def validate(model, criterion, data_loader, device):
    """Compute the mean reconstruction loss over ``data_loader`` without gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        criterion: Callable ``criterion(outputs, x)`` returning a scalar loss
            tensor that supports ``item()``.
        data_loader: Sized iterable yielding ``(x, _)`` pairs; the second
            element of each pair is ignored.
        device: Target passed to ``x.to(...)``.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``.
    """
    model.eval()

    progress = tqdm(data_loader, leave=True, total=len(data_loader))
    total_loss = 0

    with torch.no_grad():
        for batch, _ in progress:
            batch = batch.to(device)
            reconstruction = model(batch)
            total_loss += criterion(reconstruction, batch).item()

    return total_loss / len(data_loader)

In [32]:
# One sample per batch; this same loader is passed to both train_one_epoch and validate below.
data_loader =  DataLoader(dataset=dataset, batch_size=1) 

In [33]:
# Batches are moved to this device inside train_one_epoch / validate.
device = torch.device('cpu')

In [34]:
# Instantiate the TAE model that the training and validation cells below operate on.
# NOTE(review): input_dim=1 / seq_len=100 presumably have to match the shape of the
# series produced by PolynomialDataset — confirm against src.models.TAE.
model = TAE(
    input_dim=1,
    seq_len=100,
    cnn_kernel=10,
    cnn_stride=3,
    mp_kernel=10,
    mp_stride=3,
    lstm_hidden_dim=8,
    upsample_scale=2,
    deconv_kernel=10,
    deconv_stride=6,
)

In [35]:
# Adam over all model parameters, with a plateau scheduler that multiplies the
# learning rate by `factor`. train_one_epoch calls scheduler.step(ave_loss) once per
# epoch, so `patience` is counted in epochs.
# NOTE(review): `verbose` is reported as deprecated in newer PyTorch releases —
# confirm against the installed version before upgrading.
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                factor=0.1,
                                                patience=10,
                                                threshold=0.01,
                                                verbose=True)

In [36]:
# Mean-squared error; train_one_epoch and validate apply it between the model output and its own input.
criterion = nn.MSELoss()

In [37]:
# Run a single training epoch; the cell displays the returned average loss.
train_one_epoch(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    data_loader=data_loader,
    device=device,
    scheduler=scheduler    
)

loss - 0.5560173696279527: 100%|██████████| 300/300 [00:01<00:00, 189.67it/s]


0.5560173696279527

In [26]:
# Evaluated on the same data_loader that was used for training, so this is not a held-out score.
validate(model=model, criterion=criterion, data_loader=data_loader, device=device)

100%|██████████| 300/300 [00:00<00:00, 1538.67it/s]


0.7073707709709803

In [11]:
# NOTE(review): this cell does not run in this notebook — its recorded output is
# "NameError: name 'LibriSpeechDataset' is not defined". `LibriSpeechDataset`,
# `collate_fn`, `args` and `LSTMModel` are not defined or imported in any cell
# visible here.
# If it did run, it would rebind `device`, `model`, `optimizer`, `scheduler` and
# `criterion` from the cells above, and it requires a CUDA device.
# TODO(review): presumably left over from a different experiment — confirm and
# either supply the missing definitions or remove the cell.
train_dataset = LibriSpeechDataset(csv_path='train-clean-360-1.csv')
train_loader = DataLoader(train_dataset, batch_size=args.train_batch_size,
                            collate_fn=collate_fn)

val_dataset = LibriSpeechDataset(csv_path='test-clean.csv')
val_loader = DataLoader(val_dataset, batch_size=args.val_batch_size,
                        collate_fn=collate_fn)

device = torch.device('cuda')
model = LSTMModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                    factor=0.1,
                                                    patience=1,
                                                    threshold=0.01,
                                                    verbose=True)
criterion = nn.CTCLoss().to(device)


NameError: name 'LibriSpeechDataset' is not defined