# **Train Models**

Este notebook tiene como misión proporcionar el punto de acceso para entrenamiento y evaluación de cada uno de los modelos desarrollados adaptándose a sus particularidades de ejecución

Importamos las librerías necesarias

In [1]:
import boto3
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from ipywidgets import IntProgress

In [2]:
torch.cuda.is_available()

True

In [3]:
torch.cuda.current_device()

0

In [4]:
torch.cuda.get_device_name(0)

'GeForce GTX 1080 Ti'

Añadimos al sistema la ruta con nuestro código desarrollado

In [5]:
sys.path.append("../src")

Importamos el código desarrollado

In [6]:
from Dataset import VideoDatasetAprox1, LocalVideoDatasetAprox1, VideoDatasetAprox2, LocalVideoDatasetAprox2, VideoDatasetAprox3, VideoDatasetAprox4
from Model import ModelAprox1, ModelAprox2, ModelAprox3, ModelAprox4, ModelAprox5
from Model import ModelAprox6, ModelAprox7, ModelAprox8, ModelAprox9, ModelAprox10
from Model import ModelAprox11, ModelAprox12, ModelAprox13, ModelAprox14, ModelAprox15
from Model import ModelAprox16, ModelAprox17, ModelAprox18, ModelAprox19

## **1. RNN con audio como input**

Inicializamos el dataset correspondiente

In [7]:
dataset = LocalVideoDatasetAprox1(set_selected="train")

In [8]:
#print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))
print("Dataset con %d instancias con las que poder trabajar"%(len(dataset)))

Dataset con 60157 instancias con las que poder trabajar


Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [9]:
BATCH_SIZE = 20
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [10]:
# Construimos el modelo correspondiente
model = ModelAprox1(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device(0)
model.cuda()

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [11]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [12]:
%%time
n_epochs=1
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad()
        input_seq = input_seq.cuda()
        target_seq = target_seq.cuda()
        output = model(input_seq)
        loss = criterion(output, target_seq)
        loss.backward()
        optimizer.step()
        n_batch += BATCH_SIZE
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

  sound_frames = torch.tensor(sound_frames, dtype=torch.float)


batch: 20/60157............. Loss: 5.5456
batch: 40/60157............. Loss: 5.5447
batch: 60/60157............. Loss: 5.5438


invalid new backstep -1


batch: 80/60157............. Loss: 5.5427
batch: 100/60157............. Loss: 5.5418
batch: 120/60157............. Loss: 5.5411
batch: 140/60157............. Loss: 5.5391
batch: 160/60157............. Loss: 5.5393
batch: 180/60157............. Loss: 5.5382
batch: 200/60157............. Loss: 5.5386
batch: 220/60157............. Loss: 5.5368
batch: 240/60157............. Loss: 5.5368
batch: 260/60157............. Loss: 5.5366
batch: 280/60157............. Loss: 5.5343
batch: 300/60157............. Loss: 5.5330
batch: 320/60157............. Loss: 5.5349
batch: 340/60157............. Loss: 5.5327
batch: 360/60157............. Loss: 5.5314
batch: 380/60157............. Loss: 5.5345
batch: 400/60157............. Loss: 5.5326
batch: 420/60157............. Loss: 5.5349
batch: 440/60157............. Loss: 5.5315
batch: 460/60157............. Loss: 5.5325
batch: 480/60157............. Loss: 5.5326
batch: 500/60157............. Loss: 5.5316
batch: 520/60157............. Loss: 5.5340
batch: 540/6

Evaluamos el modelo

torch.save(model, "model1")

In [14]:
evaluation(input_seq, target_seq)

tensor(46, device='cuda:0')


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

## **2. LSTM con audio como input**

In [7]:
dataset = LocalVideoDatasetAprox1(set_selected="train")

In [8]:
print("Dataset con %d instancias con las que poder trabajar"%(len(dataset)))

Dataset con 60157 instancias con las que poder trabajar


Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [9]:
BATCH_SIZE = 20
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [10]:
# Construimos el modelo correspondiente
model = ModelAprox2(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device(0)
model.cuda()

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [11]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [12]:
%%time
n_epochs=1
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad()
        input_seq = input_seq.cuda()
        target_seq = target_seq.cuda()
        output = model(input_seq)
        loss = criterion(output, target_seq)
        loss.backward()
        optimizer.step()
        n_batch += BATCH_SIZE
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

  sound_frames = torch.tensor(sound_frames, dtype=torch.float)


batch: 20/60157............. Loss: 5.5457
batch: 40/60157............. Loss: 5.5453
batch: 60/60157............. Loss: 5.5450
batch: 80/60157............. Loss: 5.5440
batch: 100/60157............. Loss: 5.5442


invalid new backstep -1


batch: 120/60157............. Loss: 5.5423
batch: 140/60157............. Loss: 5.5408
batch: 160/60157............. Loss: 5.5385
batch: 180/60157............. Loss: 5.5395
batch: 200/60157............. Loss: 5.5384
batch: 220/60157............. Loss: 5.5370
batch: 240/60157............. Loss: 5.5396
batch: 260/60157............. Loss: 5.5374
batch: 280/60157............. Loss: 5.5378
batch: 300/60157............. Loss: 5.5340
batch: 320/60157............. Loss: 5.5372
batch: 340/60157............. Loss: 5.5355
batch: 360/60157............. Loss: 5.5370
batch: 380/60157............. Loss: 5.5327
batch: 400/60157............. Loss: 5.5349
batch: 420/60157............. Loss: 5.5348
batch: 440/60157............. Loss: 5.5351
batch: 460/60157............. Loss: 5.5338
batch: 480/60157............. Loss: 5.5357
batch: 500/60157............. Loss: 5.5306
batch: 520/60157............. Loss: 5.5338
batch: 540/60157............. Loss: 5.5319
batch: 560/60157............. Loss: 5.5372
batch: 580/

torch.save(model, "model2")

Evaluamos el modelo

In [None]:
evaluation(input_seq, target_seq)

## **3. GRU con audio como input**

In [7]:
dataset = LocalVideoDatasetAprox1(set_selected="train")

In [8]:
print("Dataset con %d instancias con las que poder trabajar"%(len(dataset)))

Dataset con 60157 instancias con las que poder trabajar


Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [12]:
BATCH_SIZE = 16
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [13]:
# Construimos el modelo correspondiente
model = ModelAprox3(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device(0)
model.cuda()

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [14]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [15]:
%%time
n_epochs=1
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad()
        input_seq = input_seq.cuda()
        target_seq = target_seq.cuda()
        output = model(input_seq)
        loss = criterion(output, target_seq)
        loss.backward()
        optimizer.step()
        n_batch += BATCH_SIZE
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

  sound_frames = torch.tensor(sound_frames, dtype=torch.float)


batch: 16/60157............. Loss: 5.5459


invalid new backstep -1


batch: 32/60157............. Loss: 5.5452
batch: 48/60157............. Loss: 5.5439
batch: 64/60157............. Loss: 5.5430
batch: 80/60157............. Loss: 5.5421
batch: 96/60157............. Loss: 5.5407
batch: 112/60157............. Loss: 5.5398
batch: 128/60157............. Loss: 5.5396
batch: 144/60157............. Loss: 5.5393
batch: 160/60157............. Loss: 5.5383
batch: 176/60157............. Loss: 5.5345
batch: 192/60157............. Loss: 5.5374
batch: 208/60157............. Loss: 5.5366
batch: 224/60157............. Loss: 5.5364
batch: 240/60157............. Loss: 5.5351
batch: 256/60157............. Loss: 5.5329
batch: 272/60157............. Loss: 5.5380
batch: 288/60157............. Loss: 5.5338
batch: 304/60157............. Loss: 5.5337
batch: 320/60157............. Loss: 5.5334
batch: 336/60157............. Loss: 5.5337
batch: 352/60157............. Loss: 5.5333
batch: 368/60157............. Loss: 5.5307
batch: 384/60157............. Loss: 5.5339
batch: 400/60157

torch.save(model, "model3")

Evaluamos el modelo

In [None]:
evaluation(input_seq, target_seq)

## **4. RNN con audio como input y embedding de video inception v3 como valor inicial del hidden state**

BATCH_SIZE = 1
dataset = LocalVideoDatasetAprox2(get_image_embeddings=True)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
for batch in iter(dataloader):
    continue

In [7]:
dataset = LocalVideoDatasetAprox2()

In [8]:
print("Dataset con %d instancias con las que poder trabajar"%(len(dataset)))

Dataset con 60157 instancias con las que poder trabajar


Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [9]:
BATCH_SIZE = 20
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [10]:
# Construimos el modelo correspondiente
model = ModelAprox4(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device(0)
model.cuda()

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [11]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=1
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        batch = batch
        target_seq = target_seq.cuda()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward()
        optimizer.step()
        n_batch += BATCH_SIZE
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

  sound_frames = torch.tensor(sound_frames, dtype=torch.float)


batch: 20/60157............. Loss: 5.5455
batch: 40/60157............. Loss: 5.5441
batch: 60/60157............. Loss: 5.5436


invalid new backstep -1


batch: 80/60157............. Loss: 5.5428
batch: 100/60157............. Loss: 5.5428
batch: 120/60157............. Loss: 5.5421
batch: 140/60157............. Loss: 5.5398
batch: 160/60157............. Loss: 5.5397
batch: 180/60157............. Loss: 5.5376
batch: 200/60157............. Loss: 5.5381
batch: 220/60157............. Loss: 5.5378
batch: 240/60157............. Loss: 5.5364
batch: 260/60157............. Loss: 5.5377
batch: 280/60157............. Loss: 5.5368
batch: 300/60157............. Loss: 5.5342
batch: 320/60157............. Loss: 5.5349
batch: 340/60157............. Loss: 5.5350
batch: 360/60157............. Loss: 5.5330
batch: 380/60157............. Loss: 5.5326
batch: 400/60157............. Loss: 5.5340
batch: 420/60157............. Loss: 5.5330
batch: 440/60157............. Loss: 5.5333
batch: 460/60157............. Loss: 5.5329
batch: 480/60157............. Loss: 5.5314
batch: 500/60157............. Loss: 5.5331
batch: 520/60157............. Loss: 5.5328
batch: 540/6

In [None]:
torch.save(model, "model4")

## **5. LSTM con audio como input y embedding de video inception v3 como valor inicial del hidden state**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox5(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **6. GRU con audio como input y embedding de video inception v3 como valor inicial del hidden state**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox6(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **7. RNN con audio como input y 10 embeddings de video inception v3 como valor inicial del hidden state**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox7(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **8. LSTM con audio como input y 10 embeddings de video inception v3 como valor inicial del hidden state**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox8(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **9. GRU con audio como input y 10 embeddings de video inception v3 como valor inicial del hidden state**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox9(input_size=1, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **10. RNN con audio como input y todos video embedding upsampleado como segunda serie temporal de dimension 10**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox10(input_size=1025, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **11. LSTM con audio como input y todos video embedding upsampleado como segunda serie temporal**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox11(input_size=1025, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **12. GRU con audio como input y todos video embedding upsampleado como segunda serie temporal**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=5, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox12(input_size=1025, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **13. RNN con audio como input y todos video embedding upsampleado (linear) como segunda serie temporal**
Mismas condiciones que nearest
## **14. LSTM con audio como input y todos video embedding upsampleado (linear) como segunda serie temporal**
Mismas condiciones que nearest
## **15. GRU con audio como input y todos video embedding upsampleado (linear) como segunda serie temporal**
Mismas condiciones que nearest

## **16. RNN con audio como input y todos video embedding deconvolucionado como segunda serie temporal**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox13(input_size=1025, output_size=256, hidden_dim=64, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=1
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **17. LSTM con audio como input y todos video embedding deconvolucionado como segunda serie temporal**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox14(input_size=1025, output_size=256, hidden_dim=12, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **18. GRU con audio como input y todos video embedding deconvolucionado como segunda serie temporal**

In [None]:
dataset = VideoDatasetAprox2()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox15(input_size=1025, output_size=256, hidden_dim=12, n_layers=1)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        target_seq = input_seq.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(batch)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **19. WaveNet solo audio**

In [None]:
dataset = VideoDatasetAprox3()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
batch_size=2
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox16(in_channels=256)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        input_seq = nn.functional.one_hot(input_seq.long(), num_classes=256)
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        target_seq = target_seq.resize_((batch_size,80000))
        target_seq = target_seq[:,-70000:]
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(input_seq)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **20. WaveNet con video upsampleado (nearest) y audio**

In [None]:
dataset = VideoDatasetAprox4()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
batch_size=1
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox17(in_channels=1280)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        input_seq = nn.functional.one_hot(input_seq.long(), num_classes=256)
        target_seq = batch[0].view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        target_seq = target_seq.resize_((batch_size,80000))
        target_seq = target_seq[:,-70000:]
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model((input_seq,batch[1]))
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **21. WaveNet con video deconvolucionado y audio**

In [None]:
dataset = VideoDatasetAprox4()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
batch_size=1
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox18(in_channels=1280)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch[0]
        input_seq = nn.functional.one_hot(input_seq.long(), num_classes=256)
        target_seq = batch[0].view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        target_seq = target_seq.resize_((batch_size,80000))
        target_seq = target_seq[:,-70000:]
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model((input_seq,batch[1]))
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **22. WaveNet con video upsampleado (linear) y audio**

## **23. BatchPredictionWaveNet solo audio x2 faster**

In [None]:
dataset = VideoDatasetAprox3()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
batch_size=1
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox19(in_channels=256)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        input_seq = nn.functional.one_hot(input_seq.long(), num_classes=256)
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        target_seq = target_seq.resize_((batch_size,80000))
        target_seq = target_seq[:,-70000:]
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(input_seq)
        print("Shape final output",output.shape)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **24. BatchPredictionWaveNet solo audio x4 faster**

In [None]:
dataset = VideoDatasetAprox3()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
batch_size=1
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox19(in_channels=256, boost=4)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        input_seq = nn.functional.one_hot(input_seq.long(), num_classes=256)
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        target_seq = target_seq.resize_((batch_size,80000))
        target_seq = target_seq[:,-70000:]
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(input_seq)
        print("Shape final output",output.shape)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

## **25. BatchPredictionWaveNet solo audio x8 faster**

In [None]:
dataset = VideoDatasetAprox3()

In [None]:
print("Dataset con %d instancias con las que poder trabajar, lo que equivale a %s de almacenamiento"%(len(dataset), dataset.size()))

Inicializamos el dataloader con el que poder ir extrayendo datos como un flujo

In [None]:
batch_size=1
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Construimos el modelo correspondiente
model = ModelAprox19(in_channels=256, boost=8)
# Llevamos el modelo al dispositivo que lo procesará
device = torch.device("cpu")
model.to(device)

# Definimos hiperparámetros
n_epochs = 100
lr=0.01

# Definimos función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Definimos la función de evaluación

In [None]:
def evaluation(input_seq, target_seq):
    for sample in input_seq:
        values_generated = sample[:68000]
        n_value = 68000
        for value in sample[68000:]:
            if n_value>68000:
                values_generated = torch.cat((values_generated,output[:,None]), 0)
            value = values_generated[None,:]
            output = model(value)
            output = torch.argmax(output[-1,:])
            print(output)
            output = torch.tensor([float(output.numpy())])
            n_value+=1

Entrenamos el modelo

In [None]:
n_epochs=5
for epoch in range(1, n_epochs + 1):
    n_batch = 0
    for batch in iter(dataloader):
        input_seq = batch
        input_seq = nn.functional.one_hot(input_seq.long(), num_classes=256)
        target_seq = batch.view(-1).long()[1:]
        target_seq = torch.cat((target_seq, torch.tensor([float(target_seq[-1].numpy())])), 0).long()
        target_seq = target_seq.resize_((batch_size,80000))
        target_seq = target_seq[:,-70000:]
        optimizer.zero_grad() # Clears existing gradients from previous epoch
        input_seq.to(device)
        output = model(input_seq)
        print("Shape final output",output.shape)
        loss = criterion(output, target_seq)
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        n_batch += 32
        print('batch: {}/{}.............'.format(n_batch, len(dataset)), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

### Una vez probado solo con audio vemos como van con otros