In [None]:
# Step 1: import every dependency used by the notebook and mount Google Drive.
# NOTE(review): despite the banner text, nothing is installed in this cell. `munch` and
# `datasets` are installed by separate `!pip install` cells that appear further down the
# notebook, so on a fresh runtime those cells have to be executed before this one.
print(">>> [Paso 1] Instalando librerías...")


import os, random, time, json, types, itertools, math, copy
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from munch import Munch
import numpy as np, librosa, soundfile as sf
from datasets import load_dataset
from google.colab import drive
from IPython.display import Audio, display
from torch.nn.utils import weight_norm, remove_weight_norm
from torch.nn import Conv1d, ConvTranspose1d

print(">>> [Paso 1] Librerías instaladas y cargadas.")
# Mount Google Drive at /content/drive; the project directories configured in the
# next cell are all located under this mount point.
drive.mount('/content/drive')
print(">>> [Paso 1] Google Drive montado con éxito.")

>>> [Paso 1] Instalando librerías...
>>> [Paso 1] Librerías instaladas y cargadas.
Mounted at /content/drive
>>> [Paso 1] Google Drive montado con éxito.


In [None]:
print("\n>>> [Paso 2] Configurando parámetros del proyecto...")

# --- Locations on the mounted Drive ---
DIR = "/content/drive/My Drive/"  # Drive root; the HiFi-GAN folder lives directly under it
DRIVE_ROOT_DIR = "/content/drive/My Drive/stargan_vc2_final_attempt"  # project folder
SAVED_MODELS_DIR = os.path.join(DRIVE_ROOT_DIR, "saved_models")
HIFIGAN_DIR = os.path.join(DIR, "hifigan")
OUTPUT_AUDIO_DIR = os.path.join(DRIVE_ROOT_DIR, "output_audio")
# Create the output folders up front (no error if they already exist).
for _output_dir in (SAVED_MODELS_DIR, OUTPUT_AUDIO_DIR):
    os.makedirs(_output_dir, exist_ok=True)

# --- Optimisation schedule ---
NUM_EPOCHS = 200
BATCH_SIZE = 4
LEARNING_RATE_G = 1e-4
LEARNING_RATE_D = 1e-4

# --- Loss weights (the cycle-consistency term is included) ---
LAMBDA_RECON = 10.0
LAMBDA_STYLE = 1.0
LAMBDA_ADV = 1.0
LAMBDA_CYC = 5.0

# --- Audio / mel-spectrogram settings ---
SAMPLE_RATE = 22050
N_FFT = 1024
HOP_LENGTH = 256
N_MELS = 80

# Short segments so that each recording yields more training examples.
SEGMENT_SECONDS = 2
SEGMENT_LENGTH = SAMPLE_RATE * SEGMENT_SECONDS

# Use the GPU when one is visible, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)
print(f"Usando dispositivo: {DEVICE}")
print(">>> [Paso 2] Parámetros configurados.")


>>> [Paso 2] Configurando parámetros del proyecto...
Usando dispositivo: cuda
>>> [Paso 2] Parámetros configurados.


In [None]:
# ==============================================================================
#           PASO 1, 2 (Sin cambios)
# ==============================================================================
# ...

# ==============================================================================
#         PASO 3: CARGA Y PREPARACIÓN DE LOS DATASETS (CON CARGA PEREZOSA)
# ==============================================================================
print("\n>>> [Paso 3] Preparando los datasets con carga perezosa (Lazy Loading)...")

class MelSpectrogramLazyDataset(Dataset):
    """Dataset of fixed-length log-mel segments that are computed on demand.

    At construction time only a lightweight index is built: one
    ``{'audio_idx', 'segment_idx'}`` entry per non-overlapping segment of
    ``segment_length`` samples (measured at the target rate ``sr``) that fits in
    each recording. The audio itself is resampled, sliced and converted to a
    log-mel spectrogram inside ``__getitem__``.

    Args:
        hf_dataset: Indexable/iterable collection whose items expose
            ``item['audio']['array']`` and ``item['audio']['sampling_rate']``.
        segment_length: Segment size in samples at the target rate ``sr``.
        sr: Target sampling rate used for resampling and for the mel filterbank.
        n_fft: FFT size passed to ``librosa.feature.melspectrogram``.
        n_mels: Number of mel bands.
        hop_length: Hop size in samples between STFT frames.
    """

    def __init__(self, hf_dataset, segment_length, sr, n_fft, n_mels, hop_length):
        self.hf_dataset = hf_dataset
        self.sr, self.segment_length = sr, segment_length
        self.n_fft, self.n_mels, self.hop_length = n_fft, n_mels, hop_length

        # Nothing is pre-computed; we only keep "pointers" to the segments.
        self.audio_segments = self._create_segment_pointers()

    def _create_segment_pointers(self):
        """Return one pointer dict per full segment available in every recording."""
        segment_pointers = []
        print(f"Creando punteros a segmentos para {len(self.hf_dataset)} audios...")
        segment_seconds = self.segment_length / self.sr
        for idx, item in enumerate(self.hf_dataset):
            audio_info = item['audio']
            # Samples live on the last axis. Using len() here would count channels
            # for (channels, samples) arrays and silently drop multi-channel clips.
            num_samples = np.shape(audio_info['array'])[-1]
            duration = num_samples / audio_info['sampling_rate']
            num_segments = int(duration // segment_seconds)
            # Store the recording index and the segment index inside that recording.
            segment_pointers.extend(
                {'audio_idx': idx, 'segment_idx': i} for i in range(num_segments)
            )

        print(f"Se encontraron {len(segment_pointers)} segmentos potenciales.")
        return segment_pointers

    def __len__(self):
        """Number of segments across all recordings."""
        return len(self.audio_segments)

    def __getitem__(self, index):
        """Load, resample and slice one segment; return its log-mel as a float tensor.

        The returned tensor holds ``log(max(mel_power, 1e-5))`` as produced by
        ``librosa.feature.melspectrogram`` for a ``segment_length``-sample signal.
        """
        pointer = self.audio_segments[index]

        # 1. Fetch the original recording from the underlying dataset.
        audio_info = self.hf_dataset[pointer['audio_idx']]['audio']
        audio = np.asarray(audio_info['array'])
        orig_sr = audio_info['sampling_rate']

        # 2. Down-mix first so that only a single channel has to be resampled.
        if audio.ndim > 1:
            audio = librosa.to_mono(audio)
        if orig_sr != self.sr:
            audio = librosa.resample(audio, orig_sr=orig_sr, target_sr=self.sr)

        # 3. Cut the requested segment.
        start_sample = pointer['segment_idx'] * self.segment_length
        segment = audio[start_sample:start_sample + self.segment_length]
        # Defensive: if the slice came out short, zero-pad it so every item has the
        # same number of frames and the default collate function can stack a batch.
        missing = self.segment_length - len(segment)
        if missing > 0:
            segment = np.pad(segment, (0, missing))

        # 4. Log-mel spectrogram, floored at 1e-5 before the logarithm.
        mel_power = librosa.feature.melspectrogram(
            y=segment, sr=self.sr, n_fft=self.n_fft,
            n_mels=self.n_mels, hop_length=self.hop_length,
        )
        log_mel = np.log(np.clip(mel_power, a_min=1e-5, a_max=None))
        return torch.FloatTensor(log_mel)

print("Cargando y dividiendo datasets completos...")


def _make_mel_loader(hf_split, shuffle):
    """Wrap a Hugging Face split in the lazy mel dataset and a DataLoader.

    Every loader uses the notebook-wide segment/mel settings, ``BATCH_SIZE``,
    two worker processes, and drops the final incomplete batch.
    """
    mel_dataset = MelSpectrogramLazyDataset(
        hf_split, SEGMENT_LENGTH, SAMPLE_RATE, N_FFT, N_MELS, HOP_LENGTH
    )
    return DataLoader(
        mel_dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        drop_last=True,
        num_workers=2,
    )


clean_ds_full = load_dataset("lopezjm96/spanish_voices", split="train")
radio_ds_full = load_dataset("jacktol/atc-dataset", split="train")

# Hold out 10% of each corpus with a fixed seed.
clean_ds = clean_ds_full.train_test_split(test_size=0.1, seed=42)
radio_ds = radio_ds_full.train_test_split(test_size=0.1, seed=42)

# Build the loaders on top of the lazy dataset class (clean train, clean val, radio train).
train_clean_loader = _make_mel_loader(clean_ds['train'], shuffle=True)
val_clean_loader = _make_mel_loader(clean_ds['test'], shuffle=False)
train_radio_loader = _make_mel_loader(radio_ds['train'], shuffle=True)

print(">>> [Paso 3] Datasets y DataLoaders listos.")


# ==============================================================================
#           PASO 4, 5, 6 (Sin cambios)
# ==============================================================================
# Steps 4 and 5 (model architectures and the training loop) are defined in the cells that follow.


>>> [Paso 3] Preparando los datasets con carga perezosa (Lazy Loading)...
Cargando y dividiendo datasets completos...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


combine.zip:   0%|          | 0.00/288M [00:00<?, ?B/s]

spanish_voices.zip:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

Generating train split:   0%|          | 0/6148 [00:00<?, ? examples/s]

README.md: 0.00B [00:00, ?B/s]

train-00000-of-00002.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

train-00001-of-00002.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/161M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11868 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2927 [00:00<?, ? examples/s]

Creando punteros a segmentos para 5533 audios...
Se encontraron 9004 segmentos potenciales.
Creando punteros a segmentos para 615 audios...
Se encontraron 994 segmentos potenciales.
Creando punteros a segmentos para 10681 audios...
Se encontraron 12812 segmentos potenciales.
>>> [Paso 3] Datasets y DataLoaders listos.


In [None]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

In [None]:
print("\n>>> [Paso 4] Definiendo las arquitecturas de los modelos (StarGANv2-VC)...")

class ResBlk(nn.Module):
    """Pre-activation residual block with optional instance norm and 2x downsampling.

    The residual branch is ``[norm] -> actv -> 3x3 conv -> [avg-pool] -> [norm] ->
    actv -> 3x3 conv``. The shortcut is the identity, or a bias-free 1x1 conv when
    ``dim_in != dim_out``, followed by the same optional average pooling. The sum
    of both branches is divided by ``sqrt(2)``.

    Args:
        dim_in: Input channels.
        dim_out: Output channels.
        actv: Activation module applied before each convolution.
        normalize: If True, use affine ``InstanceNorm2d`` before each activation.
        downsample: If True, halve both spatial dimensions with average pooling.
    """

    def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2), normalize=False, downsample=False):
        super().__init__()
        self.actv = actv
        self.normalize = normalize
        self.downsample = downsample
        self.learned_sc = dim_in != dim_out

        # Sub-modules are registered in a fixed order so checkpoints stay compatible.
        self.conv1 = nn.Conv2d(dim_in, dim_in, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(dim_in, dim_out, kernel_size=3, stride=1, padding=1)
        if self.normalize:
            self.norm1 = nn.InstanceNorm2d(dim_in, affine=True)
            self.norm2 = nn.InstanceNorm2d(dim_in, affine=True)
        if self.learned_sc:
            self.conv1x1 = nn.Conv2d(dim_in, dim_out, kernel_size=1, stride=1, padding=0, bias=False)

    def _shortcut(self, x):
        """Skip path: optional 1x1 projection, then optional average pooling."""
        if self.learned_sc:
            x = self.conv1x1(x)
        if self.downsample:
            x = F.avg_pool2d(x, 2)
        return x

    def _residual(self, x):
        """Residual path: two (norm, activation, conv) stages with pooling in between."""
        if self.normalize:
            x = self.norm1(x)
        x = self.conv1(self.actv(x))
        if self.downsample:
            x = F.avg_pool2d(x, 2)
        if self.normalize:
            x = self.norm2(x)
        return self.conv2(self.actv(x))

    def forward(self, x):
        skip = self._shortcut(x)
        branch = self._residual(x)
        # Dividing by sqrt(2) keeps the variance of the sum close to that of one branch.
        return (skip + branch) / math.sqrt(2)
class AdaIN(nn.Module):
    """Adaptive instance normalisation driven by a style vector.

    A linear layer maps the style code to ``2 * num_features`` values that are
    split into a per-channel scale offset ``gamma`` and shift ``beta``; the input
    is instance-normalised (no learned affine) and transformed as
    ``(1 + gamma) * norm(x) + beta``.
    """

    def __init__(self, style_dim, num_features):
        super().__init__()
        self.norm = nn.InstanceNorm2d(num_features, affine=False)
        self.fc = nn.Linear(style_dim, num_features * 2)

    def forward(self, x, s):
        style_params = self.fc(s)
        # Append two singleton axes so the parameters broadcast over the spatial dims.
        style_params = style_params.view(style_params.size(0), style_params.size(1), 1, 1)
        gamma, beta = style_params.chunk(2, dim=1)
        normalised = self.norm(x)
        return (1 + gamma) * normalised + beta
class AdainResBlk(nn.Module):
    """Style-conditioned residual block with optional 2x nearest-neighbour upsampling.

    The residual branch is ``AdaIN -> actv -> [upsample] -> 3x3 conv -> AdaIN ->
    actv -> 3x3 conv``. The shortcut optionally upsamples and, when the channel
    count changes, applies a bias-free 1x1 conv. The sum is divided by ``sqrt(2)``.

    Args:
        dim_in: Input channels.
        dim_out: Output channels.
        style_dim: Size of the style vector fed to both AdaIN layers.
        actv: Activation module.
        upsample: If True, double both spatial dimensions.
    """

    def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), upsample=False):
        super().__init__()
        self.actv = actv
        self.upsample = upsample
        self.learned_sc = dim_in != dim_out

        # Registration order is kept stable so existing checkpoints still load.
        self.conv1 = nn.Conv2d(dim_in, dim_out, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(dim_out, dim_out, kernel_size=3, stride=1, padding=1)
        self.norm1 = AdaIN(style_dim, dim_in)
        self.norm2 = AdaIN(style_dim, dim_out)
        if self.learned_sc:
            self.conv1x1 = nn.Conv2d(dim_in, dim_out, kernel_size=1, stride=1, padding=0, bias=False)

    def _grow(self, x):
        """Double the spatial size with nearest-neighbour interpolation if enabled."""
        if self.upsample:
            return F.interpolate(x, scale_factor=2, mode='nearest')
        return x

    def _shortcut(self, x):
        """Skip path: optional upsampling, then optional 1x1 projection."""
        x = self._grow(x)
        if self.learned_sc:
            x = self.conv1x1(x)
        return x

    def _residual(self, x, s):
        """Residual path: two AdaIN/activation/conv stages, upsampling before the first conv."""
        x = self.actv(self.norm1(x, s))
        x = self.conv1(self._grow(x))
        x = self.actv(self.norm2(x, s))
        return self.conv2(x)

    def forward(self, x, s):
        skip = self._shortcut(x)
        branch = self._residual(x, s)
        return (skip + branch) / math.sqrt(2)
class Generator(nn.Module):
    """Encoder/decoder generator that re-styles a mel-spectrogram.

    The encoder stacks ``repeat_num`` downsampling ``ResBlk`` layers plus two
    same-resolution ones; the decoder mirrors it with ``AdainResBlk`` layers that
    are conditioned on the style code. The ``dim_in`` argument is accepted for
    interface compatibility but is not used: the stem always maps 1 -> 64 channels.
    """

    def __init__(self, dim_in=N_MELS, style_dim=64, max_conv_dim=512, repeat_num=4):
        super().__init__()
        self.stem = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
        self.encode = nn.ModuleList()
        self.decode = nn.ModuleList()
        self.to_out = nn.Conv2d(64, 1, kernel_size=1, stride=1, padding=0)

        # Down/up-sampling pairs; decoder blocks are prepended so they mirror the encoder.
        dim_in = 64
        for _ in range(repeat_num):
            dim_out = min(dim_in * 2, max_conv_dim)
            self.encode.append(ResBlk(dim_in, dim_out, normalize=True, downsample=True))
            self.decode.insert(0, AdainResBlk(dim_out, dim_in, style_dim, upsample=True))
            dim_in = dim_out

        # Bottleneck: two blocks on each side that keep the resolution.
        for _ in range(2):
            self.encode.append(ResBlk(dim_out, dim_out, normalize=True))
            self.decode.insert(0, AdainResBlk(dim_out, dim_out, style_dim))

    def forward(self, x, s):
        """Translate mel batch ``x`` using style code ``s``; output has the shape of ``x``."""
        x_in = x.unsqueeze(1)  # add the channel axis expected by Conv2d
        hidden = self.stem(x_in)
        for enc_block in self.encode:
            hidden = enc_block(hidden)
        for dec_block in self.decode:
            hidden = dec_block(hidden, s)
        out = self.to_out(hidden)
        # Pool/upsample round trips may not restore the exact input size, so the
        # output is resized to match the original spatial dimensions.
        out = F.interpolate(out, size=x_in.shape[2:], mode='bilinear', align_corners=False)
        return out.squeeze(1)
class StyleEncoder(nn.Module):
    """Convolutional encoder that maps a mel-spectrogram to a ``style_dim`` vector.

    A 3x3 stem is followed by ``repeat_num`` downsampling ``ResBlk`` layers, a
    LeakyReLU, an unpadded 5x5 conv and global average pooling; a linear layer
    then projects the pooled features to the style code. The ``dim_in`` argument
    is not used (the stem always maps 1 -> 64 channels).
    """

    def __init__(self, dim_in=N_MELS, style_dim=64, max_conv_dim=512, repeat_num=4):
        super().__init__()
        layers = [nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)]
        dim_in = 64
        for _ in range(repeat_num):
            dim_out = min(dim_in * 2, max_conv_dim)
            layers.append(ResBlk(dim_in, dim_out, downsample=True))
            dim_in = dim_out
        layers.extend([
            nn.LeakyReLU(0.2),
            nn.Conv2d(dim_out, dim_out, kernel_size=5, stride=1, padding=0),
            nn.AdaptiveAvgPool2d(1),
        ])
        self.shared = nn.Sequential(*layers)
        self.fc = nn.Linear(dim_out, style_dim)

    def forward(self, x):
        features = self.shared(x.unsqueeze(1))  # add the channel axis
        pooled = features.view(features.size(0), -1)
        return self.fc(pooled)
class Discriminator(nn.Module):
    """Convolutional critic that scores a mel-spectrogram.

    A 3x3 stem is followed by ``repeat_num`` downsampling ``ResBlk`` layers, a
    LeakyReLU, an unpadded 5x5 conv, another LeakyReLU and a 1x1 conv producing
    a single-channel score map. The ``dim_in`` argument is not used (the stem
    always maps 1 -> 64 channels).
    """

    def __init__(self, dim_in=N_MELS, max_conv_dim=512, repeat_num=4):
        super().__init__()
        layers = [nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)]
        dim_in = 64
        for _ in range(repeat_num):
            dim_out = min(dim_in * 2, max_conv_dim)
            layers.append(ResBlk(dim_in, dim_out, downsample=True))
            dim_in = dim_out
        layers.extend([
            nn.LeakyReLU(0.2),
            nn.Conv2d(dim_out, dim_out, kernel_size=5, stride=1, padding=0),
            nn.LeakyReLU(0.2),
            nn.Conv2d(dim_out, 1, kernel_size=1, stride=1, padding=0),
        ])
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        with_channel = x.unsqueeze(1)  # add the channel axis expected by Conv2d
        return self.main(with_channel)
class ResBlockHIFI(torch.nn.Module):
    """Residual stack of weight-normalised 1-D conv pairs used by the vocoder generator.

    For each dilation (at most the first three) a dilated conv is paired with a
    non-dilated conv of the same kernel size; every pair is applied as
    ``x = conv2(lrelu(conv1(lrelu(x)))) + x`` with a LeakyReLU slope of 0.1.
    Padding is chosen so the sequence length is preserved.

    Args:
        h: Hyper-parameter object; stored on the module but not read here.
        channels: Number of input/output channels of every convolution.
        kernel_size: Kernel size shared by all convolutions.
        dilation: Dilation factors of the first convolution in each pair.
    """

    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
        super().__init__()
        self.h = h

        dilated_convs = []
        for k, d in zip([kernel_size] * 3, dilation):
            same_pad = (k * d - d) // 2
            dilated_convs.append(
                weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=d, padding=same_pad))
            )
        self.convs1 = nn.ModuleList(dilated_convs)

        plain_pad = (kernel_size - 1) // 2
        plain_convs = []
        for _ in dilation:
            plain_convs.append(
                weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=plain_pad))
            )
        self.convs2 = nn.ModuleList(plain_convs)

    def forward(self, x):
        for dilated, plain in zip(self.convs1, self.convs2):
            hidden = dilated(F.leaky_relu(x, 0.1))
            hidden = plain(F.leaky_relu(hidden, 0.1))
            x = hidden + x
        return x
class GeneratorHIFI(torch.nn.Module):
    """Vocoder generator: 80-channel mel input -> single-channel waveform in [-1, 1].

    The hyper-parameter object ``h`` must provide ``resblock_kernel_sizes``,
    ``resblock_dilation_sizes``, ``upsample_rates``, ``upsample_kernel_sizes`` and
    ``upsample_initial_channel``. Each upsampling stage halves the channel count
    with a transposed conv and averages the outputs of one ``ResBlockHIFI`` per
    configured kernel size.
    """

    def __init__(self, h):
        super().__init__()
        self.h = h
        self.num_kernels = len(h.resblock_kernel_sizes)
        self.num_upsamples = len(h.upsample_rates)
        self.conv_pre = weight_norm(Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3))

        # Transposed convolutions: stage i maps C / 2**i channels to C / 2**(i + 1).
        self.ups = nn.ModuleList()
        for stage, (rate, kernel) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
            in_ch = h.upsample_initial_channel // (2 ** stage)
            out_ch = h.upsample_initial_channel // (2 ** (stage + 1))
            self.ups.append(
                weight_norm(ConvTranspose1d(in_ch, out_ch, kernel, rate, padding=(kernel - rate) // 2))
            )

        # One residual block per (stage, kernel size), stored flat in stage-major order.
        self.resblocks = nn.ModuleList()
        for stage in range(len(self.ups)):
            ch = h.upsample_initial_channel // (2 ** (stage + 1))
            for kernel, dilations in zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes):
                self.resblocks.append(ResBlockHIFI(h, ch, kernel, dilations))

        # `ch` is the channel count left after the last upsampling stage.
        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))

    def forward(self, x):
        x = self.conv_pre(x)
        for stage in range(self.num_upsamples):
            x = self.ups[stage](F.leaky_relu(x, 0.1))
            xs = None
            for branch_idx in range(self.num_kernels):
                branch = self.resblocks[stage * self.num_kernels + branch_idx](x)
                if xs is None:
                    xs = branch
                else:
                    xs += branch
            # Average the parallel residual branches of this stage.
            x = xs / self.num_kernels
        # The final activation uses F.leaky_relu's default slope, unlike the 0.1 used above.
        x = self.conv_post(F.leaky_relu(x))
        return torch.tanh(x)
print(">>> [Paso 4] Arquitecturas definidas.")


>>> [Paso 4] Definiendo las arquitecturas de los modelos (StarGANv2-VC)...
>>> [Paso 4] Arquitecturas definidas.


In [None]:
# ==============================================================================
#          STEP 5: TRAINING WITH ADVERSARIAL LOSS
# ==============================================================================
# Trains the generator + style encoder against the discriminator with LSGAN-style
# squared-error adversarial losses, plus L1 reconstruction, style and cycle terms.
# After every epoch the reconstruction L1 on the clean validation loader is
# measured; it drives best-model saving and early stopping.
print("\n>>> [Paso 5] Iniciando el proceso de entrenamiento...")

# Consecutive epochs without validation improvement tolerated before stopping.
EARLY_STOPPING_PATIENCE = 20


def _restart_forever(loader):
    """Yield batches from ``loader`` endlessly, starting a new pass when it runs out.

    Unlike ``itertools.cycle`` this does not keep a copy of every batch in memory,
    and a shuffling DataLoader is reshuffled on every pass. An empty loader ends
    the generator immediately instead of spinning forever.
    """
    while True:
        produced_any = False
        for batch in loader:
            produced_any = True
            yield batch
        if not produced_any:
            return


# --- Model and optimizer initialisation ---
generator = Generator().to(DEVICE)
style_encoder = StyleEncoder().to(DEVICE)
discriminator = Discriminator().to(DEVICE)

# The style encoder is optimised jointly with the generator.
g_params = list(generator.parameters()) + list(style_encoder.parameters())
d_params = list(discriminator.parameters())
optimizer_g = torch.optim.Adam(g_params, lr=LEARNING_RATE_G, betas=(0.8, 0.99))
optimizer_d = torch.optim.Adam(d_params, lr=LEARNING_RATE_D, betas=(0.8, 0.99))
l1_loss = nn.L1Loss()

# --- Checkpoint configuration ---
# best_model_path: generator + style encoder weights of the best validation epoch.
# checkpoint_path: full training state of the most recent epoch, used for resuming.
best_model_path = os.path.join(SAVED_MODELS_DIR, "stargan_vc2_best.pth")
checkpoint_path = os.path.join(SAVED_MODELS_DIR, "stargan_vc2_checkpoint.pth")
start_epoch, best_val_loss, patience_counter = 0, float('inf'), 0

# --- Resume logic ---
if os.path.exists(checkpoint_path):
    print("--- Reanudando entrenamiento desde checkpoint. ---")
    # map_location lets a checkpoint written on a GPU runtime load on a CPU-only one.
    checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
    generator.load_state_dict(checkpoint['generator'])
    style_encoder.load_state_dict(checkpoint['style_encoder'])
    discriminator.load_state_dict(checkpoint['discriminator'])
    optimizer_g.load_state_dict(checkpoint['optimizer_g'])
    optimizer_d.load_state_dict(checkpoint['optimizer_d'])
    start_epoch = checkpoint['epoch'] + 1
    best_val_loss = checkpoint.get('best_val_loss', float('inf'))
    # Older checkpoints do not carry the patience counter; default to 0 for them.
    patience_counter = checkpoint.get('patience_counter', 0)
    print(f"Reanudando desde la época {start_epoch+1}. Mejor pérdida anterior: {best_val_loss:.4f}")
else:
    print("--- Empezando entrenamiento desde cero. ---")

# --- Main epoch loop ---
print("Comenzando bucle de entrenamiento...")
for epoch in range(start_epoch, NUM_EPOCHS):

    # --- Training phase (inner loop over batches) ---
    generator.train(); style_encoder.train(); discriminator.train()

    # The two corpora differ in size: repeat the shorter loader so that one epoch
    # covers the longer one completely. loader_a is always clean, loader_b radio.
    if len(train_clean_loader) < len(train_radio_loader):
        loader_a, loader_b = _restart_forever(train_clean_loader), train_radio_loader
    else:
        loader_a, loader_b = train_clean_loader, _restart_forever(train_radio_loader)

    # Placeholders so the epoch summary below still prints if no batch was produced.
    loss_g = loss_d = torch.tensor(float('nan'))

    for i, (mel_a, mel_b) in enumerate(zip(loader_a, loader_b)):
        clean_mel, radio_mel = mel_a.to(DEVICE), mel_b.to(DEVICE)

        # --- Discriminator step ---
        optimizer_d.zero_grad()
        # The fake sample is a constant for this step, so no graph is needed for it.
        with torch.no_grad():
            s_trg_d = style_encoder(radio_mel)
            x_fake_d = generator(clean_mel, s_trg_d)
        d_real = discriminator(radio_mel)
        d_fake = discriminator(x_fake_d)
        loss_d_real = torch.mean((d_real - 1)**2)
        loss_d_fake = torch.mean(d_fake**2)
        loss_d = loss_d_real + loss_d_fake
        loss_d.backward()
        optimizer_d.step()

        # --- Generator and style encoder step ---
        optimizer_g.zero_grad()
        s_org = style_encoder(clean_mel)
        s_trg = style_encoder(radio_mel)
        x_recon = generator(clean_mel, s_org)   # clean -> clean (own style)
        x_fake = generator(clean_mel, s_trg)    # clean -> radio style

        # Adversarial term: the converted sample should be scored as real.
        d_fake_g = discriminator(x_fake)
        loss_adv = torch.mean((d_fake_g - 1)**2)
        loss_recon = l1_loss(x_recon, clean_mel)

        # Style term: the converted sample should encode to the target style.
        s_pred = style_encoder(x_fake)
        loss_style = l1_loss(s_pred, s_trg)

        # Cycle term: converting back with the original style should recover the input.
        x_fake_style_recon = generator(x_fake, s_org)
        loss_cyc = l1_loss(x_fake_style_recon, clean_mel)

        loss_g = (loss_adv * LAMBDA_ADV) + (loss_recon * LAMBDA_RECON) + (loss_style * LAMBDA_STYLE) + (loss_cyc * LAMBDA_CYC)
        loss_g.backward()
        optimizer_g.step()

    # --- Validation phase (once per epoch) ---
    generator.eval(); style_encoder.eval()
    total_val_loss = 0
    with torch.no_grad():
        # Only the clean validation loader is used: self-reconstruction L1.
        for clean_mel_val in val_clean_loader:
            clean_mel_val = clean_mel_val.to(DEVICE)
            s_org_val = style_encoder(clean_mel_val)
            reconstructed_mel_val = generator(clean_mel_val, s_org_val)
            total_val_loss += l1_loss(reconstructed_mel_val, clean_mel_val).item()

    avg_val_loss = total_val_loss / len(val_clean_loader) if len(val_clean_loader) > 0 else 0
    # G/D losses shown here are those of the last training batch of the epoch.
    print(f"\nÉpoca {epoch+1}/{NUM_EPOCHS} | G Loss: {loss_g.item():.4f} | D Loss: {loss_d.item():.4f} | Val Recon Loss: {avg_val_loss:.4f}")

    # --- Saving and early stopping ---
    if avg_val_loss < best_val_loss:
        print(f"Mejora en validación! ({best_val_loss:.4f} --> {avg_val_loss:.4f}). Guardando checkpoint...")
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save({'generator': generator.state_dict(), 'style_encoder': style_encoder.state_dict()}, best_model_path)
    else:
        patience_counter += 1
        print(f"No hay mejora. Paciencia: {patience_counter}/{EARLY_STOPPING_PATIENCE}")

    # Persist the full training state every epoch (not only on improvement) so an
    # interrupted session resumes from the latest epoch with its patience counter.
    checkpoint = {
        'epoch': epoch,
        'generator': generator.state_dict(),
        'style_encoder': style_encoder.state_dict(),
        'discriminator': discriminator.state_dict(),
        'optimizer_g': optimizer_g.state_dict(),
        'optimizer_d': optimizer_d.state_dict(),
        'best_val_loss': best_val_loss,
        'patience_counter': patience_counter,
    }
    torch.save(checkpoint, checkpoint_path)

    if patience_counter >= EARLY_STOPPING_PATIENCE:
        print("--- ¡Parada temprana activada! ---")
        break

print(">>> [Paso 5] Entrenamiento finalizado.")


>>> [Paso 5] Iniciando el proceso de entrenamiento...
--- Reanudando entrenamiento desde checkpoint. ---
Reanudando desde la época 7. Mejor pérdida anterior: 0.5342
Comenzando bucle de entrenamiento...


In [None]:
!pip install munch

Collecting munch
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)
Installing collected packages: munch
Successfully installed munch-4.0.0
