In [1]:
import torch
import pickle
from utils import extract_full_vector, extract_subset_vector, extract_subset_vector_exp_lip, unflatten_vector, map_subset_to_full_vector, extract_subset_vector_xs_lip
import tqdm
from collections import defaultdict
import numpy as np
import pickle
import numpy as np
from tqdm import tqdm
from collections import defaultdict
import math
import os
import yaml
from datetime import datetime

In [6]:
with open('pkls/full_dataset_descriptors/live_portrait_descriptor_all_with_mead.pkl', 'rb') as file:
    all_descriptors = pickle.load(file) # frame name is the key and the value is the descriptor

In [7]:
# Dictionary to store video frames
video_dict = defaultdict(list)

# Populate the video_dict with frame arrays in order
for key, value in all_descriptors.items():
    parts = key.split('/')
    if key[0] == "M": ## mead dataset
        video_name = "/".join(parts[:-1])
        frame_number = parts[-1].split('.')[0].split("_")[-1]
    else:
        video_name = parts[1] #rawdes
        frame_number = parts[-1].split('.')[0]
    video_dict[video_name].append((frame_number, value))

# Sort video_dict by keys
video_dict = dict(sorted(video_dict.items()))

# Sort each video's frames by frame number
for video_name in video_dict:
    video_dict[video_name].sort(key=lambda x: int(x[0]))

all_vectors_full = []
for video_name, frames in tqdm(video_dict.items(), desc="Extracting vectors for clustering"):
    video_frame_vectors = []
    for frame_number, frame_data in frames:
        vector_full = extract_subset_vector(extract_full_vector(frame_data))
        all_vectors_full.append(vector_full)
all_vectors_full = np.vstack(all_vectors_full)


Extracting vectors for clustering: 100%|██████████| 9282/9282 [05:37<00:00, 27.51it/s]


In [8]:
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import torch
# Split into train (90%) and temp (10%)
train_data, temp_data = train_test_split(all_vectors_full, test_size=0.1, random_state=42)

# Split temp into validation (5%) and test (5%)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

In [9]:
# Convert to PyTorch tensors
train_tensor = torch.tensor(train_data, dtype=torch.float32)
val_tensor = torch.tensor(val_data, dtype=torch.float32)
test_tensor = torch.tensor(test_data, dtype=torch.float32)

In [10]:
# Create TensorDatasets
train_dataset = TensorDataset(train_tensor, train_tensor)  # Input and target are the same
val_dataset = TensorDataset(val_tensor, val_tensor)
test_dataset = TensorDataset(test_tensor, test_tensor)

In [11]:
# Create DataLoaders
batch_size = 512
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
import torch
import torch.nn as nn

# Function to build a custom MLP (Multi-Layer Perceptron)
def build_mlp(layers, activation_functions):
    modules = []
    for i in range(len(layers) - 1):
        modules.append(nn.Linear(layers[i], layers[i + 1]))
        if activation_functions[i] is not None:
            modules.append(activation_functions[i]())
    return nn.Sequential(*modules)

# Encoder Module
class Encoder(nn.Module):
    def __init__(self, input_dim, layer_dims, activations):
        super(Encoder, self).__init__()
        self.encoder = build_mlp([input_dim] + layer_dims, activations)

    def forward(self, x):
        return self.encoder(x)

# Decoder Module
class Decoder(nn.Module):
    def __init__(self, latent_dim, layer_dims, activations):
        super(Decoder, self).__init__()
        self.decoder = build_mlp([latent_dim] + layer_dims, activations)

    def forward(self, x):
        return self.decoder(x)

# Autoencoder combining Encoder and Decoder
class Autoencoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(Autoencoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


In [13]:
# Example configurations
input_dim = 72
encoded_dim = 16
encoder_layers = [input_dim, 512, 256, encoded_dim]  # Number of neurons in each encoder layer
encoder_activations = [nn.ReLU, nn.ReLU, nn.ReLU, nn.ReLU]  # Activation functions for encoder
decoder_layers = [256, 512, input_dim]  # Number of neurons in each decoder layer
decoder_activations = [nn.ReLU, nn.ReLU, None]  # Activation functions for decoder
latent_dim = encoder_layers[-1]  # Latent dimension is the output of the last encoder layer

# Initialize encoder and decoder
encoder = Encoder(input_dim=input_dim, layer_dims=encoder_layers, activations=encoder_activations)
decoder = Decoder(latent_dim=latent_dim, layer_dims=decoder_layers, activations=decoder_activations)

# Combine into autoencoder
autoencoder = Autoencoder(encoder=encoder, decoder=decoder)

In [14]:
# Loss and Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001)

In [15]:
from tqdm import tqdm

# Early stopping parameters
patience = 2  # Number of epochs to wait for improvement
threshold = 1e-4  # Minimum change in validation loss to consider as an improvement
best_val_loss = float('inf')
epochs_no_improve = 0

num_epochs = 50

for epoch in range(num_epochs):
    # Training Phase
    autoencoder.train()
    train_loss = 0
    train_steps = 0
    train_progress = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}] Training", leave=False)

    for inputs, _ in train_progress:
        optimizer.zero_grad()
        outputs = autoencoder(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()

        # Update cumulative average loss
        train_loss += loss.item()
        train_steps += 1
        avg_train_loss = train_loss / train_steps
        train_progress.set_postfix({"Train Loss": avg_train_loss})

    # Validation Phase
    autoencoder.eval()
    val_loss = 0
    val_steps = 0
    val_progress = tqdm(val_loader, desc=f"Epoch [{epoch+1}/{num_epochs}] Validation", leave=False)

    with torch.no_grad():
        for inputs, _ in val_progress:
            outputs = autoencoder(inputs)
            loss = criterion(outputs, inputs)

            # Update cumulative average loss
            val_loss += loss.item()
            val_steps += 1
            avg_val_loss = val_loss / val_steps
            val_progress.set_postfix({"Val Loss": avg_val_loss})

    # Print final losses for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    # Early Stopping Check
    if avg_val_loss < best_val_loss - threshold:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        print(f"Validation loss did not improve for {epochs_no_improve} consecutive epochs.")

    if epochs_no_improve >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}. Best Val Loss: {best_val_loss:.4f}")
        break


                                                                                             

Epoch [1/50] - Train Loss: 0.0164, Val Loss: 0.0006


                                                                                               

Epoch [2/50] - Train Loss: 0.0006, Val Loss: 0.0004


                                                                                               

Epoch [3/50] - Train Loss: 0.0004, Val Loss: 0.0004
Validation loss did not improve for 1 consecutive epochs.


                                                                                               

Epoch [4/50] - Train Loss: 0.0003, Val Loss: 0.0001


                                                                                               

Epoch [5/50] - Train Loss: 0.0003, Val Loss: 0.0003
Validation loss did not improve for 1 consecutive epochs.


                                                                                               

Epoch [6/50] - Train Loss: 0.0002, Val Loss: 0.0004
Validation loss did not improve for 2 consecutive epochs.
Early stopping triggered at epoch 6. Best Val Loss: 0.0001




In [16]:
autoencoder.eval()
test_loss = 0
with torch.no_grad():
    for inputs, _ in test_loader:
        outputs = autoencoder(inputs)
        loss = criterion(outputs, inputs)
        test_loss += loss.item()

print(f"Test Loss: {test_loss/len(test_loader):.10f}")
# Epoch [4/50] - Train Loss: 0.0014, Val Loss: 0.0008 ## 20 ## Test Loss: 0.0012
# Epoch [7/50] - Train Loss: 0.0010, Val Loss: 0.0008 ## 16 ## Test Loss: 0.0008
# Epoch [6/50] - Train Loss: 0.0011, Val Loss: 0.0008 ## 72 ## Test Loss: 0.0008

#latest
#Epoch [6/50] - Train Loss: 0.0003, Val Loss: 0.0002 ## Test loss: 0.0002


Test Loss: 0.0003585673


In [17]:
from torch.utils.data import DataLoader, TensorDataset
import torch
import numpy as np

# Convert numpy array to PyTorch tensor
input_tensor = torch.tensor(all_vectors_full, dtype=torch.float32)

# Create a DataLoader for batching
batch_size = 512
dataset = TensorDataset(input_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

# Ensure the encoder is in evaluation mode
autoencoder.encoder.eval()

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
autoencoder.encoder.to(device)

# Container for encoded vectors
encoded_vectors_list = []

# Process the input data in batches
with torch.no_grad():
    for batch in data_loader:
        batch_inputs = batch[0].to(device)  # Get inputs and move to device
        encoded_batch = autoencoder.encoder(batch_inputs)  # Encode the batch
        encoded_vectors_list.append(encoded_batch.cpu())  # Move to CPU and store

# Concatenate all batches into a single tensor
encoded_vectors = torch.cat(encoded_vectors_list, dim=0)

# Convert back to NumPy array (if needed)
encoded_vectors_numpy = encoded_vectors.numpy()

# Print the resulting 20-dimensional vectors
print("Encoded Vectors Shape:", encoded_vectors_numpy.shape)

Encoded Vectors Shape: (1156143, 16)


In [14]:
# import pickle

# # Save the encoded vectors (NumPy array) to a pickle file
# output_file = "auto_encoder_vectors.pkl"

# with open(output_file, "wb") as f:
#     pickle.dump(encoded_vectors_numpy, f)

# print(f"Encoded vectors saved to {output_file}")

def map_subset_to_full_vector_exp_lip(full_vector, subset_output):
    c_lip = subset_output[:1]
    exp = subset_output[1:]
    full_vector[5:6] = c_lip
    full_vector[13:76] = exp
    return full_vector


In [18]:
autoencoder= autoencoder.cuda()

In [19]:
new_dict = dict()
dict_encoder_descriptors = dict()
for key, value in all_descriptors.items():
    full_vector = extract_full_vector(all_descriptors[key])
    subset_vector = extract_subset_vector(full_vector)
    torch_subset_vector = torch.tensor(subset_vector, dtype=torch.float32).cuda()
    encoder_output= autoencoder.encoder(torch_subset_vector)
    decoder_output = autoencoder.decoder(encoder_output).cpu().detach().numpy()

    encoder_output_numpy = autoencoder.encoder(torch_subset_vector).cpu().detach().numpy()
    new_dict[key] = encoder_output_numpy

    full_vector = map_subset_to_full_vector(full_vector=full_vector, subset_output=decoder_output) ### recheck it
    dict_encoder_descriptors[key] = unflatten_vector(full_vector)

In [20]:
import pickle
# Save the encoded vectors (NumPy array) to a pickle file
output_file = "pkls/auto_encoder_output/encoded_live_portrait_descriptor_all_with_mead.pkl"

with open(output_file, "wb") as f:
    pickle.dump(new_dict, f)

print(f"Encoded vectors saved to {output_file}")

Encoded vectors saved to pkls/auto_encoder_output/encoded_live_portrait_descriptor_all_with_mead.pkl


In [21]:
import pickle
# Save the encoded vectors (NumPy array) to a pickle file
output_file = "pkls/auto_encoder_output/autodecoded_descriptors_live_portrait_descriptor_all_with_mead.pkl"

with open(output_file, "wb") as f:
    pickle.dump(dict_encoder_descriptors, f)

print(f"Encoded vectors saved to {output_file}")

Encoded vectors saved to pkls/auto_encoder_output/autodecoded_descriptors_live_portrait_descriptor_all_with_mead.pkl


In [22]:
import torch
from datetime import datetime

# Get the current date and time
current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")

# File path to save the encoder with the current date and time
encoder_save_path = f"trained_models/encoder_{current_datetime}_{latent_dim}.pth"

# Save the encoder's state dictionary
torch.save(autoencoder.encoder.state_dict(), encoder_save_path)

print(f"Encoder saved successfully to {encoder_save_path}")


Encoder saved successfully to trained_models/encoder_20250105_195522_16.pth
