In [1]:
# force the notebook to auto reload external python modules | useful for development
%load_ext autoreload
%autoreload 2

In [3]:
# imports
import sys

# Make the parent directory importable so that the `src` package resolves.
# A bare ".." is understood on every OS, whereas "..\\" only names the parent
# directory on Windows. The membership test keeps re-runs of this cell from
# stacking duplicate entries on sys.path.
if ".." not in sys.path:
    sys.path.append("..")
from src.data import MyDataset, AudioTripletDataset, LandmarkTripletDataset, AudioLandmarkTripletDataset
from src.features import TripletGenerator
# from src.models import main

import torch
from torch.utils.data import random_split
from torch.utils.data import DataLoader

# Seed for the torch.Generator handed to random_split, so the train/valid/test
# partition is the same on every run.
random_seed = 42

In [4]:
import os

# NOTE(review): presumably silences the "duplicate OpenMP runtime" abort that occurs
# when two copies of the library are loaded into the kernel process; confirm it is
# still required on the target machine.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "TRUE"})

In [5]:
# Data locations, relative to the notebook's working directory.
# os.path.join picks the separator of the host OS (the original literals hard-coded
# Windows backslashes). The trailing empty component keeps the final separator that
# the original strings ended with, so the values are unchanged on Windows.
raw_data_path = os.path.join("..", "data", "raw", "")
processed_data_path = os.path.join("..", "data", "processed", "")

---

In [6]:
# Instantiate the dataset object over the raw data directory. The following cells
# either create and save its contents or load a previously saved copy.
my_dataset = MyDataset(location=raw_data_path)

In [5]:
# create & pickle dataset | RUN THIS CELL ONLY ONCE TO CREATE THE DATASET
# NOTE(review): nothing guards this cell, so "Restart & Run All" re-creates and
# re-saves the dataset every time; skip it by hand once the saved copy exists.
my_dataset.create_dataset()
my_dataset.save_dataset(processed_data_path)



In [5]:
# load dataset | RUN THIS CELL TO LOAD THE SAVED DATASET
# Restores the dataset from the processed-data directory and reports how many
# samples it holds; sample_size drives the split sizes further down.
my_dataset.load_dataset(processed_data_path)
sample_size = len(my_dataset)
print("Loaded dataset with {} samples.".format(sample_size))

Loaded dataset with 323 samples.


In [6]:
# train-valid-test split (80% / 10% / remainder)
# The dataset is split BEFORE any triplets are built so that train and test stay independent.
split_rng = torch.Generator().manual_seed(random_seed)

train_size = int(0.8 * sample_size)
val_size = int(0.1 * sample_size)
# Whatever the two truncations above left over goes to the test split.
test_size = sample_size - (train_size + val_size)

train_set, val_set, test_set = random_split(
    my_dataset,
    [train_size, val_size, test_size],
    generator=split_rng,
)

# Read the sizes back from the subsets themselves (note: `valid_size` from here on).
train_size, valid_size, test_size = (len(subset) for subset in (train_set, val_set, test_set))
print(
    f"Train dataset size: {train_size} \n"
    f"Valid dataset size: {valid_size} \n"
    f"Test dataset size: {test_size}"
)

Train dataset size: 258 
Valid dataset size: 32 
Test dataset size: 33


In [7]:
# CREATE & PICKLE triplets of indices | RUN THIS CELL ONLY ONCE TO CREATE THE TRIPLETS
# A large triplet count keeps the triplets diverse: 10 per sample here
# (10 to 20 per sample is the intended range).
triplets_per_sample = 10

created = {}
for split_name, split_subset, split_len in (
    ("train", train_set, train_size),
    ("valid", val_set, valid_size),
    ("test", test_set, test_size),
):
    # Generate the triplets for this split and save them right away.
    generator = TripletGenerator(
        split_subset,
        num_triplets=split_len * triplets_per_sample,
        prefix=split_name,
    )
    generator.save_triplets(processed_data_path)
    created[split_name] = generator

train_triplets, valid_triplets, test_triplets = created["train"], created["valid"], created["test"]

print(f"Created and saved: \
      \n {len(train_triplets.triplets)} train triplets \
      \n {len(valid_triplets.triplets)} valid triplets \
      \n {len(test_triplets.triplets)} test triplets"
      )

Created and saved:       
 2580 train triplets       
 320 valid triplets       
 330 test triplets


---

In [7]:
# LOAD triplets | RUN THIS CELL TO LOAD THE SAVED TRIPLETS
# NOTE(review): the triplets were generated from the random_split subsets but are
# applied to my_dataset.data here. Confirm that TripletGenerator stores indices into
# the full dataset rather than subset-local positions.

def _load_split(prefix):
    """Load the saved triplets for `prefix` and wrap them in the three triplet datasets.

    Returns a tuple: (triplet generator, audio+landmark dataset, audio dataset,
    landmark dataset), built in that order.
    """
    generator = TripletGenerator(load=True, root_path=processed_data_path, prefix=prefix)
    return (
        generator,
        AudioLandmarkTripletDataset(my_dataset.data, generator.triplets),
        AudioTripletDataset(my_dataset.data, generator.triplets),
        LandmarkTripletDataset(my_dataset.data, generator.triplets),
    )


# TRAIN
(train_triplets, train_AL_triplets,
 train_audio_triplet_dataset, train_landmark_triplet_dataset) = _load_split("train")

# VALID
(valid_triplets, valid_AL_triplets,
 valid_audio_triplet_dataset, valid_landmark_triplet_dataset) = _load_split("valid")

# TEST
(test_triplets, test_AL_triplets,
 test_audio_triplet_dataset, test_landmark_triplet_dataset) = _load_split("test")

print(f"Loaded: \
      \n {len(train_triplets.triplets)} train triplets \
      \n {len(valid_triplets.triplets)} valid triplets \
      \n {len(test_triplets.triplets)} test triplets"
      )

Loaded:       
 2580 train triplets       
 320 valid triplets       
 330 test triplets


In [8]:
# imports
from src.config import SystemConfig, TrainingConfig
from src.logging import setup_log_directory
from src.models import audio_model, landmark_model, combined_model, main
from src.visualization import plot_loss_accuracy
from src.features import Graph

from torch.utils.tensorboard import SummaryWriter

In [None]:
def train_model(
        train_dataset,
        valid_dataset,
        train_model,
        model_args=None,
        adj=None,
        batch_size=32,
    ):
    """Train and validate one model, then plot its loss/accuracy curves.

    Args:
        train_dataset: dataset served through a shuffled DataLoader for training.
        valid_dataset: dataset served through an unshuffled DataLoader for validation.
        train_model: callable that builds the model (e.g. ``audio_model``). Inside the
            body this name shadows the function itself; it is kept unchanged so
            existing positional and keyword callers keep working.
        model_args: positional arguments for ``train_model``. ``None`` (the default)
            passes the audio input shape ``(250, 400)``, exactly as before. Pass
            ``()`` for a factory that takes no arguments (e.g. ``landmark_model``).
        adj: optional adjacency matrix. It is forwarded to ``main`` as ``adj=`` only
            when provided, so the audio-only call is unchanged.
        batch_size: batch size used by both loaders (default 32, the former constant).

    Returns:
        None. The curves are shown through ``plot_loss_accuracy``.
    """
    # create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Default factory arguments: the audio input shape used by the audio model.
    if model_args is None:
        audio_data_shape = (250, 400) # 250 frames, 400 features
        model_args = (audio_data_shape,)

    # build model
    model = train_model(*model_args)
    print(model)

    training_config = TrainingConfig()

    # Model checkpoint log dir setup. The returned version name is not needed here.
    training_config, _ = setup_log_directory(training_config)

    # Tensorboard log dir setup.
    summary_writer = SummaryWriter(training_config.log_dir)

    # Only pass `adj` through when the caller supplied one.
    extra_kwargs = {} if adj is None else {"adj": adj}

    # Train and Validate
    train_loss, train_acc, val_loss, val_acc = main(
        model,
        (train_loader, valid_loader),
        summary_writer=summary_writer,
        scheduler=None,
        system_config=SystemConfig(),
        training_config=training_config,
        data_augmentation=False,
        **extra_kwargs,
    )

    # plot loss and accuracy
    plot_loss_accuracy(
        train_loss=[train_loss],
        val_loss=[val_loss],
        train_acc=[train_acc],
        val_acc=[val_acc],
        colors=["blue"],
        loss_legend_loc="upper center",
        acc_legend_loc="upper left",
    )
    

### Audio model

In [None]:
# Run the training helper on the audio triplet datasets with the audio model factory.
train_model(
    train_dataset=train_audio_triplet_dataset,
    valid_dataset=valid_audio_triplet_dataset,
    train_model=audio_model,
)

In [9]:
# Audio dataloaders: training batches are shuffled, validation order is kept fixed.
audio_data_shape = (250, 400)  # (frames, features per frame)
train_loader = DataLoader(dataset=train_audio_triplet_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_audio_triplet_dataset, batch_size=32, shuffle=False)

In [10]:
# Build the audio model from the input shape and show its layers.
model = audio_model(audio_data_shape)
print(model)

# Model checkpoint log dir setup: a fresh TrainingConfig goes in, and the config
# comes back together with the name of the current version.
training_config, current_version_name = setup_log_directory(TrainingConfig())

# Tensorboard writer pointed at the config's log directory.
summary_writer = SummaryWriter(log_dir=training_config.log_dir)

SiameseModel(
  (siamese_network): LSTM(
    (lstm): LSTM(400, 64, batch_first=True)
    (attention): Linear(in_features=64, out_features=1, bias=True)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (dense1): Linear(in_features=64, out_features=128, bias=True)
    (dense2): Linear(in_features=128, out_features=64, bias=True)
    (dense3): Linear(in_features=64, out_features=32, bias=True)
  )
)
Logging at: ..\output\Logs_Checkpoints\Model_logs\version_17
Model Checkpoint at: ..\output\Logs_Checkpoints\Model_checkpoints\version_17


In [11]:
# Train and validate the audio model.
# NOTE(review): the output saved with this cell is "IndexError: list index out of range"
# on the first training batch, so the four histories below were never produced by
# that run. The cause is not visible from this notebook.
training_kwargs = dict(
    summary_writer=summary_writer,
    scheduler=None,
    system_config=SystemConfig(),
    training_config=training_config,
    data_augmentation=False,
)
train_loss, train_acc, val_loss, val_acc = main(
    model,
    (train_loader, valid_loader),
    **training_kwargs,
)

Train:	Epoch: 1/10:   0%|          | 0/81 [00:00<?, ?it/s]


IndexError: list index out of range

In [13]:
# Plot the loss and accuracy histories of the audio run. Each history is wrapped in
# a one-element list, matching the single colour entry.
curves = {
    "train_loss": [train_loss],
    "val_loss": [val_loss],
    "train_acc": [train_acc],
    "val_acc": [val_acc],
}
plot_loss_accuracy(
    colors=["blue"],
    loss_legend_loc="upper center",
    acc_legend_loc="upper left",
    **curves,
)

: 

### Landmark model

In [30]:
# Adjacency matrix of the landmark graph, as a float32 tensor.
graph = Graph()
adj_matrix = torch.tensor(graph.A, dtype=torch.float32)

# Landmark dataloaders: shuffled for training, fixed order for validation.
train_loader = DataLoader(dataset=train_landmark_triplet_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_landmark_triplet_dataset, batch_size=32, shuffle=False)

In [31]:
# Build the landmark model (its factory takes no arguments) and show its layers.
model = landmark_model()
print(model)

# Model checkpoint log dir setup: returns the config plus the current version name.
training_config, current_version_name = setup_log_directory(TrainingConfig())

# Tensorboard writer pointed at the config's log directory.
summary_writer = SummaryWriter(log_dir=training_config.log_dir)

SiameseModel(
  (siamese_network): STGCN(
    (temporal_conv1): Conv2d(3, 64, kernel_size=(3, 1), stride=(1, 1))
    (graph_conv1): GraphConvLayer(
      (fc): Linear(in_features=64, out_features=32, bias=True)
    )
    (temporal_conv2): Conv2d(32, 64, kernel_size=(3, 1), stride=(1, 1))
  )
)
Logging at: ..\output\Logs_Checkpoints\Model_logs\version_13
Model Checkpoint at: ..\output\Logs_Checkpoints\Model_checkpoints\version_13


In [None]:
# Train and validate the landmark model; the graph adjacency matrix is handed to
# main through its `adj` keyword.
training_kwargs = dict(
    summary_writer=summary_writer,
    scheduler=None,
    system_config=SystemConfig(),
    training_config=training_config,
    data_augmentation=False,
    adj=adj_matrix,
)
train_loss, train_acc, val_loss, val_acc = main(
    model,
    (train_loader, valid_loader),
    **training_kwargs,
)

In [None]:
# Plot the loss and accuracy histories of the landmark run (one list entry per
# history, one colour).
curves = {
    "train_loss": [train_loss],
    "val_loss": [val_loss],
    "train_acc": [train_acc],
    "val_acc": [val_acc],
}
plot_loss_accuracy(
    colors=["blue"],
    loss_legend_loc="upper center",
    acc_legend_loc="upper left",
    **curves,
)

### Combined model

In [39]:
# Inputs for the combined model: the audio input shape and the landmark graph's
# adjacency matrix as a float32 tensor.
audio_data_shape = (250, 400)  # (frames, features per frame)
graph = Graph()
adj_matrix = torch.tensor(graph.A, dtype=torch.float32)

# Audio+landmark dataloaders: shuffled for training, fixed order for validation.
train_loader = DataLoader(dataset=train_AL_triplets, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_AL_triplets, batch_size=32, shuffle=False)

In [40]:
# Build the combined audio+landmark model from the audio input shape and show its layers.
model = combined_model(audio_data_shape)
print(model)

# Model checkpoint log dir setup: returns the config plus the current version name.
training_config, current_version_name = setup_log_directory(TrainingConfig())

# Tensorboard writer pointed at the config's log directory.
summary_writer = SummaryWriter(log_dir=training_config.log_dir)

CombinedSiameseNetwork(
  (audio_network): LSTM(
    (lstm): LSTM(400, 64, batch_first=True)
    (attention): Linear(in_features=64, out_features=1, bias=True)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (dense1): Linear(in_features=64, out_features=128, bias=True)
    (dense2): Linear(in_features=128, out_features=64, bias=True)
    (dense3): Linear(in_features=64, out_features=32, bias=True)
  )
  (landmarks_network): STGCN(
    (temporal_conv1): Conv2d(3, 64, kernel_size=(3, 1), stride=(1, 1))
    (graph_conv1): GraphConvLayer(
      (fc): Linear(in_features=64, out_features=32, bias=True)
    )
    (temporal_conv2): Conv2d(32, 64, kernel_size=(3, 1), stride=(1, 1))
  )
  (fc): Linear(in_features=64, out_features=32, bias=True)
)
Logging at: ..\output\Logs_Checkpoints\Model_logs\version_14
Model Checkpoint at: ..\output\Logs_Checkpoints\Model_checkpoints\version_14


In [None]:
# Train and validate the combined model; the graph adjacency matrix is handed to
# main through its `adj` keyword.
training_kwargs = dict(
    summary_writer=summary_writer,
    scheduler=None,
    system_config=SystemConfig(),
    training_config=training_config,
    data_augmentation=False,
    adj=adj_matrix,
)
train_loss, train_acc, val_loss, val_acc = main(
    model,
    (train_loader, valid_loader),
    **training_kwargs,
)

In [None]:
# Plot the loss and accuracy histories of the combined run (one list entry per
# history, one colour).
curves = {
    "train_loss": [train_loss],
    "val_loss": [val_loss],
    "train_acc": [train_acc],
    "val_acc": [val_acc],
}
plot_loss_accuracy(
    colors=["blue"],
    loss_legend_loc="upper center",
    acc_legend_loc="upper left",
    **curves,
)

### Testing

In [None]:
# code for testing goes here - TBD