# Sample Code Demo

In [1]:
import os
# Restrict this notebook to a single GPU. The variable has to be set before
# any CUDA-using library initialises, which is why this sits in the very
# first cell, ahead of `import torch`.
# `setdefault` keeps a CUDA_VISIBLE_DEVICES value that the launcher already
# exported, so the notebook also runs on hosts that have no GPU index 5;
# "5" remains the fallback when nothing was set.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "5")

In [2]:
import gc

import pickle
import torch
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.utils.data import DataLoader, random_split

from datautils import FixedFramesDataset, get_fixed_frame_data
from model_house import MLP
from trainutils import device, prediction, train


## Hyperparameters

In [3]:
# Settings a reader is most likely to tune, gathered in one cell.
#
# nframes is passed to get_fixed_frame_data and also sizes the model input
# as (2 * nframes + 1) * 39 in the "Define Model" section -- presumably the
# number of context frames on each side of the centre frame (TODO confirm
# against datautils).
# batch_size is shared by the train, validation and test DataLoaders.
nframes, batch_size = 7, 512

# Learning rate handed to torch.optim.Adam.
learning_rate = 1e-3

# Label for this run; it is passed to train() and is also the stem of the
# checkpoint file that the "Prediction" section loads.
trial_name = "MLP-FS-[512-512-512-512-512]-BN-Dropout-5W"

## Define Dataset

In [4]:
# Location of the cached, already-fitted scaler.
SCALER_PATH = "./scaler.skl"


def _standardize(fitted_scaler, frames):
    """Apply ``fitted_scaler`` along the last axis of ``frames``, keeping its shape.

    The array is flattened to 2-D (rows x last-axis size) for ``transform``
    and reshaped back to the original shape afterwards.
    """
    flat = frames.reshape(-1, frames.shape[-1])
    return fitted_scaler.transform(flat).reshape(frames.shape)


# Load the fixed-frame features. The training call also passes a labels file
# and unpacks (features, labels); the test call has no labels file and binds
# a single return value.
train_X, train_y = get_fixed_frame_data(
    nframes,
    feat_dir="./data/libriphone/feat/train",
    split_filepath="./data/libriphone/train_split.txt",
    labels_filepath="./data/libriphone/train_labels.txt",
)
test_X = get_fixed_frame_data(
    nframes,
    feat_dir="./data/libriphone/feat/test",
    split_filepath="./data/libriphone/test_split.txt",
)

# The scaler works on NumPy arrays, so convert before scaling.
train_X, test_X = train_X.numpy(), test_X.numpy()

try:
    # NOTE(security): pickle.load can execute arbitrary code. Only open a
    # scaler file that you produced yourself.
    with open(SCALER_PATH, "rb") as fp:
        scaler = pickle.load(fp)
except FileNotFoundError:
    # No cached scaler (e.g. fresh checkout): fit one and cache it so the
    # notebook survives "Restart & Run All". partial_fit accumulates the
    # statistics chunk by chunk, so no concatenated copy of the data is needed.
    # NOTE(review): as in the original recipe, the statistics include the test
    # features; fit on train_X only if that is not wanted.
    print("Compute scaling info.")
    scaler = StandardScaler()
    for frames in (train_X, test_X):
        scaler.partial_fit(frames.reshape(-1, frames.shape[-1]))
    with open(SCALER_PATH, "wb") as fp:
        pickle.dump(scaler, fp)

print("Scaling training data.")
train_X = _standardize(scaler, train_X)
print("Scaling test data.")
test_X = _standardize(scaler, test_X)


  0%|          | 0/4286 [00:00<?, ?it/s]

unsupported pickle protocol: 10
'utf-8' codec can't decode byte 0x81 in position 36: invalid start byte


In [None]:
# Seed for the train/validation partition. Without it, random_split draws a
# different partition on every run, so validation numbers are not comparable
# between runs.
SPLIT_SEED = 0
# Share of the labelled data kept for training; the rest is for validation.
TRAIN_FRACTION = 0.8

# Wrap the scaled arrays as float32 tensors inside the project dataset class.
full_train_dataset = FixedFramesDataset(torch.FloatTensor(train_X), train_y)
test_dataset = FixedFramesDataset(torch.FloatTensor(test_X))

# Drop the notebook-level references to the raw arrays to reduce kernel memory.
# Re-run the data-loading cell before re-running this one, since these
# names no longer exist afterwards.
del train_X, train_y, test_X
gc.collect()

train_len = int(len(full_train_dataset) * TRAIN_FRACTION)
valid_len = len(full_train_dataset) - train_len
train_dataset, valid_dataset = random_split(
    full_train_dataset,
    [train_len, valid_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation order is kept fixed.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
valid_dataloader = DataLoader(valid_dataset, shuffle=False, **loader_kwargs)

## Define Model

In [None]:
# Build the classifier, the loss and the optimiser.
# The input size is (2 * nframes + 1) frames times 39 values each; 39 is
# presumably the per-frame feature dimension -- TODO confirm against the
# feature files.
frames_per_sample = 2 * nframes + 1
flattened_input_dim = frames_per_sample * 39

model = MLP(input_dim=flattened_input_dim, num_classes=41).to(device)
criterion = nn.CrossEntropyLoss()
# The optimiser is created after the model has been moved to `device`.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Start training. The two integers are passed positionally, exactly as before.
# Judging by the recorded output (a progress bar with a total of 50000 and a
# log/checkpoint line every 100 steps) they look like the total step count
# and the validation interval -- confirm against trainutils.train.
total_steps = 50000
valid_interval = 100
train(
    train_dataloader,
    valid_dataloader,
    model,
    criterion,
    optimizer,
    total_steps,
    valid_interval,
    trial_name,
)

2022-12-01 17:26:30.099742: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-01 17:26:30.270391: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-12-01 17:26:30.306716: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-12-01 17:26:31.009987: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

  0%|          | 0/50000 [00:00<?, ?it/s]

100 steps - Train loss: 1.5661227703094482 | Train acc: 0.52734375 | Valid loss: 1.5953207602610033 | Valid acc: 0.5201314988064478
100 steps: Saving model with acc 0.520
200 steps - Train loss: 1.4584009647369385 | Train acc: 0.548828125 | Valid loss: 1.4691233256788436 | Valid acc: 0.5501556724938241
200 steps: Saving model with acc 0.550
300 steps - Train loss: 1.2699373960494995 | Train acc: 0.626953125 | Valid loss: 1.3895010390339022 | Valid acc: 0.5763646405913663
300 steps: Saving model with acc 0.576
400 steps - Train loss: 1.3266429901123047 | Train acc: 0.578125 | Valid loss: 1.3424594179270184 | Valid acc: 0.5883777148110177
400 steps: Saving model with acc 0.588
500 steps - Train loss: 1.2302411794662476 | Train acc: 0.63671875 | Valid loss: 1.3082242704987084 | Valid acc: 0.5965680173414997
500 steps: Saving model with acc 0.597
600 steps - Train loss: 1.3368089199066162 | Train acc: 0.5859375 | Valid loss: 1.2842053358302754 | Valid acc: 0.6005856160885537
600 steps: Sav

## Prediction

In [None]:
# Test loader: no shuffling, so predictions stay in dataset order.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Restore the weights saved under this trial's name.
# map_location remaps the stored tensors onto the device in use now, so a
# checkpoint written on one GPU still loads when this session runs on a
# different GPU or on CPU.
# NOTE(security): torch.load unpickles the file; only load checkpoints you
# produced yourself.
checkpoint_path = f"././models/{trial_name}.ckpt"
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

prediction(test_dataloader, model)


  0%|          | 0/1263 [00:00<?, ?it/s]