#### Сеть энкодер для стейджинга

In [71]:
import os
import sys
import time
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

from tqdm import tqdm

from jaw_gen3d_3lm import Landmark_gen
from csv_parser_pd_ver2 import set_gen_fr_csv_pd_ver3
from models import get_autoencoder

from torch.utils.tensorboard import SummaryWriter


In [72]:
# run tensorboard: tensorboard --logdir=runs
# NOTE: before starting tensorboard, delete everything that previous runs
# left in the `runs` folder; otherwise tensorboard does not start.

In [73]:
# Select the device used for training: GPU when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
# device = "cpu"  # uncomment to force CPU

# Number of teeth in both jaws; the two jaws are always processed together.
n_teeth = 16*2
# Number of landmark values per tooth (5*3 per the original note;
# presumably 5 landmarks x 3 coordinates - TODO confirm).
landmarks_data_len = 15
# Length of the input vector: one landmark block per tooth (32 * 15 = 480).
inp_dim = n_teeth * landmarks_data_len
# Size of the encoder's hidden (code) layer.
code_dim = inp_dim//6
# Set later, when the DataLoaders are created or loaded.
batch_size = None

# Alternative csv inputs kept for reference:
# csv_fpath = r"C:\Projects\torchEncoder\csv\11k.csv" # main for training now loads 5 min
# csv_fpath = r"C:\Projects\torchEncoder\csv\diego_landmarks_over_900.csv" # main for training now loads 5 min
# csv_fpath = r"D:\Nick_cases\landmarks.csv"
# csv_fpath = r"C:\Projects\torchEncoder\csv\11k_head.csv"
csv_fpath = r"C:\Projects\torchEncoder\csv\Diego_1k.csv"

# Folder for *.pth checkpoints written during training.
models_dir = "models"
# Folder for the pickled DataLoaders.
dataloadersDir = 'dataLoaders'
# Folder for exported libtorch models (*.pt).
libtorch_models_dir = "libtorch_models"

epochs = 3000
# True -> train mode, False -> test mode.
train_mode = True
# True -> rebuild the DataLoaders from the csv file,
# False -> reuse the DataLoaders already saved on disk.
create_new_DataLoaders = False

# Create every output folder up front. makedirs(exist_ok=True) replaces the
# check-then-create pairs, and models_dir is included because checkpoints are
# saved into it and later listed from it.
for _out_dir in (models_dir, libtorch_models_dir, dataloadersDir):
    os.makedirs(_out_dir, exist_ok=True)

print(f"{'train mode' if train_mode else 'test mode'} ")
print(f"{'new dataLoaders will be created from csv' if create_new_DataLoaders else 'Existing Dataloaders will be used'}")


Using cuda device
train mode 
Existing Dataloaders will be used


берем данные из файла csv

In [74]:
# Build the train/validation DataLoaders from the csv file and store them on
# disk. Both jaws are kept together in one dataset, so a single pass is enough.

base_path = os.getcwd()  # e.g. c:\Projects\jaw_encoder
if create_new_DataLoaders:
    # Regenerate the .pth files from the csv; the DataLoaders are read back
    # from those files afterwards.
    print(f"Loading from {csv_fpath}")

    # Data layout according to the original note:
    # [([<210>T1], [<210>T2]), <next case tuple>, <next...>, ...]
    dataset = set_gen_fr_csv_pd_ver3(csv_fpath)
    data_len = len(dataset)
    print("\nData loaded from csv")
    print(f"dataset len = {data_len}")

    # The first 90% of the cases are used for training, the rest for testing.
    set_divider_ = round(data_len*0.9)
    train, test = dataset[:set_divider_], dataset[set_divider_:]
    print(f"length train / val    =    {len(train)} / {len(test)} ")

    # Only the training loader is shuffled.
    batch_size = 128
    train_loader = DataLoader(train, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test, shuffle=False, batch_size=batch_size)

    # Make sure the target folder exists, then pickle both loaders into it.
    if not os.path.isdir(dataloadersDir):
        os.mkdir(dataloadersDir)
    torch.save(train_loader, f'{dataloadersDir}/train_loader.pth')
    torch.save(test_loader, f'{dataloadersDir}/test_loader.pth')
    print(f"Dataloaders saved in {dataloadersDir} folder")


In [75]:

# Always load the DataLoaders from the saved .pth files.
train_loader_path = f'{dataloadersDir}/train_loader.pth'
test_loader_path = f'{dataloadersDir}/test_loader.pth'

# Explicit checks instead of `assert`: assertions are removed when Python runs
# with -O, which would turn a missing file into an obscure torch.load failure.
for _loader_path, _loader_kind in ((train_loader_path, "train"),
                                   (test_loader_path, "test")):
    if not os.path.isfile(_loader_path):
        raise FileNotFoundError(
            f"{_loader_kind} dataloader file not found: {_loader_path}")

# SECURITY: torch.load unpickles arbitrary Python objects; only load files
# produced by this notebook or another trusted source.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which refuses
# pickled DataLoader objects - pass weights_only=False there.
train_loader = torch.load(train_loader_path)
test_loader = torch.load(test_loader_path)
# The batch size is taken from the loaded training DataLoader.
batch_size = train_loader.batch_size
print(f"All data loaded successfilly")
print(f"train_loader_path = {train_loader_path}")
print(f"test_Loader_path = {test_loader_path}")


All data loaded successfilly
train_loader_path = dataLoaders/train_loader.pth
test_Loader_path = dataLoaders/test_loader.pth


In [76]:
# Training setup: loss, model and optimizer.
assert batch_size, 'batch_size is not set'

# Reconstruction loss. Commented-out alternatives in earlier versions of this
# cell: nn.CrossEntropyLoss and nn.NLLLoss.
criterion = nn.MSELoss()

# Model built by the project's get_autoencoder factory and moved to the
# selected device. (An nn.Transformer(inp_dim) variant was commented out here.)
model = get_autoencoder(inp_dim, code_dim, stages=1)
model = model.to(device)

learning_rate = 1e-3
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)

# History of the best validation losses; read and extended by
# train_loop_encoder.
loss_min = [1e10]

# Last expression of the cell: displays the active device in the notebook.
device


'cuda'

In [77]:
# Train the autoencoder.
# Best validation loss seen so far. Kept in a one-element list so that
# train_loop can update it in place without a `global` statement.
best_test_loss = [None, ]


def train_loop(epoch, save_threshold=1.938, lr_halving_period=500):
    """Run one training epoch and one validation pass, then checkpoint.

    Works on the notebook-level globals ``model``, ``optimizer``,
    ``criterion``, ``train_loader``, ``test_loader``, ``device``, ``writer``,
    ``models_dir`` and ``best_test_loss``.

    The train and validation losses are averages over the whole epoch,
    weighted by batch size, instead of the value of the last batch only. One
    scalar per epoch is written to the writer under "Loss/train" and
    "Loss/val".

    Args:
        epoch: Epoch number. Used as the logging step, in the checkpoint file
            name and for the learning-rate schedule.
        save_threshold: A new best model is written to disk only when its
            validation loss is below this value.
        lr_halving_period: The learning rate of the first parameter group is
            halved every ``lr_halving_period`` epochs; a falsy value disables
            the schedule.
    """
    # ---- training pass ----
    model.train()
    loss_sum, samples_seen = 0.0, 0
    for X, y in train_loader:
        X = X.to(device)
        y = y.to(device)

        model.zero_grad()
        batch_loss = criterion(model(X), y)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a short final batch does not skew the mean
        # (exact for a mean-reduced criterion such as nn.MSELoss).
        loss_sum += batch_loss.item() * len(X)
        samples_seen += len(X)
    loss = loss_sum / samples_seen if samples_seen else 0.0
    writer.add_scalar("Loss/train", loss, epoch)

    # ---- validation pass ----
    model.eval()
    loss_sum, samples_seen = 0.0, 0
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            loss_sum += criterion(model(X), y).item() * len(X)
            samples_seen += len(X)
    loss_test = loss_sum / samples_seen if samples_seen else 0.0
    writer.add_scalar("Loss/val", loss_test, epoch)

    # Track the best validation loss. `is None` rather than truthiness, so a
    # best loss of exactly 0.0 is not mistaken for "no value yet".
    if best_test_loss[0] is None or loss_test < best_test_loss[0]:
        if loss_test < save_threshold:
            # The checkpoint folder is not guaranteed to exist yet.
            os.makedirs(models_dir, exist_ok=True)
            # The "_<loss>.pth" suffix is parsed back when the best model is
            # selected, so the name format must stay as it is.
            torch.save(model, f"{models_dir}/ep{epoch}_{loss_test:.4}.pth")
        best_test_loss[0] = loss_test

    # Step schedule: halve the learning rate every lr_halving_period epochs.
    if lr_halving_period and epoch % lr_halving_period == 0:
        optimizer.param_groups[0]['lr'] /= 2
        print(f"!!!!!!!!!!!!!!!cahnge lr to` {optimizer.param_groups[0]['lr']}")

    print(f"Epoch {epoch} - loss {loss:.5}  /  val loss {loss_test:.5} lr = {optimizer.param_groups[-1]['lr']} grs = {len(optimizer.param_groups)} ")


In [78]:
# Run the whole training.
transfer_learning = 0
if transfer_learning:
    # Continue training from a previously saved model, placed on the selected
    # device. SECURITY: torch.load unpickles arbitrary objects - trusted
    # files only.
    model = torch.load(r"trans1.pth", map_location=device)
    # The optimizer created earlier still holds the parameters of the previous
    # model object; rebuild it so the loaded model is the one being updated.
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

if train_mode:
    writer = SummaryWriter()  # writes data for tensorboard, default logdir=runs
    try:
        for epoch in range(1, epochs+1):
            train_loop(epoch)
        print("Training done!")
    finally:
        # Flush even when training is interrupted, so the logged scalars
        # reach the disk.
        writer.flush()


Epoch 1 - loss 191.47  /  val loss 178.65 lr = 0.001 grs = 1 
Epoch 2 - loss 180.93  /  val loss 147.57 lr = 0.001 grs = 1 
Epoch 3 - loss 149.87  /  val loss 94.761 lr = 0.001 grs = 1 
Epoch 4 - loss 96.892  /  val loss 77.436 lr = 0.001 grs = 1 
Epoch 5 - loss 78.654  /  val loss 44.03 lr = 0.001 grs = 1 
Epoch 6 - loss 45.868  /  val loss 27.197 lr = 0.001 grs = 1 
Epoch 7 - loss 29.733  /  val loss 26.694 lr = 0.001 grs = 1 
Epoch 8 - loss 29.578  /  val loss 24.754 lr = 0.001 grs = 1 
Epoch 9 - loss 27.813  /  val loss 26.165 lr = 0.001 grs = 1 
Epoch 10 - loss 29.26  /  val loss 27.259 lr = 0.001 grs = 1 
Epoch 11 - loss 30.324  /  val loss 23.545 lr = 0.001 grs = 1 
Epoch 12 - loss 26.612  /  val loss 22.534 lr = 0.001 grs = 1 
Epoch 13 - loss 25.585  /  val loss 22.463 lr = 0.001 grs = 1 
Epoch 14 - loss 25.451  /  val loss 20.773 lr = 0.001 grs = 1 
Epoch 15 - loss 23.65  /  val loss 19.71 lr = 0.001 grs = 1 
Epoch 16 - loss 22.416  /  val loss 20.271 lr = 0.001 grs = 1 
Epoch

In [79]:
# Find the saved network with the smallest loss and load it.
def _loss_from_name(name):
    """Return the loss encoded in a "<prefix>_<loss>.pth" file name.

    Returns None for names that are not .pth files or whose last "_" field
    is not a number, so unrelated files in the folder are ignored.
    """
    stem, ext = os.path.splitext(name)
    if ext != ".pth":
        return None
    try:
        return float(stem.rsplit("_", 1)[-1])
    except ValueError:
        return None


mdls_names_lst = os.listdir(models_dir)
_scored_models = [(name, _loss_from_name(name)) for name in mdls_names_lst]
_scored_models = [(name, loss) for name, loss in _scored_models
                  if loss is not None]
if not _scored_models:
    raise FileNotFoundError(
        f"no '<name>_<loss>.pth' checkpoints found in {models_dir}")

best_losses = [loss for _, loss in _scored_models]  # loss of every checkpoint
print(f"best_losses {best_losses}")
# Select by the parsed value. A substring test such as "1.83" in name would
# also match "ep10_1.839.pth" and could pick a worse checkpoint.
best_mdl_name = min(_scored_models, key=lambda item: item[1])[0]
# map_location lets a checkpoint saved on another device load on this one.
# SECURITY: torch.load unpickles arbitrary objects - trusted files only.
model = torch.load(models_dir + "/" + best_mdl_name, map_location=device)
print(f"best model {models_dir + '/' + best_mdl_name} loaded")

# Conversion of the model to the libtorch format (currently disabled).
if 0:  # train_mode:
    # NOTE(review): `dense_dim` is not defined in the visible part of the
    # notebook; define it before enabling this branch.
    # Example tensor; tracing does not work without it.
    example = torch.rand(1, 1, dense_dim, dense_dim)

    # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
    traced_script_module = torch.jit.trace(model, example)
    output = traced_script_module(torch.ones(1, 1, dense_dim))
    # print(f"output[0, :5] {output[0, :5]}")
    pt_model_name = f"pedro_{dense_dim}_{str(min(best_losses))}.pt"
    # save it also in orthoplatform Designer bin folder
    path_ = r'C:\Projects\Spark\orthoplatform\Build\Bin\SparkDesigner\Release\Resources\MLModels\Autosetup/' + \
        f"Pedro{dense_dim}.pt"  # Pedro3Jaw480
    # traced_script_module.save(path_)

    import datetime
    print(
        f"libtorch model {libtorch_models_dir}/{pt_model_name} saved. datetime{datetime.datetime.now()}")


best_losses [1.847, 1.847, 1.847, 1.847, 1.847, 1.847, 1.847, 1.847, 1.847, 1.847, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.846, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.845, 1.844, 1.844, 1.844, 1.844, 1.844, 1.844, 1.844, 1.844, 1.844, 1.844, 1.844, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.843, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.842, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.841, 1.84, 1.84, 1.84, 1.84, 1.84, 1.84, 1.84, 1.84, 1.84, 1.84, 1.84, 1.839, 1.839, 1.839, 1.839, 1.839, 1.839, 1.839, 1.839, 1.839, 1.839, 1.838, 1.838, 1.838, 1.838, 1.838, 1.838, 1.838, 1.838, 1.838, 1.838, 1.837, 1.837, 1.837, 1.837, 1.837, 1.837, 1.837, 1.837, 1.837, 1.836, 1.836, 1.836, 1.836, 1.836, 1.836, 1.836, 1.836, 1.835, 1.835, 1.835, 1.835, 1.835, 1.835, 1.835, 1.835, 1.835, 1.834, 1.834, 1.834, 1.834, 1.83

### Evaluating

In [80]:
# Evaluate the model on one batch.
if 0:
    # Single-case check: a loader holding one case only (909346.oas here,
    # 100497.oas was used earlier).
    case = set_gen_fr_csv_pd_ver3(csv_fpath, 1, one_case="909346.oas")
    loader = DataLoader(case, batch_size=1)
else:
    loader = test_loader

# Validate the loader that is actually used below, not always test_loader.
if len(loader) == 0:
    raise ValueError("evaluation loader is empty")
print(f"loader batch size = {loader.batch_size} batches in it = {len(loader)}")

# Inference mode for the forward pass below.
model.eval()
with torch.no_grad():
    # Prediction works per batch and only the first batch is needed. A
    # DataLoader cannot be indexed like a list, so the batch is taken
    # through an iterator.
    # Experiment kept from the original: T1[:, 105:120] = 0 zeroes tooth
    # 37 (17) in every case of the batch, which distorts the prediction.
    T1, T2 = next(iter(loader))
    T1 = T1.to(device)
    T2 = T2.to(device)
    pred = model(T1)
print(f" pred.__len__() = {len(pred)}")


loader batch size = 2048 batches in it = 1
 pred.__len__() = 97


In [81]:
%matplotlib qt
# %matplotlib widget
# import ipywidgets as widgets

# A Landmark_gen instance is created only to use its plotting method.
inst_ = Landmark_gen()

# Move everything to the CPU and reshape each case to
# (n_teeth, values per tooth). Tensor.reshape is called directly: np.reshape
# on a torch tensor only works through NumPy's fallback conversion path.
_values_per_tooth = inp_dim//n_teeth
T1 = T1.to("cpu").reshape(-1, n_teeth, _values_per_tooth)
T2 = T2.to("cpu").reshape(-1, n_teeth, _values_per_tooth)
pred = pred.to("cpu").reshape(-1, n_teeth, _values_per_tooth)
print(f"T2 {T2}")
print(f"pred {pred}")

# Experiment kept from the original: zero the second molars in a copy of T2
# (T2e = T2.clone().detach(); T2e[:, 0, :] = 0; T2e[:, 13, :] = 0).

k = 4  # number of figures; more than 20 is not drawn (per the original note)
m = 1  # cases per figure
sft = 0  # offset from the start of the batch

for i in range(k):
    # Slice bounds of the cases shown on figure i.
    first = i*m + sft
    last = first + m
    inst_.draw_3d(
        T2[first:last],    # target T2
        pred[first:last],  # predicted T2
        # 'h' - horizontal layout of the plots, 'v' - vertical (default)
        style='h',
        show=True)


T2 tensor([[[22.7100, 14.1100,  0.1200,  ..., 26.4900, 20.4700, -2.1800],
         [22.4900,  3.8600, -1.2000,  ..., 26.4300, 10.1500, -5.7800],
         [19.1100, -3.2600, -0.8300,  ..., 22.9400, -0.3300, -6.0900],
         ...,
         [26.6400, 17.0900,  0.3100,  ..., 27.5200, 21.1400,  5.4100],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[23.7700, 13.8900, -0.7800,  ..., 29.1200, 20.6400, -3.6800],
         [21.9900,  2.3100, -1.2300,  ..., 27.2000,  9.2300, -6.5800],
         [19.4900, -4.8000, -0.3300,  ..., 23.4700, -0.7600, -7.2600],
         ...,
         [27.3600, 13.6100, -2.6600,  ..., 29.1600, 16.5900,  5.1700],
         [29.4200, 19.6900,  5.6400,  ..., 31.2900, 20.4200,  8.6700],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[23.5000, 13.3600, -0.3200,  ..., 27.2700, 17.6800, -6.2800],
         [23.7000,  2.5400,  0.1300,  ...,

In [82]:
def train_loop_encoder(train_loader, test_loader, model, loss_fn, optimizer, t):
    """Run one training epoch and one validation pass for ``model``.

    Uses the notebook-level globals ``writer``, ``loss_min`` and
    ``models_dir``.

    The reported losses are averages over the epoch weighted by batch size,
    and one scalar per epoch is written to the writer under "Loss/train" and
    "Loss/test".

    Args:
        train_loader: Iterable of ``(X, y)`` training batches.
        test_loader: Iterable of ``(X, y)`` validation batches.
        model: Network to train; batches are moved to the device of its
            first parameter.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        t: Epoch number, used as the logging step and in the file name.

    Raises:
        ValueError: If either loader yields no batches.
    """
    # Batches leave the DataLoader on the CPU; send them where the model is.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    def _to_model_device(tensor):
        return tensor if target_device is None else tensor.to(target_device)

    # ---- training pass ----
    model.train()
    loss_sum, samples_seen = 0.0, 0
    for X, y in train_loader:
        X, y = _to_model_device(X), _to_model_device(y)

        batch_loss = loss_fn(model(X), y)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item() * len(X)
        samples_seen += len(X)
    if not samples_seen:
        raise ValueError("train_loader is empty")
    loss = loss_sum / samples_seen
    writer.add_scalar("Loss/train", loss, t)

    # ---- validation pass (no autograd graph needed) ----
    model.eval()
    loss_sum, samples_seen = 0.0, 0
    with torch.no_grad():
        for Xt, yt in test_loader:
            Xt, yt = _to_model_device(Xt), _to_model_device(yt)
            loss_sum += loss_fn(model(Xt), yt).item() * len(Xt)
            samples_seen += len(Xt)
    if not samples_seen:
        raise ValueError("test_loader is empty")
    loss_t = loss_sum / samples_seen
    writer.add_scalar("Loss/test", loss_t, t)

    if t % 100 == 0:
        print(f"Epoch {t} - loss {loss:.5}  /  val loss {loss_t:.5}")

    # Checkpoint on a new best validation loss, but only after epoch 100.
    if t > 100 and loss_t < min(loss_min):
        loss_min.append(loss_t)
        print(f"Ep: {t+1}  loss: {loss:>7f}  /  val loss {loss_t:.5}")
        # The checkpoint folder is not guaranteed to exist yet.
        os.makedirs(models_dir, exist_ok=True)
        torch.save(
            model, f"{models_dir}/autoencoder_Epoch_{t}_loss_{loss_t:.5}.pth")
        print(f"Saved {t+1}")


In [83]:
# model params calc
from prettytable import PrettyTable


def count_parameters(model):
    """Print a per-parameter table and return the trainable parameter count.

    Only parameters with ``requires_grad`` set are listed and counted.

    Args:
        model: Object exposing ``named_parameters()``.

    Returns:
        Total number of elements over all trainable parameters.
    """
    trainable = [(name, parameter.numel())
                 for name, parameter in model.named_parameters()
                 if parameter.requires_grad]

    table = PrettyTable(["Modules", "Parameters"])
    for name, size in trainable:
        table.add_row([name, size])
    total_params = sum(size for _, size in trainable)

    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

# count_parameters(model)
