# Automatic Mixed Precision

## Notes

- NVIDIA - [Training With Mixed Precision](https://docs.nvidia.com/deeplearning/performance/mixed-precision-training/index.html)
- [A developer-friendly guide to mixed precision training with PyTorch](https://spell.ml/blog/mixed-precision-training-with-pytorch-Xuk7YBEAACAASJam)
- PyTorch docs
    - [Recipe implemented below](https://pytorch.org/tutorials/recipes/recipes/amp_recipe.html)
    - [Examples](https://pytorch.org/docs/stable/notes/amp_examples.html)
    - [docs](https://pytorch.org/docs/stable/amp.html#)
- fastai
    - [docs](https://docs.fast.ai/text.data.html#Numericalizing)
    - [code for multiple of 8](https://github.com/fastai/fastai/tree/master/fastai/text/data.py#L15)
        - `return vocab + [f'xxfake' for i in range(0, 8-len(vocab)%8)]`

## Recipe from PyTorch Docs

In [None]:
import torch, time, gc

# Timing utilities
start_time = None

def start_timer():
    """Prepare the GPU for a timed trial and record the start time.

    Runs the Python garbage collector, releases PyTorch's cached CUDA
    blocks, resets the peak-memory counters, waits for outstanding CUDA
    work to finish, and finally stores the current wall-clock time in the
    module-level ``start_time``.
    """
    global start_time
    gc.collect()
    torch.cuda.empty_cache()
    # reset_max_memory_allocated() is deprecated and simply forwards to
    # reset_peak_memory_stats(); call the supported API directly so the
    # cell does not emit a deprecation warning.
    torch.cuda.reset_peak_memory_stats()
    # Synchronize so previously queued kernels are not billed to the
    # trial that is about to start.
    torch.cuda.synchronize()
    start_time = time.time()

def end_timer_and_print(local_msg):
    """Stop the timer started by ``start_timer`` and print a short report.

    Args:
        local_msg: Heading printed (after a blank line) above the report.

    Prints the seconds elapsed since the module-level ``start_time`` and the
    value returned by ``torch.cuda.max_memory_allocated()``.
    """
    # Wait for queued CUDA work so the timestamp covers the whole trial.
    torch.cuda.synchronize()
    elapsed = time.time() - start_time
    peak_bytes = torch.cuda.max_memory_allocated()

    print("\n" + local_msg)
    print("Total execution time = {:.3f} sec".format(elapsed))
    print("Max memory used by tensors = {} bytes".format(peak_bytes))

In [None]:
def make_model(in_size, out_size, num_layers):
    """Build a stack of Linear layers separated by ReLUs and move it to CUDA.

    Args:
        in_size: Feature width of the input and of every hidden layer.
        out_size: Feature width produced by the final Linear layer.
        num_layers: Total number of Linear layers in the stack.

    Returns:
        A ``torch.nn.Sequential`` with ``num_layers - 1`` (Linear, ReLU)
        pairs followed by one output Linear, placed on the CUDA device.
    """
    hidden_blocks = []
    for _ in range(num_layers - 1):
        hidden_blocks.extend(
            (torch.nn.Linear(in_size, in_size), torch.nn.ReLU())
        )
    # The output projection is created last so parameters are initialised
    # in the same order as the layers appear in the network.
    head = torch.nn.Linear(in_size, out_size)
    model = torch.nn.Sequential(*hidden_blocks, head)
    return model.cuda()

In [None]:
# Benchmark configuration shared by the default-precision and AMP trials.
# NOTE(review): 513 is presumably suggested as a size that is not a multiple
# of 8 (see the "multiple of 8" note above) — confirm against the recipe.
batch_size = 512 # Try, for example, 128, 256, 513.
in_size = 4096      # input/hidden feature width passed to make_model
out_size = 4096     # output feature width passed to make_model
num_layers = 3      # number of Linear layers passed to make_model
num_batches = 50    # number of (data, target) batches generated below
epochs = 3          # passes over the generated batches in each trial

# Creates data in default precision.
# The same data is used for both default and mixed precision trials below.
# You don't need to manually change inputs' dtype when enabling mixed precision.
data = [torch.randn(batch_size, in_size, device="cuda") for _ in range(num_batches)]
targets = [torch.randn(batch_size, out_size, device="cuda") for _ in range(num_batches)]

# Mean-squared-error loss used by both training loops.
loss_fn = torch.nn.MSELoss().cuda()

### Analysis

In [None]:
net = make_model(in_size, out_size, num_layers)
net

Sequential(
  (0): Linear(in_features=4096, out_features=4096, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4096, out_features=4096, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4096, out_features=4096, bias=True)
)

In [None]:
count_parameters(net)

total: 50,343,936, trainable: 50,343,936, non_trainable: 0


(50343936, 50343936, 0)

In [None]:
data[49].shape

torch.Size([512, 4096])

In [None]:
print(f'{50*512*4096:,}')

104,857,600


pts * { (lstm input * age span) + demog addition to linear layers }
pts * { (rec_dim * age_span) + demog_dim }

In [None]:
print(f'{6645 * ((88 * 120) + 120):,}')  #10K pts

70,968,600


In [None]:
print(f'{13317 * ((88 * 120) + 120):,}')  #20K pts

142,225,560


### Default Precision

In [None]:
# Baseline trial: train a fresh model in default precision and report the
# elapsed time and peak tensor memory via the timing utilities.
net = make_model(in_size, out_size, num_layers)
opt = torch.optim.SGD(net.parameters(), lr=0.001)

start_timer()
for epoch in range(epochs):
    # Named `inputs` (not `input`) so the builtin input() is not shadowed
    # in the notebook's global namespace.
    for inputs, target in zip(data, targets):
        output = net(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        opt.step()
        opt.zero_grad() # set_to_none=True here can modestly improve performance
end_timer_and_print("Default precision:")


Default precision:
Total execution time = 2.076 sec
Max memory used by tensors = 2138153984 bytes


### AMP
1. Forward pass under autocast
2. Exit autocast before backward
3. Use GradScaler to scale loss, then call backward() on that to create **scaled gradients**
    - Same GradScaler instance should be used for the entire convergence run 
        - same GradScaler instance for a single experiment
        - a dedicated fresh GradScaler instance for each new experiment (convergence run)

In [None]:
# Mixed-precision trial: same model shape and data as the baseline, with the
# forward pass under autocast and gradients scaled by a GradScaler.
use_amp = True

net = make_model(in_size, out_size, num_layers)
opt = torch.optim.SGD(net.parameters(), lr=0.001)
# One scaler for the whole run; with enabled=False its calls become no-ops.
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

start_timer()
for epoch in range(epochs):
    # Named `inputs` (not `input`) so the builtin input() is not shadowed
    # in the notebook's global namespace.
    for inputs, target in zip(data, targets):
        # Only the forward pass and loss run under autocast.
        with torch.cuda.amp.autocast(enabled=use_amp):
            output = net(inputs)
            loss = loss_fn(output, target)
        # Backward runs outside autocast on the scaled loss.
        scaler.scale(loss).backward()
        # step() unscales gradients and skips the update if they hold inf/NaN.
        scaler.step(opt)
        # Adjust the scale factor for the next iteration.
        scaler.update()
        opt.zero_grad() # set_to_none=True here can modestly improve performance
end_timer_and_print("Mixed precision:")


Mixed precision:
Total execution time = 1.141 sec
Max memory used by tensors = 2230453248 bytes


# `lemonpie` AMP Testing

In [None]:
from lemonpie.basics import * 
from lemonpie.preprocessing.vocab import * #for loading vocabs
from lemonpie.preprocessing.transform import * #for loading ptlist thru EHRData
from lemonpie.data import * #for EHRData
from lemonpie.learn import * #for fit/predict stuff
from lemonpie.metrics import * #for auroc_score
from lemonpie.models import * #for models 
from fastai.imports import *

## Preprocess - Do Once

In [None]:
preprocess_ehr_dataset(PATH_10K, SYNTHEA_DATAGEN_DATES['10K'], conditions_dict=CONDITIONS, age_start=240, age_stop=360, age_in_months=True)

Since data is pre-cleaned, skipping Cleaning, Splitting and Vocab-creation
------------------- Creating patient lists -------------------
6645 total patients completed, saved patient list to /home/vinod/.lemonpie/datasets/synthea/10K/processed/months_240_to_360/train
2215 total patients completed, saved patient list to /home/vinod/.lemonpie/datasets/synthea/10K/processed/months_240_to_360/valid
2216 total patients completed, saved patient list to /home/vinod/.lemonpie/datasets/synthea/10K/processed/months_240_to_360/test


## Tests

### 1K

In [None]:
labels = ['diabetes', 'stroke', 'alzheimers', 'coronary_heart', 'breast_cancer', 'epilepsy']

In [None]:
ehr_1K_data = EHRData(PATH_1K, labels, age_start=240, age_stop=360, age_in_months=True, lazy_load_gpu=False)
demograph_dims, rec_dims, demograph_dims_wd, rec_dims_wd = get_all_emb_dims(EhrVocabList.load(PATH_1K))
train_dl, valid_dl, train_pos_wts, valid_pos_wts = ehr_1K_data.get_data(bs=1024)

#### `EHR_LSTM`

In [None]:
model = EHR_LSTM(demograph_dims, rec_dims, demograph_dims_wd, rec_dims_wd, num_labels=len(labels)).to(DEVICE)
train_loss_fn, valid_loss_fn = get_loss_fn(train_pos_wts), get_loss_fn(valid_pos_wts)
optimizer = torch.optim.Adagrad(model.parameters())

In [None]:
len(train_dl), len(valid_dl)

(1, 1)

In [None]:
model

EHR_LSTM(
  (embs): ModuleList(
    (0): Embedding(40, 8)
    (1): Embedding(16, 8)
    (2): Embedding(128, 8)
    (3): Embedding(8, 8)
    (4): Embedding(8, 8)
    (5): Embedding(8, 8)
    (6): Embedding(8, 8)
    (7): Embedding(248, 16)
    (8): Embedding(208, 16)
    (9): Embedding(8, 8)
    (10): Embedding(184, 16)
  )
  (embgs): ModuleList(
    (0): EmbeddingBag(536, 16, mode=mean)
    (1): EmbeddingBag(32, 8, mode=mean)
    (2): EmbeddingBag(56, 8, mode=mean)
    (3): EmbeddingBag(232, 16, mode=mean)
    (4): EmbeddingBag(16, 8, mode=mean)
    (5): EmbeddingBag(144, 8, mode=mean)
    (6): EmbeddingBag(184, 16, mode=mean)
    (7): EmbeddingBag(24, 8, mode=mean)
  )
  (input_dp): InputDropout()
  (lstm): LSTM(88, 88, num_layers=4, batch_first=True, dropout=0.3)
  (lin): Sequential(
    (0): Linear(in_features=208, out_features=416, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=416, out_features=832, bias=True)
    (4): ReL

In [None]:
h_1K = RunHistory(labels)

`use_amp=False`

In [None]:
%time h_1K = fit(10, h_1K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=False)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |          1.401 | [0.520 0.368 0.570 0.498]             24.931 | [0.613 0.733 0.975 0.814]
    1 |         27.871 | [0.537 0.838 0.925 0.729]              6.179 | [0.449 0.637 0.760 0.731]
    2 |          7.961 | [0.466 0.678 0.718 0.666]              1.321 | [0.648 0.575 0.406 0.644]
    3 |          1.366 | [0.723 0.511 0.626 0.552]              1.261 | [0.654 0.707 0.977 0.837]
    4 |          1.258 | [0.752 0.764 0.909 0.719]              1.167 | [0.661 0.688 0.982 0.840]
    5 |          1.144 | [0.754 0.871 0.924 0.713]              1.084 | [0.654 0.704 0.979 0.836]
    6 |          1.044 | [0.747 0.865 0.920 0.732]              1.356 | [0.661 0.699 0.980 0.830]
    7 |          1.285 | [0.756 0.868 0.921 0.741]              1.776 | [0.679 0.722 0.982 0.838]
    8 |          1.730

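`use_amp=True` (note: this run continues training the same `model` and `optimizer` from the `use_amp=False` run above, so its losses are not a from-scratch comparison)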

In [None]:
%time h_1K = fit(10, h_1K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=True)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |          1.140 | [0.774 0.877 0.937 0.737]              1.113 | [0.692 0.717 0.986 0.836]
    1 |          1.073 | [0.777 0.878 0.931 0.737]              1.070 | [0.694 0.720 0.986 0.841]
    2 |          0.989 | [0.788 0.884 0.968 0.762]              1.049 | [0.698 0.712 0.987 0.843]
    3 |          0.937 | [0.785 0.907 0.957 0.779]              1.031 | [0.705 0.716 0.987 0.841]
    4 |          0.924 | [0.783 0.894 0.953 0.771]              1.033 | [0.699 0.714 0.989 0.841]
    5 |          0.873 | [0.771 0.914 0.954 0.775]              1.169 | [0.695 0.710 0.990 0.840]
    6 |          0.884 | [0.810 0.899 0.954 0.792]              1.122 | [0.701 0.725 0.992 0.845]
    7 |          1.016 | [0.814 0.911 0.954 0.774]              1.074 | [0.695 0.718 0.992 0.845]
    8 |          0.874

#### `EHR_CNN`

In [None]:
model = EHR_CNN(demograph_dims, rec_dims, demograph_dims_wd, rec_dims_wd, num_labels=len(labels)).to(DEVICE)
train_loss_fn, valid_loss_fn = get_loss_fn(train_pos_wts), get_loss_fn(valid_pos_wts)
optimizer = torch.optim.Adagrad(model.parameters())

In [None]:
model

EHR_CNN(
  (embs): ModuleList(
    (0): Embedding(40, 8)
    (1): Embedding(16, 8)
    (2): Embedding(128, 8)
    (3): Embedding(8, 8)
    (4): Embedding(8, 8)
    (5): Embedding(8, 8)
    (6): Embedding(8, 8)
    (7): Embedding(248, 16)
    (8): Embedding(208, 16)
    (9): Embedding(8, 8)
    (10): Embedding(184, 16)
  )
  (embgs): ModuleList(
    (0): EmbeddingBag(536, 16, mode=mean)
    (1): EmbeddingBag(32, 8, mode=mean)
    (2): EmbeddingBag(56, 8, mode=mean)
    (3): EmbeddingBag(232, 16, mode=mean)
    (4): EmbeddingBag(16, 8, mode=mean)
    (5): EmbeddingBag(144, 8, mode=mean)
    (6): EmbeddingBag(184, 16, mode=mean)
    (7): EmbeddingBag(24, 8, mode=mean)
  )
  (input_dp): InputDropout()
  (lin): Sequential(
    (0): Linear(in_features=632, out_features=1264, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=1264, out_features=2528, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.3, inplace=False)
    (6): Li

In [None]:
h_cnn_1K = RunHistory(labels)

`use_amp=False`

In [None]:
%time h_cnn_1K = fit(10, h_cnn_1K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=False)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |          1.379 | [0.461 0.568 0.404 0.516]           1325.173 | [0.355 0.666 0.272 0.362]
    1 |       1548.317 | [0.430 0.668 0.451 0.421]              1.954 | [0.584 0.641 0.974 0.837]
    2 |          2.631 | [0.627 0.467 0.758 0.576]              1.285 | [0.703 0.737 0.971 0.713]
    3 |          1.308 | [0.744 0.872 0.870 0.701]              1.396 | [0.690 0.728 0.980 0.799]
    4 |          1.504 | [0.764 0.880 0.914 0.719]              1.529 | [0.688 0.730 0.979 0.842]
    5 |          1.585 | [0.740 0.885 0.947 0.761]              1.241 | [0.697 0.765 0.986 0.815]
    6 |          1.227 | [0.775 0.836 0.913 0.747]              1.157 | [0.694 0.737 0.986 0.825]
    7 |          1.099 | [0.781 0.889 0.943 0.754]              1.234 | [0.688 0.739 0.984 0.809]
    8 |          1.177

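`use_amp=True` (note: this run continues training the same `model` and `optimizer` from the `use_amp=False` run above, so its losses are not a from-scratch comparison)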

In [None]:
%time h_cnn_1K = fit(10, h_cnn_1K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=True)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |          1.010 | [0.786 0.880 0.958 0.791]              1.079 | [0.691 0.728 0.982 0.823]
    1 |          1.030 | [0.773 0.912 0.959 0.787]              1.099 | [0.691 0.728 0.981 0.822]
    2 |          0.922 | [0.788 0.921 0.960 0.772]              1.050 | [0.695 0.726 0.979 0.824]
    3 |          0.885 | [0.796 0.927 0.971 0.786]              1.191 | [0.693 0.719 0.972 0.800]
    4 |          0.812 | [0.787 0.939 0.968 0.793]              1.124 | [0.683 0.711 0.975 0.793]
    5 |          0.787 | [0.836 0.935 0.982 0.821]              3.587 | [0.659 0.674 0.962 0.732]
    6 |          1.960 | [0.827 0.897 0.960 0.799]              3.587 | [0.659 0.674 0.962 0.732]
    7 |          2.126 | [0.818 0.898 0.939 0.786]              3.587 | [0.659 0.674 0.962 0.732]
    8 |          2.118

### 10K

In [None]:
labels = ['diabetes', 'stroke', 'alzheimers', 'coronary_heart', 'lung_cancer', 'epilepsy']

In [None]:
ehr_10K_data = EHRData(PATH_10K, labels, age_start=240, age_stop=360, age_in_months=True, lazy_load_gpu=False)
demograph_dims, rec_dims, demograph_dims_wd, rec_dims_wd = get_all_emb_dims(EhrVocabList.load(PATH_10K), αd=5)
train_dl, valid_dl, train_pos_wts, valid_pos_wts = ehr_10K_data.get_data(bs=512)

In [None]:
len(train_dl), len(valid_dl)

(13, 3)

In [None]:
ehr_10K_data.splits.train

PatientList (6645 items)
base path:/home/vinod/.lemonpie/datasets/synthea/10K; split:train; age span:120 months
age_start:240; age_stop:360; age_type:months
ptid:891ff39a-15fe-4214-998b-4c1633307855, birthdate:1973-06-27, [('diabetes', False), ('stroke', False)].., device:cuda:0
ptid:b24eb6c6-3c0a-4034-b322-4dc961f9ea37, birthdate:1962-03-05, [('diabetes', False), ('stroke', False)].., device:cuda:0
ptid:14166659-9295-42a9-b1d5-aed8e974eece, birthdate:2018-08-23, [('diabetes', False), ('stroke', False)].., device:cuda:0
ptid:1bd6b4a0-2f31-40fe-9d1f-a43a4d26d9db, birthdate:1954-08-02, [('diabetes', True), ('stroke', False)].., device:cuda:0
ptid:dd40e4ec-56ea-4a7b-8be2-6d287236df1c, birthdate:1964-01-27, [('diabetes', False), ('stroke', False)].., device:cuda:0
ptid:b46bf715-0c95-4688-992d-448d3295067a, birthdate:1964-10-28, [('diabetes', False), ('stroke', False)].., device:cuda:0
ptid:3ae3378b-e29f-420e-9eaf-2bee003e1a75, birthdate:2008-10-27, [('diabetes', False), ('stroke', False)].

In [None]:
demograph_dims_wd, rec_dims_wd

(848, 752)

#### `EHR_LSTM`

In [None]:
model = EHR_LSTM(demograph_dims, rec_dims, demograph_dims_wd, rec_dims_wd, num_labels=len(labels), linear_layers=2).to(DEVICE)
train_loss_fn, valid_loss_fn = get_loss_fn(train_pos_wts), get_loss_fn(valid_pos_wts)
optimizer = torch.optim.Adagrad(model.parameters())

In [None]:
count_parameters(model)

total: 44,343,094, trainable: 44,343,094, non_trainable: 0


(44343094, 44343094, 0)

In [None]:
model

EHR_LSTM(
  (embs): ModuleList(
    (0): Embedding(33, 72)
    (1): Embedding(14, 56)
    (2): Embedding(124, 96)
    (3): Embedding(5, 48)
    (4): Embedding(8, 48)
    (5): Embedding(26, 64)
    (6): Embedding(4, 40)
    (7): Embedding(328, 128)
    (8): Embedding(332, 128)
    (9): Embedding(3, 40)
    (10): Embedding(306, 128)
  )
  (embgs): ModuleList(
    (0): EmbeddingBag(658, 152, mode=mean)
    (1): EmbeddingBag(27, 64, mode=mean)
    (2): EmbeddingBag(55, 80, mode=mean)
    (3): EmbeddingBag(247, 120, mode=mean)
    (4): EmbeddingBag(11, 56, mode=mean)
    (5): EmbeddingBag(144, 104, mode=mean)
    (6): EmbeddingBag(215, 112, mode=mean)
    (7): EmbeddingBag(20, 64, mode=mean)
  )
  (input_dp): InputDropout()
  (lstm): LSTM(752, 752, num_layers=4, batch_first=True, dropout=0.3)
  (lin): Sequential(
    (0): Linear(in_features=1608, out_features=3216, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=3216, out_features=64

##### `use_amp=False`

In [None]:
# Clear Python garbage and PyTorch's CUDA cache, then reset peak-memory
# tracking so the max_memory_allocated() reading below reflects this run only.
gc.collect()
torch.cuda.empty_cache()
# reset_max_memory_allocated() is deprecated and forwards to this call.
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()

In [None]:
h_10K = RunHistory(labels)

In [None]:
%time h_10K = fit(5, h_10K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=False)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |         23.472 | [0.512 0.525 0.587 0.508]              1.300 | [0.740 0.821 0.897 0.734]
    1 |          1.170 | [0.653 0.758 0.779 0.696]              1.465 | [0.749 0.836 0.927 0.761]
    2 |          1.140 | [0.653 0.787 0.826 0.700]              1.179 | [0.760 0.836 0.934 0.767]
    3 |          0.961 | [0.743 0.843 0.920 0.772]              1.080 | [0.760 0.839 0.948 0.782]
    4 |          0.896 | [0.759 0.872 0.947 0.792]              1.075 | [0.739 0.837 0.949 0.784]
Checkpointed to "/home/vinod/.lemonpie/models/checkpoint.tar"
CPU times: user 3min 6s, sys: 1.7 s, total: 3min 8s
Wall time: 2min 45s


In [None]:
print(f'{torch.cuda.max_memory_allocated():,}')

6,551,656,960


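##### `use_amp=True`

Note: this run continues training the same `model` and `optimizer` from the `use_amp=False` run above, so its losses are not a from-scratch comparison.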

In [None]:
# Clear Python garbage and PyTorch's CUDA cache, then reset peak-memory
# tracking so the max_memory_allocated() reading below reflects this run only.
gc.collect()
torch.cuda.empty_cache()
# reset_max_memory_allocated() is deprecated and forwards to this call.
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()

In [None]:
%time h_10K = fit(5, h_10K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=True)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |          0.579 | [0.848 0.924 0.978 0.889]              1.328 | [0.715 0.825 0.945 0.760]
    1 |          0.538 | [0.843 0.924 0.982 0.889]              1.839 | [0.740 0.827 0.942 0.758]
    2 |          0.500 | [0.865 0.933 0.983 0.900]              2.029 | [0.748 0.828 0.941 0.757]
    3 |          0.492 | [0.861 0.934 0.983 0.903]              2.426 | [0.734 0.826 0.940 0.760]
    4 |          0.457 | [0.847 0.942 0.987 0.919]              1.795 | [0.732 0.825 0.941 0.757]
Checkpointed to "/home/vinod/.lemonpie/models/checkpoint.tar"
CPU times: user 2min 39s, sys: 1.48 s, total: 2min 40s
Wall time: 2min 18s


In [None]:
print(f'{torch.cuda.max_memory_allocated():,}')

3,736,733,696


#### `EHR_CNN`

In [None]:
model = EHR_CNN(demograph_dims, rec_dims, demograph_dims_wd, rec_dims_wd, num_labels=len(labels), linear_layers=2).to(DEVICE)
train_loss_fn, valid_loss_fn = get_loss_fn(train_pos_wts), get_loss_fn(valid_pos_wts)
optimizer = torch.optim.Adagrad(model.parameters())

In [None]:
count_parameters(model)

total: 19,079,534, trainable: 19,079,534, non_trainable: 0


(19079534, 19079534, 0)

In [None]:
model

EHR_CNN(
  (embs): ModuleList(
    (0): Embedding(33, 72)
    (1): Embedding(14, 56)
    (2): Embedding(124, 96)
    (3): Embedding(5, 48)
    (4): Embedding(8, 48)
    (5): Embedding(26, 64)
    (6): Embedding(4, 40)
    (7): Embedding(328, 128)
    (8): Embedding(332, 128)
    (9): Embedding(3, 40)
    (10): Embedding(306, 128)
  )
  (embgs): ModuleList(
    (0): EmbeddingBag(658, 152, mode=mean)
    (1): EmbeddingBag(27, 64, mode=mean)
    (2): EmbeddingBag(55, 80, mode=mean)
    (3): EmbeddingBag(247, 120, mode=mean)
    (4): EmbeddingBag(11, 56, mode=mean)
    (5): EmbeddingBag(144, 104, mode=mean)
    (6): EmbeddingBag(215, 112, mode=mean)
    (7): EmbeddingBag(20, 64, mode=mean)
  )
  (input_dp): InputDropout()
  (lin): Sequential(
    (0): Linear(in_features=1368, out_features=2736, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=2736, out_features=5472, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.3, inpl

##### `use_amp=False`

In [None]:
# Clear Python garbage and PyTorch's CUDA cache, then reset peak-memory
# tracking so the max_memory_allocated() reading below reflects this run only.
gc.collect()
torch.cuda.empty_cache()
# reset_max_memory_allocated() is deprecated and forwards to this call.
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()



In [None]:
h_cnn_10K = RunHistory(labels)

In [None]:
%time h_cnn_10K = fit(5, h_cnn_10K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=False)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |         16.057 | [0.616 0.655 0.661 0.608]              1.182 | [0.761 0.836 0.940 0.788]
    1 |          0.928 | [0.736 0.870 0.940 0.817]              1.054 | [0.772 0.838 0.951 0.797]
    2 |          0.754 | [0.795 0.899 0.968 0.838]              1.227 | [0.763 0.832 0.951 0.781]
    3 |          0.624 | [0.827 0.927 0.976 0.888]              1.492 | [0.763 0.829 0.947 0.780]
    4 |          0.481 | [0.868 0.946 0.989 0.911]              1.776 | [0.756 0.830 0.946 0.772]
Checkpointed to "/home/vinod/.lemonpie/models/checkpoint.tar"
CPU times: user 2min 34s, sys: 1.45 s, total: 2min 36s
Wall time: 2min 17s


In [None]:
print(f'{torch.cuda.max_memory_allocated():,}')

5,898,519,040


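##### `use_amp=True`

Note: this run continues training the same `model` and `optimizer` from the `use_amp=False` run above, so its losses are not a from-scratch comparison.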

In [None]:
# Clear Python garbage and PyTorch's CUDA cache, then reset peak-memory
# tracking so the max_memory_allocated() reading below reflects this run only.
gc.collect()
torch.cuda.empty_cache()
# reset_max_memory_allocated() is deprecated and forwards to this call.
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()



In [None]:
%time h_cnn_10K = fit(5, h_cnn_10K, model, train_loss_fn, valid_loss_fn, optimizer, auroc_score, \
              train_dl, valid_dl, to_chkpt_path=MODEL_STORE, lazy=False, from_chkpt_path=None, verbosity=1, use_amp=True)

epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |          0.384 | [0.890 0.959 0.991 0.954]              2.337 | [0.758 0.829 0.946 0.763]
    1 |          0.306 | [0.925 0.975 0.995 0.968]              2.684 | [0.743 0.828 0.945 0.759]
    2 |          0.247 | [0.948 0.983 0.996 0.981]              3.107 | [0.727 0.822 0.943 0.757]
    3 |          0.205 | [0.957 0.991 0.997 0.988]              3.592 | [0.745 0.823 0.941 0.750]
    4 |          0.161 | [0.985 0.993 0.998 0.990]              4.492 | [0.735 0.824 0.941 0.742]
Checkpointed to "/home/vinod/.lemonpie/models/checkpoint.tar"
CPU times: user 2min 37s, sys: 1.39 s, total: 2min 39s
Wall time: 2min 25s


In [None]:
print(f'{torch.cuda.max_memory_allocated():,}')

6,260,646,912


#### Predict

In [None]:
test_dl, test_pos_wts = ehr_10K_data.get_test_data()

In [None]:
len(test_dl), test_pos_wts

(18, tensor([ 13.,  17.,  34.,  15., 147.,  53.]))

In [None]:
test_loss_fn = get_loss_fn(test_pos_wts)

In [None]:
h_cnn_10K = predict(h_cnn_10K, model, test_loss_fn, auroc_score, test_dl, chkpt_path=MODEL_STORE)

From "/home/vinod/.lemonpie/models/checkpoint.tar", loading model ...
test loss = 4.048989103900062
test aurocs = [0.712495 0.830429 0.917362 0.753938 0.81881  0.525422]
