## Step 1

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">🏷️ Environment Setup</span>


In [None]:
# Pydicom used for dcm image resize for Inference
# !conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
# !conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
# !conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
# !conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
# !conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
# !conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
# !pip install -U fastai --upgrade
!pip install -q git+https://github.com/rwightman/pytorch-image-models.git
!pip install -q iterative-stratification
!pip install -q wandb --upgrade
!pip install -q nbdev

!pip install timm

In [None]:
# # !pip list | grep "torch\|cuda\|fast"

# #Restart Kernel    
# import IPython
# IPython.Application.instance().kernel.do_shutdown(True) #automatically restarts kernel

In [None]:
import os

def detect_device(env=None):
    """Infer the accelerator type from environment variables.

    Args:
        env: Mapping of environment variables to inspect. Defaults to
            ``os.environ``; accepting a mapping keeps the function testable.

    Returns:
        ``'TPU'`` when ``TPU_NAME`` is present and ``XRT_TPU_CONFIG`` is set,
        ``'GPU'`` when ``TPU_NAME`` is absent and ``CUDA_VERSION`` is set,
        otherwise ``'CPU'``.
    """
    if env is None:
        env = os.environ
    if 'TPU_NAME' in env:
        # `.get` avoids the KeyError that indexing raised when TPU_NAME was
        # set but XRT_TPU_CONFIG was not.
        return 'TPU' if env.get('XRT_TPU_CONFIG') is not None else 'CPU'
    if env.get('CUDA_VERSION') is not None:
        return 'GPU'
    return 'CPU'


device = detect_device()

print(device)

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Imports and Seeding</span>


In [None]:
import pandas as pd
import timm
from timm import *

from fastai.vision.all import *
from fastai.vision.learner import _update_first_layer
from fastai.callback.wandb import *
from nbdev.showdoc import show_doc

In [None]:
# Directory that is joined with '<image id>.png' to build each image path.
PATH = '/kaggle/input/siim-covid19-512-images-and-metadata/train'
# Directory under which the fine-tuning checkpoint is written.
path_model_save = Path('/kaggle/working/models')

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Data Preparation</span>

In [None]:
# Study-level labels: rename the verbose column names to short identifiers.
study_column_names = {
    'id': 'study_id',
    'Negative for Pneumonia': 'negative',
    'Typical Appearance': 'typical',
    'Indeterminate Appearance': 'indeterminate',
    'Atypical Appearance': 'atypical',
}
df_study_lvl = pd.read_csv("../input/siim-covid19-detection/train_study_level.csv")
df_study_lvl = df_study_lvl.rename(columns=study_column_names)

# Image-level rows: derive the join key by suffixing the study UID so it
# matches the `study_id` values of the study-level table.
df_image_lvl = pd.read_csv('/kaggle/input/siim-covid19-detection/train_image_level.csv')
df_image_lvl['study_id'] = df_image_lvl['StudyInstanceUID'].map(lambda uid: uid + "_study")

# Outer join keeps rows from both tables even when the other side has no match.
df_annotations = pd.merge(df_image_lvl, df_study_lvl, on='study_id', how='outer')
df_annotations.head(3)

In [None]:
# df_image_level = pd.read_csv('/kaggle/input/siim-covid19-detection/train_image_level.csv')
# Build the on-disk location of every image: <PATH>/<image id>.png
df_annotations['image_path'] = [
    os.path.join(PATH, str(image_id) + '.png')
    for image_id in df_annotations['id']
]

In [None]:
label_names = ['negative','typical','indeterminate','atypical']

def get_labels(row):
    """Return the names of all label columns equal to 1 in `row`.

    The names are emitted in `label_names` order and joined with single
    spaces; an empty string is returned when no label column equals 1.
    """
    active = [name for name in label_names if row[name] == 1]
    return ' '.join(active)

# Collapse the one-column-per-class indicators into one space-separated label string per row.
df_annotations['labels'] = df_annotations[label_names].apply(get_labels, axis=1)

In [None]:
# timm.list_models('efficientnet*')

<span style="color: #000000; font-family: Segoe UI; font-size: 1.2em; font-weight: 400;">📌 Architectures with the `tf_` prefix use the original weights ported from Google, so they use manual padding to match TensorFlow's "same" padding, which adds GPU overhead and a general slowdown.</span>

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">⚙️ Global Configuration & Seed</span>


In [None]:
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.

    The function no longer depends on the notebook-level `device` variable:
    the CUDA and cuDNN calls below are safe on CPU-only machines, so they are
    made unconditionally.
    """
    random.seed(seed)
    # Only affects hash randomisation of interpreters started after this
    # point (e.g. worker subprocesses), not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Seed every visible GPU, not just the current one; ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may choose different kernels from run to run.
    torch.backends.cudnn.benchmark = False

In [None]:
class Config:
    """Global experiment settings, stored as class attributes.

    Executing the class body has side effects: it seeds the RNGs and prints
    the job name.
    """
    # --- reproducibility ---
    seed_val = 111
    seed_everything(seed_val)

    # --- experiment identity ---
    fold_num = 0
    job = 1

    # --- data / model ---
    num_classes = 4
    input_dims = 512
    model_arch = "efficientnetv2_rw_s"
    batch_size = 32
    num_workers = 0
    kfold = 5

    # --- optimisation ---
    loss_func = CrossEntropyLossFlat() # or LabelSmoothingCrossEntropyFlat()
    metrics = [error_rate, accuracy, RocAuc(average='macro'), F1Score(average='macro')]

    # Name derived from architecture, fold and job number.
    job_name = f'{model_arch}_fold{fold_num}_job{job}'
    print("Job Name:", job_name)

    # --- experiment tracking ---
    wandb_project = 'SIIM_classifier_public'
    wandb_run_name = job_name

    # Mixed precision is requested only when a GPU was detected.
    fp16 = (device == 'GPU')

cfg = Config()

In [None]:
# Converting global config class object to a dictionary to log using Wandb

# Snapshot the public Config attributes: plain ints are kept as-is, every
# other value is stringified; names containing '__' are skipped.
config_dict = {
    name: (value if type(value) == int else str(value))
    for name, value in vars(Config).items()
    if '__' not in name
}
config_dict

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">KFold Split</span>

In [None]:
# List the distinct label strings to see whether any row carries more than one class.
print('Unique labels:', df_annotations.labels.unique())

Apparently, only one class is assigned to each image, so this is a multiclass (single-label) classification problem.

In [None]:
from sklearn.model_selection import GroupKFold, train_test_split

# Assign each row the index of the fold in which it is a validation sample.
# Grouping by study_id keeps all images of a study in the same fold.
df_annotations['fold'] = -1
fold_col = df_annotations.columns.get_loc('fold')
grp_kfold = GroupKFold(n_splits=cfg.kfold)
study_groups = df_annotations.study_id.tolist()
# The loop variable keeps the name `fold`: it stays a notebook-level variable
# after the loop, exactly as before.
for fold, (train_index, val_index) in enumerate(grp_kfold.split(df_annotations,
                                                              groups=study_groups)):
    # `split` yields positional indices, so write through `.iloc`; `.loc` is
    # label-based and is only equivalent while the index is 0..n-1.
    df_annotations.iloc[val_index, fold_col] = fold
df_annotations.sample(3)

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Augmentation and Dataloader Preparation</span>

In [None]:
import albumentations as A

# Source: https://forums.fast.ai/t/albumentation-transformations-for-train-and-test-dataset/82642
class AlbumentationsTransform(RandTransform):
    """fastai item transform that delegates to Albumentations pipelines.

    `train_aug` is applied when the current split index is 0, `valid_aug`
    for any other split index.
    """
    split_idx = None
    order = 2

    def __init__(self, train_aug, valid_aug):
        # Stores both pipelines as `self.train_aug` / `self.valid_aug`.
        store_attr()

    def before_call(self, b, split_idx):
        # Remember which split the current call belongs to for `encodes`.
        self.idx = split_idx

    def encodes(self, img: PILImage):
        pipeline = self.train_aug if self.idx == 0 else self.valid_aug
        augmented = pipeline(image=np.array(img))['image']
        return PILImage.create(augmented)
    
def get_train_aug():
    """Build the Albumentations pipeline used for training images.

    Images are resized to `cfg.input_dims` squared, then randomly flipped,
    shifted/scaled/rotated, perspective-warped and brightness/contrast
    jittered, followed by one optional colour transform and one optional
    dropout-style occlusion.
    """
    # NOTE(review): IAAPerspective and Cutout are reportedly deprecated or
    # removed in newer albumentations releases - confirm against the installed
    # version.
    side = cfg.input_dims
    colour_choice = A.OneOf(
        [A.CLAHE(), A.HueSaturationValue(0.2, 0.2, 0.2, p=0.5)],
        p=0.4,
    )
    occlusion_choice = A.OneOf([A.CoarseDropout(), A.Cutout()], p=0.5)
    steps = [
        # A.RandomResizedCrop(cfg.input_dims, cfg.input_dims),
        A.Resize(side, side, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.02, scale_limit=0.1, rotate_limit=10, p=0.5),
        A.IAAPerspective(scale=(0.02, 0.04), p=0.5),
        A.RandomBrightnessContrast(0.1, 0.1, p=0.5),
        colour_choice,
        occlusion_choice,
    ]
    return A.Compose(steps)

def get_valid_aug():
    """Build the validation pipeline: a resize to `cfg.input_dims` squared, nothing else."""
    side = cfg.input_dims
    resize_only = [A.Resize(side, side, p=1.0)]
    return A.Compose(resize_only, p=1.0)

## Batch tfms run on the GPU over whole batches --> so they are faster
## Only fastai has them, so make use of these transformations as much as possible
## TODO: check fastai's aug_transforms for batch-level augmentations
## Item tfms run on the CPU and are applied to one image at a time
## RandomResizedCrop behaves differently with train and val transforms (tries to get the biggest centre crop from an image)

# Per-image transform: training pipeline for split 0, resize-only pipeline otherwise.
item_tfms = AlbumentationsTransform(get_train_aug(), get_valid_aug())
# Per-batch transform: normalise with the `imagenet_stats` statistics.
batch_tfms = [Normalize.from_stats(*imagenet_stats)]

<span style="color: #000000; font-family: Segoe UI; font-size: 1.2em; font-weight: 400;">📌 Fastai provides two ways to perform augmentations:</span>

<span style="color: #000000; font-family: Segoe UI; font-size: 1.1em; font-weight: 400;">&emsp;&emsp; - Item transforms (item_tfms) which are computed on the CPU</span>

<span style="color: #000000; font-family: Segoe UI; font-size: 1.1em; font-weight: 400;">&emsp;&emsp; - Batch transforms (batch_tfms) are done on the GPU on whole batches.</span>

<span style="color: #000000; font-family: Segoe UI; font-size: 1.1em; font-weight: 400;">If there is sufficient memory left in the GPU after loading the model into memory, batch transforms can be used to perform augmentations as it would result in a major boost in training speed.</span>

In [None]:
## Explain different dataset and data loading mechanisms

# Rows of the configured fold form the validation set. The mask is built from
# `cfg.fold_num`; the previous code compared against the leftover loop
# variable `fold` (the last fold index), which disagreed with the configured
# fold and with the job name.
is_valid = df_annotations['fold'] == cfg.fold_num

# Unique validation image paths (not used elsewhere in this notebook; kept for
# inspection).
val_indices = list(df_annotations[is_valid].image_path.unique())

data_block = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   splitter=MaskSplitter(list(is_valid)),
                   get_x=ColReader('image_path'),
                   get_y=ColReader('labels'),
                   item_tfms=item_tfms,
                   batch_tfms=batch_tfms)

dls = data_block.dataloaders(df_annotations,
                            bs=cfg.batch_size,
                            num_workers=cfg.num_workers)

dls.show_batch(figsize=(18,15), max_n=8, nrows=2)

In [None]:
# Show how each label string maps to the integer class index used by the dataloaders.
print("Class2Index Mapping:")
dls.vocab.o2i

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Create Fastai Learner</span>

Essentially, to use TIMM models in fastai, we have to take the model architecture and slice off the last Linear layer (resulting in a "body" that outputs unpooled features). We devise a function to achieve this by taking the `create_body` fastai function called in [create_cnn_model](https://github.com/fastai/fastai/blob/eda1a2e50980b1ec2df127ae431b8bdbf1a84877/fastai/vision/learner.py#L139) as an example.

Now we need to create a head, and for this we need to calculate the number of input features our head needs to have with the `num_features_model` method. We then pass `concat_pool=True` to have fastai create a head that starts with `AdaptiveConcatPool2d`, a layer that concatenates the outputs of `nn.AdaptiveAvgPool2d` and `nn.AdaptiveMaxPool2d`.

We then wrap the two in a `nn.Sequential` and we now have a PyTorch model ready to be trained.

We pass this to a `Learner`, specifying our splitter to be `default_split`, which expects the body in `model[0]` and the head in `model[1]` to split our layer groups, and we enable mixed precision training by calling the `to_non_native_fp16()` method.

Please note that if you're facing NaN issues while using mixed precision, you may need to try the native `to_fp16()` method or even try tuning the growth_factor parameter of the `to_non_native_fp16()` method to resolve it. The mixed precision feature does have some compatibility issues and it may throw errors if used with some fastai metrics.

For further reference: https://walkwithfastai.com/vision.external.timm

<br>

<span style="color: #000000; font-family: Segoe UI; font-size: 1.2em; font-weight: 400;">📌 Let's look at the original fastai functions before we build ours. An easy way to see the usage and details of a fastai class or function is to use the `doc()` function</span>

In [None]:
# doc(create_body)

In [None]:
# doc(create_model)

In [None]:
# doc(create_body)

In [None]:
# Adapted from https://walkwithfastai.com/vision.external.timm

from fastai.vision.learner import _add_norm

def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    """Create a headless feature-extractor body from a `timm` architecture.

    Args:
        arch: Name of the `timm` model to instantiate.
        pretrained: Whether to load pretrained weights.
        cut: Where to cut the model. `None` cuts at the last child module
            that contains a pooling layer; an int keeps the children before
            that index; a callable receives the model and returns the body.
        n_in: Number of input channels; the first layer is adapted to it.

    Returns:
        The body module (an `nn.Sequential` unless `cut` is a callable).

    Raises:
        TypeError: If `cut` is neither `None`, an int, nor a callable.
    """
    # num_classes=0 and global_pool='' request the model without its
    # classifier and without global pooling.
    model = create_model(arch, pretrained=pretrained, num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        ll = list(enumerate(model.children()))
        cut = next(i for i,o in reversed(ll) if has_pool_type(o))
    if isinstance(cut, int): return nn.Sequential(*list(model.children())[:cut])
    elif callable(cut): return cut(model)
    # `NamedError` was never defined, so this path used to fail with a
    # NameError instead of reporting the invalid argument.
    else: raise TypeError("cut must be either integer or function")

def create_timm_model(arch:str, n_out, cut=None, pretrained=True, n_in=3,
                      init=nn.init.kaiming_normal_, custom_head=None,
                      concat_pool=True, **kwargs):
    """Create a body + head model from a `timm` architecture.

    Args:
        arch: Name of the `timm` model used for the body.
        n_out: Number of outputs of the generated head.
        cut: Forwarded to `create_timm_body` (previously ignored).
        pretrained: Whether the body loads pretrained weights.
        n_in: Number of input channels.
        init: Initialiser applied to the head; `None` skips initialisation.
        custom_head: Head module to use instead of a generated one.
        concat_pool: Forwarded to `create_head`.
        **kwargs: Extra arguments forwarded to `create_head`.

    Returns:
        `nn.Sequential(body, head)`.
    """
    body = create_timm_body(arch, pretrained, cut, n_in)
    if custom_head is None:
        nf = num_features_model(nn.Sequential(*body.children()))
        head = create_head(nf, n_out, concat_pool=concat_pool, **kwargs)
    else: head = custom_head
    model = nn.Sequential(body, head)
    if init is not None: apply_init(model[1], init)
    return model

def timm_learner(dls, arch:str, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_out=None, normalize=True, fp16=False, **kwargs):
    """Build a convnet-style `Learner` from `dls` and a `timm` architecture.

    Args:
        dls: DataLoaders to train on; also used to infer `n_out`.
        arch: Name of the `timm` model.
        loss_func: Loss function passed to `Learner`.
        pretrained: Load pretrained weights and freeze the learner.
        cut: Forwarded to `create_timm_model`.
        splitter: Parameter-group splitter; defaults to `default_split`.
        y_range: Forwarded to the head via `create_timm_model`.
        config: Extra keyword arguments for `create_timm_model`; a `y_range`
            entry is used when `y_range` is not given. The dict is copied and
            never modified.
        n_out: Number of outputs; inferred from `dls` when `None`.
        normalize: Currently unused; kept for signature compatibility.
        fp16: Enable mixed precision via `to_non_native_fp16()`.
        **kwargs: Forwarded to `Learner`.

    Returns:
        The configured `Learner`.
    """
    # Copy so that popping `y_range` does not mutate the caller's dict.
    config = {} if config is None else dict(config)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    if y_range is None and 'y_range' in config: y_range = config.pop('y_range')
    # Pass the caller's `cut` (previously `default_split` was passed here and
    # then ignored downstream).
    model = create_timm_model(arch, n_out, cut, pretrained, y_range=y_range, **config)
    # Honour a caller-supplied splitter instead of always using the default.
    group_splitter = default_split if splitter is None else splitter
    learn = Learner(dls, model, loss_func=loss_func, splitter=group_splitter, **kwargs)
    if pretrained: learn.freeze()

    # Enable Mixed Precision Training
    if fp16: learn.to_non_native_fp16()
#     if fp16: learn.to_fp16(growth_factor=1.0)
    return learn

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Configure Wandb Callback</span>

In [None]:
# doc(WandbCallback)

In [None]:
# import wandb
# # from kaggle_secrets import UserSecretsClient

# # user_secrets = UserSecretsClient()

# # # I have saved my API token with "wandb_api" as the Label. 
# # # If you use some other Label make sure to change the same below. 
# # wandb_api = user_secrets.get_secret("wandb_key") 

# # wandb.login(key=wandb_api)
# wandb.login()

# wandb.init(project=cfg.wandb_project, name=cfg.wandb_run_name, config=config_dict)

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">💽 Training</span>

In [None]:
# Callbacks attached to the learner for every fit call.
# The Weights & Biases logger is left commented out; only checkpointing is active.
cbs = [
#     WandbCallback(log='gradients',
#                   log_preds=True,
#                   log_model=True,
#                   log_dataset=False,
#                   dataset_name=None,
#                   valid_dl=None,
#                   n_preds=36,
#                   seed=cfg.seed_val,
#                   reorder=True),   
    # Checkpoint callback monitoring `valid_loss`, saving under the job name.
    SaveModelCallback(monitor='valid_loss',
                      comp=None,
                      min_delta=0.0,
                      fname=cfg.job_name,
                      every_epoch=False,
                      with_opt=False,
                      reset_on_fit=True)
      ]

In [None]:
# Build the learner from the Config settings. With pretrained=True,
# `timm_learner` freezes the learner before returning it.
learn = timm_learner(dls,
                     cfg.model_arch,
                     loss_func=cfg.loss_func,
                     pretrained=True,
                     opt_func=ranger,
#                      splitter=default_split,
                     fp16=cfg.fp16,
                     metrics=cfg.metrics,
                     cbs=cbs)
# learn.summary()

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Transfer Learning with Pre-trained Weights</span>


In [None]:
# Partition the model parameters by whether they currently receive gradients.
all_params = list(learn.model.parameters())
frozen_params = (p for p in all_params if not p.requires_grad)
unfrozen_params = (p for p in all_params if p.requires_grad)

def _element_total(params):
    "Sum the element counts (product of each tensor's shape) over `params`."
    return sum([np.prod(p.size()) for p in params])

print(f'Total Parameters: {_element_total(all_params)}')
print(f'Frozen Parameters: {_element_total(frozen_params)}')
print(f'Unfrozen Parameters: {_element_total(unfrozen_params)}')

The default learning rates may not be the best match for every architecture we try. We want the learning rate to be high so that the training happens quickly and the model converges faster but, not so high as to skip minimums.

Fastai uses an idea developed by Leslie Smith (yes, the same person who invented the learning rate finder!) in his article "Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates". He designed a schedule for the learning rate separated into two phases: one where the learning rate grows from the minimum value to the maximum value (warmup), and one where it decreases back to the minimum value (annealing). Smith called this combination of approaches 1cycle training. This schedule is also sometimes referred to as cosine annealing.

1cycle training allows us to use a much higher maximum learning rate than other types of training, which gives two benefits:

* By training with higher learning rates, we train faster—a phenomenon Smith named super-convergence.
* By training with higher learning rates, we overfit less because we skip over the sharp local minima to end up in a smoother (and therefore more generalizable) part of the loss.

We can use 1cycle training in fastai by calling `fit_one_cycle`.

In [None]:
# Train with the 1cycle schedule while the learner is still frozen.
# Full-length alternative (10 epochs): learn.fit_one_cycle(10, 5e-3)
learn.fit_one_cycle(3, 5e-3)

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Fine-tuning of All Layers by Unfreezing</span>

<span style="color: #000000; font-family: Segoe UI; font-size: 1.2em;">📌 Fastai lets us pass a Python slice object anywhere that a learning rate is expected. The first value passed will be the learning rate in the earliest layer of the neural network. The second value will be the learning rate in the final layer and the layers in between will have learning rates that are multiplicatively equidistant throughout that range. </span>

<span style="color: #000000; font-family: Segoe UI; font-size: 1.2em;">The deepest layers of our pretrained model might not need as high a learning rate as the last ones, so we should probably use different learning rates for those. This is known as using <span style="font-weight: 600;">Discriminative Learning Rates</span>.</span>


In [None]:
from fastai.callback.tracker import SaveModelCallback, CancelStepException
# Checkpoint target for the fine-tuning stage.
model_save_name = path_model_save / 'step1-StudyClass-4Cl'
# Create the checkpoint directory up front so that saving does not depend on
# it already existing.
path_model_save.mkdir(parents=True, exist_ok=True)
sm = SaveModelCallback(fname=str(model_save_name))

In [None]:

# Unfreeze all layers and fine-tune with discriminative learning rates;
# the `sm` callback handles checkpointing during this fit.
learn.unfreeze()
# Longer alternative (3 epochs): learn.fit_one_cycle(3, lr_max=slice(1e-7, 5e-5), cbs=sm)
learn.fit_one_cycle(1, lr_max=slice(1e-7, 5e-5), cbs=sm)

In [None]:
# learn.load(file=cfg.job_name)

# interp = ClassificationInterpretation.from_learner(learn)
# interp.plot_confusion_matrix(figsize=(8,8), dpi=60)

<span style="color: #000000; font-family: Segoe UI; font-size: 1.2em;">📌 Learner already has everything it needs to perform inference</span>

In [None]:
# # Predict
# image_paths = df_annotations[df_annotations.fold==0].image_path[:3].tolist()
# test_dl = learn.dls.test_dl(image_paths)
# preds = learn.get_preds(dl=test_dl)

# class_idxs = preds[0].argmax(dim=1)
# res = [dls.vocab[c] for c in class_idxs]

# print("Actual Labels:", res)
# print("Predicted Labels:", list(df_annotations[df_annotations.fold==0].labels[:3]))

# Run Predictions on Test Image Subset

Outline of Model Inference:  
* Study Prediction - 4 Class (EffDet)  
* Study Prediction - 2 Class (EffDet)  
- not sure why doing the 2 class eval
* Image Object Detection - Yolo5

In [None]:
# #Predictions on Test
# # Inference adapted from: https://www.kaggle.com/bnapora/siim-cov19-efnb7-yolov5-infer

# # import os

# # from PIL import Image
# # import pandas as pd
# # from tqdm.auto import tqdm

# # import numpy as np
# import pydicom
# from pydicom.pixel_data_handlers.util import apply_voi_lut

### Prepare Test Images

In [None]:
# df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
# if df.shape[0] == 2477:
#     fast_sub = True
#     fast_df = pd.DataFrame(([['00086460a852_study', 'negative 1 0 0 1 1'], 
#                          ['000c9c05fd14_study', 'negative 1 0 0 1 1'], 
#                          ['65761e66de9f_image', 'none 1 0 0 1 1'], 
#                          ['51759b5579bc_image', 'none 1 0 0 1 1']]), 
#                        columns=['id', 'PredictionString'])
#     print(fast_df.head())
# else:
#     fast_sub = False


In [None]:
# def read_xray(path, voi_lut = True, fix_monochrome = True):
#     # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
#     dicom = pydicom.read_file(path)
    
#     # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to 
#     # "human-friendly" view
#     if voi_lut:
#         data = apply_voi_lut(dicom.pixel_array, dicom)
#     else:
#         data = dicom.pixel_array
               
#     # depending on this value, X-ray may look inverted - fix that:
#     if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
#         data = np.amax(data) - data
        
#     data = data - np.min(data)
#     data = data / np.max(data)
#     data = (data * 255).astype(np.uint8)
        
#     return data

# def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
#     # Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image
#     im = Image.fromarray(array)
    
#     if keep_ratio:
#         im.thumbnail((size, size), resample)
#     else:
#         im = im.resize((size, size), resample)
    
#     return im

In [None]:
# image_resize = 512
# split = 'test'
# save_dir = f'/kaggle/working/test-images-{image_resize}/{split}/'
# os.makedirs(save_dir, exist_ok=True)

In [None]:
# save_dir_study = save_dir + 'study/'
# os.makedirs(save_dir_study, exist_ok=True)

# if fast_sub:
#     xray = read_xray('/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm')
#     im = resize(xray, size=image_resize)  
#     study = '00086460a852' + '_study.png'
#     im.save(os.path.join(save_dir_study, study))
#     xray = read_xray('/kaggle/input/siim-covid19-detection/train/000c9c05fd14/e555410bd2cd/51759b5579bc.dcm')
#     im = resize(xray, size=image_resize)  
#     study = '000c9c05fd14' + '_study.png'
#     im.save(os.path.join(save_dir_study, study))
# else:   
#     for dirname, _, filenames in tqdm(os.walk(f'/kaggle/input/siim-covid19-detection/{split}')):
#         for file in filenames:
#             # set keep_ratio=True to have original aspect ratio
#             xray = read_xray(os.path.join(dirname, file))
#             im = resize(xray, size=image_resize)  
#             study = dirname.split('/')[-2] + '_study.png'
#             im.save(os.path.join(save_dir, study))


In [None]:
# image_id = []
# dim0 = []
# dim1 = []
# splits = []
# save_dir_image = save_dir + 'image/'
# os.makedirs(save_dir_image, exist_ok=True)

# if fast_sub:
#     xray = read_xray('/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm')
#     im = resize(xray, size=512)  
#     im.save(os.path.join(save_dir_image,'65761e66de9f_image.png'))
#     image_id.append('65761e66de9f.dcm'.replace('.dcm', ''))
#     dim0.append(xray.shape[0])
#     dim1.append(xray.shape[1])
#     splits.append(split)
#     xray = read_xray('/kaggle/input/siim-covid19-detection/train/000c9c05fd14/e555410bd2cd/51759b5579bc.dcm')
#     im = resize(xray, size=512)  
#     im.save(os.path.join(save_dir_image, '51759b5579bc_image.png'))
#     image_id.append('51759b5579bc.dcm'.replace('.dcm', ''))
#     dim0.append(xray.shape[0])
#     dim1.append(xray.shape[1])
#     splits.append(split)
# else:
#     for dirname, _, filenames in tqdm(os.walk(f'/kaggle/input/siim-covid19-detection/{split}')):
#         for file in filenames:
#             # set keep_ratio=True to have original aspect ratio
#             xray = read_xray(os.path.join(dirname, file))
#             im = resize(xray, size=512)  
#             im.save(os.path.join(save_dir_image, file.replace('.dcm', '_image.png')))
#             image_id.append(file.replace('.dcm', ''))
#             dim0.append(xray.shape[0])
#             dim1.append(xray.shape[1])
#             splits.append(split)
# meta = pd.DataFrame.from_dict({'image_id': image_id, 'dim0': dim0, 'dim1': dim1, 'split': splits})

# meta.head()

### Study - Predictions

In [None]:
# import numpy as np 
# import pandas as pd
# if fast_sub:
#     df = fast_df.copy()
# else:
#     df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
# id_laststr_list  = []
# for i in range(df.shape[0]):
#     id_laststr_list.append(df.loc[i,'id'][-1])
# df['id_last_str'] = id_laststr_list

# study_len = df[df['id_last_str'] == 'y'].shape[0]
# print('Count of Studies: ', study_len)

In [None]:
# if fast_sub:
#     sub_df = fast_df.copy()
# else:
#     sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
    
# sub_df = sub_df[:study_len]
# test_paths = save_dir_study + sub_df['id'] +'.png'

# sub_df['negative'] = 0
# sub_df['typical'] = 0
# sub_df['indeterminate'] = 0
# sub_df['atypical'] = 0

# label_cols = sub_df.columns[2:]

In [None]:
# # Prepare new learner object
# learn_study = timm_learner(dls,
#                      cfg.model_arch,
#                      loss_func=cfg.loss_func,
#                      pretrained=True,
#                      opt_func=ranger,
# #                      splitter=default_split,
#                      fp16=cfg.fp16,
#                      metrics=cfg.metrics,
#                      cbs=cbs)
# learn_study.load('/kaggle/working/models/efficientnetv2_rw_s_fold0_job1')

In [None]:
# # Predict - Study Label
# test_dl = learn_study.dls.test_dl(test_paths)
# preds = learn_study.get_preds(dl=test_dl)

# # #Add labels
# # class_idxs = preds[0].argmax(dim=1)
# # res = [dls.vocab[c] for c in class_idxs]
# # print("Actual Labels:", res)

# #Update submission df
# sub_df[label_cols] = preds[0]
# print(sub_df.head())


In [None]:
# sub_df.columns = ['id', 'PredictionString1', 'negative', 'typical', 'indeterminate', 'atypical']
# df = pd.merge(df, sub_df, on = 'id', how = 'left')
# df.head()

## Detection 
### study string

In [None]:
# for i in range(study_len):
#     negative = df.loc[i,'negative']
#     typical = df.loc[i,'typical']
#     indeterminate = df.loc[i,'indeterminate']
#     atypical = df.loc[i,'atypical']
#     df.loc[i, 'PredictionString'] = f'negative {negative} 0 0 1 1 typical {typical} 0 0 1 1 indeterminate {indeterminate} 0 0 1 1 atypical {atypical} 0 0 1 1'

# df_study = df[['id', 'PredictionString']]
# df_study.head()

### 2 class  
-trained model with only 2 classes

In [None]:
# if fast_sub:
#     sub_df = fast_df.copy()
# else:
#     sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# sub_df = sub_df[study_len:]
# test_paths = save_dir_image + sub_df['id'] +'.png'

# sub_df['none'] = 0
# print(sub_df.head())

# label_cols = sub_df.columns[2]

In [None]:
# # Predict - Study Label
# test_dl = learn_study.dls.test_dl(test_paths)
# preds = learn_study.get_preds(dl=test_dl)

# #Update submission df
# sub_df[label_cols] = preds[0]
# # print(sub_df.head())

# df_2class = sub_df.reset_index(drop=True)
# print(df_2class)