## Notebook settings

In [128]:
# Reload imported modules automatically before each cell is executed (mode 2).
%load_ext autoreload
%autoreload 2

# Save the notebook every 10 seconds.
%autosave 10

# The lab_black extension is left disabled.
#%load_ext lab_black

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Autosaving every 10 seconds


In [129]:
import sys
import os

# Put the parent directory first on the import path
# (presumably where the lfp_analysis package lives — confirm).
sys.path.insert(0, os.path.abspath(".."))

## Imports

In [130]:
from lfp_analysis.data import *
from lfp_analysis.process import *
from lfp_analysis.resnet import *
from lfp_analysis.svm import *

from fastai.vision import *
import torch.nn.functional as F
from torchvision.transforms import ToPILImage, ToTensor

In [131]:
import numpy as np
import pandas as pd
import h5py

from pathlib import Path

import matplotlib.pyplot as plt

%matplotlib widget

# Import data:

In [132]:
# Dataset selection: data root, patient ID and dataset name.
DATA_PATH = Path("./../data")
PAT_ID = Path("ET2")
DATASET_NAME = Path("Pouring_off")

# Both source files share the same folder and stem; only the extension differs.
H5_SOURCE = DATA_PATH.joinpath("TF_and_df", PAT_ID, DATASET_NAME.with_suffix(".h5"))
CSV_SOURCE = DATA_PATH.joinpath("TF_and_df", PAT_ID, DATASET_NAME.with_suffix(".csv"))


### Read TF

In [133]:
# Read the whole "TF" dataset into memory and release the file handle right away.
# The `[:]` slice copies the data out of the file, so TF remains usable after
# the `with` block has closed it.
with h5py.File(H5_SOURCE, "r") as h5:
    TF = h5["TF"][:]

In [134]:
# Show the dimensions of the loaded TF array.
TF.shape

(5, 99, 627750)

### Read df_data

In [135]:
# Load the sample table; the first CSV column is used as the index.
df_data = pd.read_csv(CSV_SOURCE, index_col=0)

In [136]:
# Class balance of the "label" column as fractions of all rows.
df_data["label"].value_counts(normalize=True)

False    0.542484
True     0.457516
Name: label, dtype: float64

In [137]:
# Class balance of the "label" column as absolute row counts.
df_data["label"].value_counts()

False    166
True     140
Name: label, dtype: int64

# Baseline Classifier:

In [138]:
# Run the baseline classifier on the TF array and the sample table.
# classify() prints train/validation reports and returns a pair of scores
# (presumably train and validation accuracy — confirm in lfp_analysis).
BLClassifier(TF, df_data).classify()

On Train: 
              precision    recall  f1-score   support

       False       0.83      1.00      0.90       133
        True       1.00      0.75      0.86       111

    accuracy                           0.89       244
   macro avg       0.91      0.87      0.88       244
weighted avg       0.91      0.89      0.88       244

[[133   0]
 [ 28  83]]
AUC: 0.8738738738738738

On Valid:
              precision    recall  f1-score   support

       False       0.82      1.00      0.90        33
        True       1.00      0.76      0.86        29

    accuracy                           0.89        62
   macro avg       0.91      0.88      0.88        62
weighted avg       0.91      0.89      0.88        62

[[33  0]
 [ 7 22]]
AUC: 0.8793103448275862


(0.8852459016393442, 0.8870967741935484)

# Prepare learner:

## Dataloader prep

### Data stream:

In [None]:
def get_x(row):
    """Build the model input for one row of the sample table.

    Slices the module-level ``TF`` array along its last axis, from
    ``row["id_start"]`` up to (but not including) ``row["id_end"]``, keeping the
    first two axes whole, and wraps the result in a new tensor.
    """
    start, stop = row["id_start"], row["id_end"]
    window = TF[:, :, start:stop]
    return torch.tensor(window)


def get_y(row):
    """Return the target of one row: the value stored under ``"label"``."""
    label = row["label"]
    return label


def splitter(df):
    """Split the table's index labels into training and validation lists.

    Rows whose ``"is_valid"`` equals 0 go to the first list, rows where it
    equals 1 go to the second. Returns ``(train, valid)``.
    """
    flags = df["is_valid"]
    train, valid = (df.index[flags == wanted].tolist() for wanted in (0, 1))
    return train, valid

### Data transforms:

In [None]:
class Resizer(Transform):
    """Resize every slice along the first axis of an input tensor.

    Parameters
    ----------
    size : tuple
        Target size handed to PIL's ``Image.resize``, which reads it as
        ``(width, height)``.
    """

    def __init__(self, size):
        self.size = size

    def encodes(self, X):
        """Return ``X`` with each slice resized to ``self.size``.

        ``TensorCategory`` inputs (the targets) are returned unchanged.
        """
        if isinstance(X, TensorCategory):
            return X

        to_pil, to_tensor = ToPILImage(), ToTensor()
        resized = [to_tensor(to_pil(x).resize(self.size)) for x in X]

        # ToTensor yields (C, H, W), so a single-channel image leaves a length-1
        # axis at position 1 after stacking. Remove only that axis: a bare
        # squeeze() would also collapse the leading axis when X holds one slice.
        return torch.stack(resized).squeeze(1)


class LFPNormalizer(Transform):
    """Standardise each slice along axis 1 with its own mean and std.

    ``means[i]`` and ``stds[i]`` are applied to the i-th slice of axis 1.
    """

    def __init__(self, means, stds):
        self.means = means
        self.stds = stds

    def encodes(self, X):
        """Return ``(X - mean) / std`` per axis-1 slice; targets pass through."""
        if isinstance(X, TensorCategory):
            return X

        normalised = []
        for idx, channel in enumerate(torch.unbind(X, 1)):
            normalised.append((channel - self.means[idx]) / self.stds[idx])
        return torch.stack(normalised, 1)

In [None]:
def LFP_block():
    """Create the TransformBlock for the LFP inputs.

    Item transforms: resize to 160x160, then ``IntToFloatTensor``.
    Batch transform: normalise four channels, each with mean 0.5 and std 0.098.
    """
    # NOTE(review): four means/stds are configured, but the TF array shown in
    # this notebook has 5 on its first axis and get_x keeps that axis whole —
    # confirm the intended channel count.
    n_channels = 4
    item_pipeline = [Resizer((160, 160)), IntToFloatTensor]
    normalizer = LFPNormalizer([0.5] * n_channels, [0.098] * n_channels)
    return TransformBlock(item_tfms=item_pipeline, batch_tfms=normalizer)

### Dataloader:

In [None]:
# Inputs are built by get_x and processed by LFP_block; targets are built by
# get_y and processed by CategoryBlock. splitter assigns rows to train/valid.
dblock = DataBlock(
    blocks=(LFP_block, CategoryBlock),
    get_x=get_x,
    get_y=get_y,
    splitter=splitter,
)

In [None]:
# dblock.summary(df_data)

In [None]:
# Build the data loaders from the sample table with a batch size of 64.
dls = dblock.dataloaders(df_data, bs=64)

# Learner

In [12]:
# NOTE(review): the meaning of these positional arguments is not visible here.
# The printed model starts with a conv layer taking 4 input channels — confirm
# each value against lfp_analysis.resnet.
resnet = ResNet(4, 2, [2, 3, 4, 3], 4)

In [13]:
# Display the layer structure of the model.
resnet

ResNet(
  (0): ConvLayer(
    (0): Conv2d(4, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (1): ConvLayer(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (2): ConvLayer(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): ResBlock(
      (convs): Sequential(
        (0): ConvLayer(
          (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        

In [None]:
# Combine the data loaders and the model into a Learner that uses
# cross-entropy as its loss and reports accuracy.
learn = Learner(
    dls,
    resnet,
    metrics=[accuracy],
    loss_func=F.cross_entropy,
    # cbs=[ActivationStats(with_hist=True)],
)

In [None]:
# Pull a single batch and show the target and input shapes.
xb, yb = dls.one_batch()
yb.shape, xb.shape

In [None]:
# Loss of the model on the batch held in xb/yb, computed before any training
# call in this notebook, as a reference point.
init_loss = learn.loss_func(learn.model(xb), yb)
init_loss

In [None]:
# Drop the references to the sample batch.
del xb, yb

In [None]:
# Run the learning-rate finder before choosing the rate used for training.
learn.lr_find()

In [None]:
# Train for 10 epochs with a learning rate of 2e-4.
# NOTE(review): this was spelled `2*10e-5`, which evaluates to 2e-4 (10e-5 is
# 1e-4), not 2e-5 — confirm that 2e-4 is the intended rate.
learn.fit_one_cycle(10, 2e-4)