In [1]:
from fastai.vision.all import *
from fastai.vision.widgets import *
import fastai
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [2]:
RUN_NAME_OF_MODEL_TO_LOAD = '20210210-1221 - arch=tf_efficientnet_b4_ns - samples=1800 frozen=1 epochs=40 bs=8 res=480'


#f'20210209-0234 - arch=tf_efficientnet_b4_ns - samples=1800 frozen=1 epochs=40 bs=7 res=456'
EPOCH_TO_LOAD = 2
# RESOLUTION = round(380 * 1.2)  # 300
RESOLUTION=800
BATCH_SIZE = 10
SAMPLE_SIZE = 1000

path = '../data'
sub = 'train_images'  # 'test_images'
# dataset_path = Path(path)

In [3]:
from timm import create_model
from fastai.vision.learner import _update_first_layer

def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    "Creates a body from any model in the `timm` library."
    model = create_model(arch, pretrained=pretrained, num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        ll = list(enumerate(model.children()))
        cut = next(i for i,o in reversed(ll) if has_pool_type(o))
    if isinstance(cut, int): return nn.Sequential(*list(model.children())[:cut])
    elif callable(cut): return cut(model)
    else: raise NamedError("cut must be either integer or function")
        
def create_timm_model(arch:str, n_out, cut=None, pretrained=True, n_in=3, init=nn.init.kaiming_normal_, custom_head=None,
                     concat_pool=False, **kwargs):
    "Create custom architecture using `arch`, `n_in` and `n_out` from the `timm` library"
    body = create_timm_body(arch, pretrained, None, n_in)
    if custom_head is None:
        nf = num_features_model(nn.Sequential(*body.children())) * (2 if concat_pool else 1)
        head = create_head(nf, n_out, concat_pool=concat_pool, **kwargs)
    else: head = custom_head
    model = nn.Sequential(body, head)
    if init is not None: apply_init(model[1], init)
    return model


def timm_learner(dls, arch:str, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_out=None, normalize=True, **kwargs):
    "Build a convnet style learner from `dls` and `arch` using the `timm` library"
    if config is None: config = {}
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    if y_range is None and 'y_range' in config: y_range = config.pop('y_range')
    model = create_timm_model(arch, n_out, default_split, pretrained, y_range=y_range, **config)
    learn = Learner(dls, model, loss_func=loss_func, splitter=default_split, **kwargs)
    if pretrained: learn.freeze()
    return learn

In [4]:
# sample_df = pd.read_csv(f'{path}/sample_submission.csv')
sample_df = pd.read_csv(f'{path}/train.csv')
# sample_df = sample_df.head(50)

In [5]:
# sample_df['image_id'] =  f'{path}/' + sample_df['image_id'].astype(str)
cols = sample_df.columns.tolist()
cols = cols[::-1]
sample_df = sample_df[cols]

In [6]:
sample_df = sample_df.sample(n=SAMPLE_SIZE, replace=False)  # random sample

In [8]:
aug_tfms =  aug_transforms(size=RESOLUTION, pad_mode='reflection', do_flip=True, batch=False, 
                               p_affine=0.7, max_rotate=45, max_warp=0, min_zoom=1.0, max_zoom=2.5, 
                               mult=3,  p_lighting=0.5, max_lighting=0.1, min_scale=0.75) #,xtra_tfms=[sha])

sat = Saturation(p=0.7, max_lighting=0.2)

# batch_tfms = [ Normalize.from_stats(*imagenet_stats), sat]
batch_tfms = [ *aug_tfms, sat, Normalize.from_stats(*imagenet_stats)]#, sha]

#     item_tfms=RandomResizedCrop(RESOLUTION)
item_tfms=Resize(RESOLUTION, method='bilinear', pad_mode='zeros')
#     item_tfms=RatioResize(RESOLUTION)

dls = ImageDataLoaders.from_df(sample_df, folder=f'../data/{sub}', seed=42, label_col = 0, fn_col=1, 
                               batch_tfms=batch_tfms, bs=BATCH_SIZE, item_tfms=item_tfms)
test_dl = dls.test_dl(sample_df)

In [None]:
dls.test_dl

In [38]:
# model = load_learner(f'../models/{RUN_NAME_OF_MODEL_TO_LOAD}.pkl', cpu=False)
import timm
model = timm_learner(dls,
                    'tf_efficientnet_b4_ns',
                     opt_func=ranger,
                     loss_func=LabelSmoothingCrossEntropy(),
          #           cbs=cbs,
                     metrics = [accuracy]).to_fp16()

load_model(f'models/{RUN_NAME_OF_MODEL_TO_LOAD}_{EPOCH_TO_LOAD}.pth', model, opt=Adam, with_opt=False)

In [39]:
preds, something = model.tta(dl=test_dl, n=5, use_max=False, beta=0.18)

In [40]:
# test_dl.show_batch()

In [41]:
# preds, something = model.tta(dl=test_dl, n=5)  #, use_max=False, beta=0.1)
print(classification_report(sample_df.iloc[:,0], preds.argmax(dim=-1).numpy()))  # TTA

              precision    recall  f1-score   support

           0       0.59      0.71      0.65        49
           1       0.90      0.75      0.82       104
           2       0.80      0.82      0.81       106
           3       0.96      0.93      0.94       609
           4       0.71      0.83      0.76       132

    accuracy                           0.88      1000
   macro avg       0.79      0.81      0.80      1000
weighted avg       0.89      0.88      0.88      1000



In [35]:

preds2 = model.get_preds(dl=test_dl)
print(classification_report(sample_df.iloc[:,0], preds2[0].argmax(dim=-1).numpy()))  # straight

              precision    recall  f1-score   support

           0       0.67      0.69      0.68        49
           1       0.96      0.73      0.83       104
           2       0.65      0.91      0.76       106
           3       0.97      0.91      0.94       609
           4       0.75      0.84      0.79       132

    accuracy                           0.87      1000
   macro avg       0.80      0.82      0.80      1000
weighted avg       0.89      0.87      0.87      1000



In [29]:
print(classification_report(sample_df.iloc[:,0], ((preds + preds2[0]).argmax(dim=-1).numpy())))  # average

              precision    recall  f1-score   support

           0       0.63      0.69      0.66        49
           1       0.92      0.69      0.79       104
           2       0.63      0.89      0.74       106
           3       0.98      0.87      0.92       609
           4       0.67      0.89      0.76       132

    accuracy                           0.85      1000
   macro avg       0.77      0.81      0.78      1000
weighted avg       0.88      0.85      0.86      1000



In [None]:
print(classification_report(sample_df.iloc[:,0], (torch.maximum(preds,preds2[0]).argmax(dim=-1).numpy())))  # max

In [None]:
import albumentations as A


class AlbumentationsTransform(DisplayedTransform):
    split_idx,order=0,2
    def __init__(self, train_aug): store_attr()
    
    def encodes(self, img: PILImage):
        aug_img = self.train_aug(image=np.array(img))['image']
        return PILImage.create(aug_img)
    
# get_t_tfms():
#     return A.Compose()


aug = A.Sharpen(p=1, alpha=(0.99,1), lightness=(0.5,0.5))
sha = AlbumentationsTransform(aug)
def aug_tfm(img): 
    np_img = np.array(img)
    aug_img = aug(image=np_img)['image']
    return PILImage.create(aug_img)

In [None]:


# a_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.999]
a_list=[1]
# a_list = np.arange(0.5,1.0,0.1)
#[1] #[0,45,90,135]  #[750, 800, 850, 900]# list(np.arange(0, 5, 0.5)) # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
scores_gs = []

for VARIABLE in a_list:
    aug_tfms =  aug_transforms(size=RESOLUTION, pad_mode='reflection', do_flip=True, batch=False, 
                                   p_affine=0.7, max_rotate=45, max_warp=0, min_zoom=1.0, max_zoom=2.5, 
                                   mult=3,  p_lighting=0.5, max_lighting=0.1, min_scale=0.75) #,xtra_tfms=[sha])

    sat = Saturation(p=0.7, max_lighting=0.2)

    # batch_tfms = [ Normalize.from_stats(*imagenet_stats), sat]
    batch_tfms = [ *aug_tfms, sat, Normalize.from_stats(*imagenet_stats)]#, sha]

#     item_tfms=RandomResizedCrop(RESOLUTION)
    item_tfms=Resize(RESOLUTION, method='bilinear', pad_mode='zeros')
#     item_tfms=RatioResize(RESOLUTION)

    dls = ImageDataLoaders.from_df(sample_df, folder=f'../data/{sub}', seed=42, label_col = 0, fn_col=1, 
                                   batch_tfms=batch_tfms, bs=BATCH_SIZE, item_tfms=item_tfms)
    test_dl = dls.test_dl(sample_df)

    preds, something = model.tta(dl=test_dl, n=5, use_max=False, beta=0.18)
    scores_gs.append(accuracy_score(sample_df.iloc[:,0], preds.argmax(dim=-1).numpy()))  # TTA
    
    print(f'best SO FAR: {a_list[np.argmax(scores_gs)]}, at {scores_gs[np.argmax(scores_gs)]}')


print(f'best n: {a_list[np.argmax(scores_gs)]}')
print(a_list)
print(scores_gs)