In [1]:
import os

from fastai.vision.all import *
import timm
from sklearn.metrics import roc_auc_score
import cv2
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.model_selection import train_test_split

In [2]:
# Project root. Set the SETI_ROOT environment variable to run the notebook on
# another machine; without it the original local checkout path is used.
path = Path(os.environ.get('SETI_ROOT', '/home/azaidi/Desktop/kaggle/seti_2021'))
# The listings below are displayed relative to this base path.
Path.BASE_PATH = path
path.ls()

(#16) [Path('kfold.ipynb'),Path('res50.ipynb'),Path('data'),Path('kf_sesresnext.ipynb'),Path('aug_training.ipynb'),Path('models'),Path('.git'),Path('3_arch_big.ipynb'),Path('initial.ipynb'),Path('.gitignore')...]

In [3]:
# List the entries directly under the training-data directory.
(path/'data/train').ls()

(#17) [Path('data/train/9'),Path('data/train/e'),Path('data/train/3'),Path('data/train/5'),Path('data/train/7'),Path('data/train/d'),Path('data/train/b'),Path('data/train/1'),Path('data/train/a'),Path('data/train/0')...]

In [4]:
# Peek inside the first entry of the training directory (the per-sample .npy files).
(path/'data/train').ls()[0].ls()

(#3139) [Path('data/train/9/9f0dff2cd2b3.npy'),Path('data/train/9/91af44908241.npy'),Path('data/train/9/923ed9e2e4d8.npy'),Path('data/train/9/990f15496627.npy'),Path('data/train/9/92a91b53e5ee.npy'),Path('data/train/9/9567d242de97.npy'),Path('data/train/9/9f5cb9d7a0d6.npy'),Path('data/train/9/9aea15089923.npy'),Path('data/train/9/9deb1a9a72a5.npy'),Path('data/train/9/9e26f4ef6fc6.npy')...]

In [5]:
# Training labels (columns `id` and `target`) and the sample submission file.
train_df = pd.read_csv(path/'data/train_labels.csv')
sub_df = pd.read_csv(path/'data/sample_submission.csv')
# Display (n_rows, n_columns) of the label table.
train_df.shape

(50165, 2)

In [6]:
data_path = path/'data'
# Each sample lives at <data>/train/<first character of id>/<id>.npy;
# store that location as a plain string in a new `path` column.
train_df['path'] = [str(data_path/'train'/sample_id[0]/sample_id) + '.npy'
                    for sample_id in train_df['id']]

In [7]:
# Show the first row to check the new `path` column.
train_df.head(1)

Unnamed: 0,id,target,path
0,00034abb3629,0,/home/azaidi/Desktop/kaggle/seti_2021/data/train/0/00034abb3629.npy


In [8]:
class SETIDataset:
    """Map-style dataset over a dataframe with `path` and `target` columns.

    Each item is read from the `.npy` file named in `path`. The author's notes
    record two alternatives that are not applied here: keeping only entries
    `[0, 2, 4]` of the leading axis before stacking, and resizing the stacked
    array to 512x512 with `cv2.resize`.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return `(data_tensor, label_tensor)` for the row at position `index`.

        The loaded array is cast to float32, stacked along its leading axis
        with `np.vstack`, transposed, and given a leading axis of size 1
        (1x256x1638 for the competition files, per the check further down).
        """
        row = self.df.iloc[index]
        label = row.target
        raw = np.load(row.path).astype(np.float32)

        # Stack the leading axis vertically, then swap the two remaining axes.
        stacked = np.vstack(raw).transpose((1, 0))

        # Add the single channel axis expected by the conv body (n_in=1).
        data_tensor = torch.tensor(stacked, dtype=torch.float32)[None]
        return (data_tensor, torch.tensor(label))

In [9]:
# Ad-hoc 95/5 split used for the quick dataset check below; no random_state is
# passed, so the split differs on every run. `get_dls` performs its own split.
t_df, v_df = train_test_split(train_df, test_size=0.05)

In [10]:
# Smoke test: build the training dataset and display the shape of the first
# sample's input tensor.
t_ds = SETIDataset(t_df)
t_ds[0][0].shape

torch.Size([1, 256, 1638])

In [11]:
def get_dfs(df, v_idxs, fold):
    """Split `df` into training and validation frames for one fold.

    `v_idxs[fold]` holds the *positional* row indices of the validation rows.
    Those rows form `v_df`; every other row forms `t_df`. Both frames are
    returned with a fresh index (the old one is kept as an `index` column, as
    `reset_index()` does by default).

    Returns `(t_df, v_df)`.
    """
    val_pos = v_idxs[fold]
    v_df = df.iloc[val_pos].reset_index()
    # Translate positions to labels before dropping so the complement is right
    # even when `df` does not carry a default 0..n-1 index.
    t_df = df.drop(df.index[val_pos]).reset_index()
    return t_df, v_df

In [12]:
def roc_auc(preds,targ):
    """ROC AUC of `preds` against binary targets `targ`, usable as a metric function.

    Both tensors are moved to the CPU and `preds` is squeezed before scoring.
    Returns 0.5 when `roc_auc_score` rejects the input with a `ValueError`
    (for example when `targ` contains a single class). Any other exception,
    including `KeyboardInterrupt`, propagates instead of being reported as 0.5.
    """
    try: return roc_auc_score(targ.cpu(),preds.squeeze().cpu())
    except ValueError: return 0.5

In [13]:
from timm import create_model
from fastai.vision.learner import _update_first_layer

def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    """Creates a body from any model in the `timm` library.

    The model is created without classifier or global pooling
    (`num_classes=0, global_pool=''`) and its first layer is adapted to `n_in`
    input channels. `cut` selects what is kept:

    - `None`: cut just before the last child module that `has_pool_type` accepts;
    - an `int`: keep the first `cut` child modules as an `nn.Sequential`;
    - a callable: return `cut(model)`.

    Raises `NameError` for any other `cut`.
    """
    model = create_model(arch, pretrained=pretrained, num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        ll = list(enumerate(model.children()))
        cut = next(i for i,o in reversed(ll) if has_pool_type(o))
    if isinstance(cut, int): return nn.Sequential(*list(model.children())[:cut])
    elif callable(cut): return cut(model)
    # `NamedError` was never defined, so the message below was lost behind an
    # unrelated "name 'NamedError' is not defined" error.
    else: raise NameError("cut must be either integer or function")
        
def create_timm_model(arch:str, n_out, cut=None, pretrained=True, n_in=3, init=nn.init.kaiming_normal_, custom_head=None,
                     concat_pool=True, **kwargs):
    """Create custom architecture using `arch`, `n_in` and `n_out` from the `timm` library.

    Builds a body with `create_timm_body(arch, pretrained, cut, n_in)` and
    attaches either `custom_head` or a head from `create_head` (which receives
    `concat_pool` and `**kwargs`). When `init` is not None it is applied to the
    head only. The model is returned on the GPU when CUDA is available and on
    the CPU otherwise.
    """
    # Forward `cut`; it used to be replaced by None, silently ignoring the caller.
    body = create_timm_body(arch, pretrained, cut, n_in)
    if custom_head is None:
        nf = num_features_model(nn.Sequential(*body.children()))
        head = create_head(nf, n_out, concat_pool=concat_pool, **kwargs)
    else: head = custom_head
    model = nn.Sequential(body, head)
    if init is not None: apply_init(model[1], init)
    # Same placement as before on GPU machines, but no crash on CPU-only ones.
    return model.to('cuda' if torch.cuda.is_available() else 'cpu')

def timm_learner(dls, arch:str, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_in=3, n_out=None, normalize=True, **kwargs):
    """Build a convnet style learner from `dls` and `arch` using the `timm` library.

    - `cut` is forwarded to `create_timm_model` (None keeps the automatic cut).
    - `splitter` defaults to `default_split` when None.
    - `config` holds extra keyword arguments for `create_timm_model`; it is
      copied, so the caller's dict is never modified. A `y_range` entry in it
      is used only when the `y_range` argument is None.
    - `n_out` is inferred with `get_c(dls)` when None; an `AssertionError` is
      raised if it is still falsy.
    - `normalize` is accepted but not used by this function.
    - Remaining `**kwargs` go to `Learner`.

    The learner is frozen when `pretrained` is true.
    """
    config = {} if config is None else dict(config)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    # Always remove the config entry so it cannot collide with the explicit
    # `y_range=` keyword below; the explicit argument wins.
    config_y_range = config.pop('y_range', None)
    if y_range is None: y_range = config_y_range
    # Third positional argument is `cut`; `default_split` is a splitter and
    # does not belong here.
    model = create_timm_model(arch, n_out, cut, pretrained, n_in=n_in, y_range=y_range, **config)
    if splitter is None: splitter = default_split
    learn = Learner(dls, model, loss_func=loss_func, splitter=splitter, **kwargs)
    if pretrained: learn.freeze()
    return learn

In [14]:
def get_dls(fold, bs=32):
    """Build training/validation `DataLoaders` over `train_df`.

    A 95/5 split stratified on `target` is drawn with `random_state=fold`, so
    each `fold` (an int) gets its own reproducible split. The training loader
    reshuffles every epoch; the validation loader keeps a fixed order. Both use
    batch size `bs` and 8 worker processes.
    """
    t_df, v_df = train_test_split(train_df, test_size=0.05, random_state=fold,
                                  stratify=train_df['target'])
    t_ds = SETIDataset(t_df)
    v_ds = SETIDataset(v_df)
    # Without shuffle=True the model saw the samples in the same order every epoch.
    train_dl = torch.utils.data.DataLoader(t_ds, batch_size=bs, num_workers=8, shuffle=True)
    valid_dl = torch.utils.data.DataLoader(v_ds, batch_size=bs, num_workers=8)
    return DataLoaders(train_dl, valid_dl)

In [15]:
# Architecture names passed to `timm_learner`; the training loop below trains them in this order.
models = ['seresnext26d_32x4d', 'resnext50_32x4d', 'efficientnet_b2']

In [17]:
# Train each architecture in turn on its own data split and save the weights.
for x, model in enumerate(models):
    dls = get_dls(x)
    # Single-channel input, single-logit output, mixed precision while training.
    learn = timm_learner(
        dls, model,
        pretrained=True, n_in=1, n_out=1,
        metrics=[roc_auc],
        opt_func=ranger,
        loss_func=BCEWithLogitsLossFlat(),
    ).to_fp16()
    learn.fit_flat_cos(12, 5e-2, wd=0.1, cbs=[ReduceLROnPlateau()])
    # Back to full precision before saving.
    learn = learn.to_fp32()
    # NOTE(review): the checkpoint name says "7ep" although 12 epochs are
    # requested above; left unchanged so existing file names still match.
    learn.save(f'{model}_full_data_7ep')

epoch,train_loss,valid_loss,roc_auc,time
0,0.129159,0.12185,0.929836,10:08
1,0.110574,0.203998,0.937231,10:08
2,0.094349,0.169403,0.934701,10:08
3,0.087687,0.121161,0.931397,10:08
4,0.083876,0.101098,0.927107,10:08
5,0.086413,0.181117,0.925878,10:08
6,0.08553,0.155333,0.933275,10:08
7,0.076144,0.103859,0.9305,10:08
8,0.068047,0.09442,0.929197,10:08


Epoch 1: reducing lr to 0.005
Epoch 2: reducing lr to 0.005
Epoch 5: reducing lr to 0.005
Epoch 6: reducing lr to 0.005
Epoch 7: reducing lr to 0.005


KeyboardInterrupt: 