In [4]:
import torch
import glob
import sys
sys.path.append('../pytorch-image-models')
import timm
from torch.utils.data.sampler import WeightedRandomSampler
import numpy as np
import pandas as pd
from collections import Counter
from torch.nn import Sequential, Linear, Dropout
from utils import stratify_split, make_holdout_df

In [6]:
# Read the cleaned training CSV, then pass it through the project helpers
# (presumably: hold out a subset, then assign stratified folds on 'label' —
# confirm against utils.py).
SEED = 123
N_FOLDS = 5

data_df = pd.read_csv('data/train_cleaned-0.5.csv', engine='python')
data_df, holdout_df = make_holdout_df(data_df, seed=SEED)
folds_df = stratify_split(data_df, N_FOLDS, SEED, 'label')

# Fold file stored next to a previously trained model, loaded for a size comparison.
folds1 = pd.read_csv(
    'trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53/folds.csv'
)

# Row counts of the freshly computed folds vs. the stored ones.
len(folds_df.image_id), len(folds1.image_id)

(16881, 18187)

In [None]:
# Pair two parallel lists element-wise into [a_i, b_i] rows, then read the
# first column back out of the resulting 2-D array.
a = [1, 2, 3, 4]
b = [0.1, 0.2, 0.3, 0.4]

preds = [[left, right] for left, right in zip(a, b)]
npa = np.array(preds)
print(npa[:, 0])

In [None]:
# Build a (2, 2, 2) array, keep only element 0 of the last axis, and average
# the resulting 2x2 matrix over its first axis.
a = np.array([
    [[1, 0.2], [2, 0.3]],
    [[2, 0.1], [4, 0.9]],
])
b = a[..., 0]
print(b)
b.mean(axis=0)

In [None]:
# Instantiate the architecture with pretrained=False and print its module tree.
arch_name = 'skresnext50d_32x4d'
model = timm.create_model(arch_name, pretrained=False)
print(model)

In [None]:
# Load a saved checkpoint file and print its 'callbacks' entry.
ckpt_path = 'trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.104_val_acc=0.884_fold2.ckpt'

# map_location='cpu' keeps this inspection working on machines without CUDA
# even if the checkpoint contains tensors that were saved from a GPU.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
ckpt = torch.load(ckpt_path, map_location='cpu')
print(ckpt['callbacks'])

In [None]:
# Create the backbone with pretrained=True, then replace its classifier
# with dropout followed by a linear layer that has 5 outputs.
model = timm.create_model('tf_efficientnet_b4_ns', pretrained=True)

# Input width of the existing classifier, reused for the replacement layer.
in_features = model.classifier.in_features
model.classifier = Sequential(Dropout(p=0.3), Linear(in_features, 5))

print(model)

In [None]:
# Show what torch.hub lists for this GitHub repository (needs network access).
hub_repo = 'rwightman/pytorch-image-models'
torch.hub.list(hub_repo)

# WeightedRandomSampler

In [None]:

# Draw indices from a WeightedRandomSampler and compare the empirical
# frequency of each index against the weights it was given.
num_draws = 20000
index_weights = [0.1, 0.9, 0.4, 3.0, 0.6]

s = np.array(list(WeightedRandomSampler(index_weights, num_draws, replacement=True)))
counter = Counter(s)
print(counter)

# Turn raw counts into fractions of the total number of draws.
for k in list(counter):
    counter[k] = counter[k] / num_draws

for i in range(5):
    print(i, counter[i])

In [None]:
# Read a stored fold file and tally how many rows carry each label value.
folds = pd.read_csv('./trained-models/sgd_onecycle_bnfrozen_smoothed/folds.csv')
values = folds['label'].values

classcounts = Counter()
classcounts.update(values)
classcounts

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Build a WeightedRandomSampler over every row whose fold is not 0.
# Each sample's weight is 1 / (count of its class * optional multiplier), so a
# multiplier > 1 makes that class *less* likely to be drawn.

# Multipliers applied to the raw per-class counts; classes not listed keep factor 1.
class_multipliers = {0: 3, 1: 2., 2: 2.3, 4: 2.3}

# Boolean-mask selection: does not rely on the index labels matching row positions.
train_df = folds[folds.fold != 0].reset_index(drop=True)
print(len(train_df))
target = train_df.label.values
print('target classes', np.unique(target))

# Keyword arguments are required by scikit-learn >= 1.0 (positional use raises TypeError).
# `w` is not used below; it is kept so it can still be inspected.
w = compute_class_weight(class_weight=class_multipliers,
                         classes=np.unique(target),
                         y=target)

# classes: sorted unique labels; class_index: position of each sample's label
# within `classes`; class_sample_count: number of samples per class.
classes, class_index, class_sample_count = np.unique(
    target, return_inverse=True, return_counts=True
)
print(class_sample_count)

# Scale in float: writing a float product into the int count array would
# silently truncate it (e.g. 100 * 2.3 stored as 229 or 230 instead of 230.0).
class_sample_count = class_sample_count.astype(np.float64)
for label, factor in class_multipliers.items():
    # Match by label value, not by array position, so a missing class is a no-op.
    class_sample_count[classes == label] *= factor

print(class_sample_count)

weight = 1. / class_sample_count
print('w', weight)
samples_weight = weight[class_index]  # one weight per training sample
print(len(samples_weight))
samples_weight = torch.from_numpy(samples_weight)

sampler = WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)

sampled = np.array(list(sampler))
sampled

# smooth cross entropy

In [None]:
# axis=0 collapses the rows (one sum per column); axis=1 collapses the columns
# (one sum per row).
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
np.sum(a, axis=0), np.sum(a, axis=1)

In [None]:
# Walk through how a smoothed target matrix is built and applied to a 3x3
# score matrix `x` (one row per sample, one column per class).
x = torch.tensor([[0.5, 0.25, 0.75],
                  [0.2, 0.7, 0.1],
                  [0.1, 0.1, 0.8]])
target = torch.tensor([0, 2, 1])

smooth = 0.0
confidence = 1 - smooth
num_classes = x.size(1)

# Start every entry at the off-target value smooth / (num_classes - 1).
weight = torch.full_like(x, smooth / (num_classes - 1))
print(weight)

# For each row, overwrite the column named by `target` with `confidence`
# (in place; deterministic, driven entirely by `target`).
print(weight.scatter_(1, target.view(-1, 1), confidence))

# Weight the scores, add them up per sample (across columns), then average
# over the samples.
dist_lsm_input = -weight * x
dist_lsm_input.sum(dim=1).mean()

In [None]:
from loss_functions import bi_tempered_logistic_loss

# Evaluate the project's bi-tempered loss on a tiny hand-written batch at two
# temperature settings and print both results.
device = "cpu"

activations = torch.tensor([[-0.5, 0.1, 2.0],
                            [0.1, 2, 3]], dtype=torch.float32, device=device)
labels = torch.tensor([[0.2, 0.5, 0.3],
                       [0.1, 0.2, 0.3]], dtype=torch.float32, device=device)

# Inputs shared by both calls; only the temperatures change.
shared_inputs = dict(activations=activations, labels=labels)

# The standard logistic loss is obtained when t1 = t2 = 1.0
loss = bi_tempered_logistic_loss(t1=1.0, t2=1.0, **shared_inputs)
print("Loss, t1=1.0, t2=1.0: ", loss)

loss = bi_tempered_logistic_loss(t1=0.7, t2=1.3, **shared_inputs)
print("Loss, t1=0.7, t2=1.3: ", loss)