In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os 
from warnings import simplefilter
import pandas as pd
from imblearn.over_sampling import SMOTE  

In [2]:

import models
import class_sampling
import train
import metric_utils
import inference
import loss_fns
import torchvision.ops 

NUM_CLASSES = 10
n_epochs = 30
batch_size_train = 64
batch_size_test = 1000
momentum = 0

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

CLASS_LABELS = {'airplane': 0,
                 'automobile': 1,
                 'bird': 2,
                 'cat': 3,
                 'deer': 4,
                 'dog': 5,
                 'frog': 6,
                 'horse': 7,
                 'ship': 8,
                 'truck': 9}


simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)
simplefilter(action='ignore', category=DeprecationWarning)

col_names = ["name", 
            "num_classes", 
            "classes_used", 
            "ratio", 
            "learning_rate", 
            "mean_0", "variance_0",
            "mean_10", "variance_10",
            "mean_20", "variance_20",
            "mean_30", "variance_30",
          #   "mean_40", "variance_40",
          #   "mean_50", "variance_50",
             "cap", "normalization", "other"]

rows = []

In [3]:
# 3 classes

NUM_CLASSES_REDUCED = 3
nums = (0, 3, 1)
ratio = (200, 20, 1)

norm=True

if norm:
    transform=torchvision.transforms.Compose([torchvision.transforms.Normalize(mean=[134.1855, 122.7346, 118.3749], std=[70.5125, 64.4848, 66.5604])])
else:
    transform=None

    
train_CIFAR10 = torchvision.datasets.CIFAR10('cifar10', train=True, download=True,
                             transform=transform)  

test_CIFAR10 = torchvision.datasets.CIFAR10('cifar10', train=False, download=True,
                             transform=transform)  

train_CIFAR10.data = train_CIFAR10.data.reshape(50000, 3, 32, 32)
test_CIFAR10.data = test_CIFAR10.data.reshape(10000, 3, 32, 32)


reduced_train_CIFAR10 = class_sampling.Reduce(train_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True, transform=transform)
reduced_test_CIFAR10 = class_sampling.Reduce(test_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True, transform=transform)

ratio_train_CIFAR10 = class_sampling.Ratio(train_CIFAR10, NUM_CLASSES_REDUCED, ratio, nums=nums, transform=transform)
targets = ratio_train_CIFAR10.labels 
class_count = np.unique(targets, return_counts=True)[1]

smote_train_CIFAR10 = class_sampling.Smote(ratio_train_CIFAR10, 5000 * NUM_CLASSES_REDUCED, transform=transform)

triplet_train_CIFAR10 = class_sampling.ForTripletLoss(reduced_train_CIFAR10, smote=False, transform=transform, num_classes=3)
triplet_ratio_train_CIFAR10 = class_sampling.ForTripletLoss(ratio_train_CIFAR10, smote=False, transform=transform, num_classes=3)
triplet_smote_train_CIFAR10 = class_sampling.ForTripletLoss(smote_train_CIFAR10, smote=True, transform=transform, num_classes=3)

weight = 1. / class_count
samples_weight = weight[targets]
samples_weight = torch.from_numpy(samples_weight)
oversampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(max(class_count) * NUM_CLASSES_REDUCED), replacement=True)
sampler = torch.utils.data.WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)
undersampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(min(class_count) * NUM_CLASSES_REDUCED), replacement=False)

weight *= max(class_count)

train_loader_reduced = DataLoader(reduced_train_CIFAR10, batch_size=batch_size_train, shuffle=True)  

train_loader_ratio = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, shuffle=True) 

train_loader_oversampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=oversampler)

train_loader_undersampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=undersampler)

train_loader_sampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=sampler)

train_loader_smote = DataLoader(smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss = DataLoader(triplet_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss_ratio = DataLoader(triplet_ratio_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss_smote = DataLoader(triplet_smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

test_loader_reduced = DataLoader(reduced_test_CIFAR10, batch_size=batch_size_test, shuffle=True)

# to be used in distance capped smote - get average tensor for the entire class 
dataset = train_loader_ratio.dataset
class0 = dataset.images[dataset.labels==0]
class1 = dataset.images[dataset.labels==1]
class2 = dataset.images[dataset.labels==1]
class0_avg = torch.mean(class0.float(), 0)
class1_avg = torch.mean(class1.float(), 0)
class2_avg = torch.mean(class2.float(), 0)
avg_tensors_list = [class0_avg, class1_avg, class2_avg]

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# 3 class normal

learning_rates = [5e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_reduced, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["normal", 3, nums, (1, 1, 1), learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], None, norm, None]
    rows.append(row)

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [None]:
#  3 class ratio

learning_rates =  [1e-4, 1e-3, 5e-4]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_ratio, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["ratio", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], None, norm, None]
    rows.append(row)

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [3]:
# 3 class oversampled 

learning_rates = [1e-4, 5e-4, 1e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_oversampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["oversampled", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], None, norm, None]
    rows.append(row)


Test set: Avg. loss: 0.0033108042081197104, AUC: 0.5012746666666666


Test set: Avg. loss: 0.0010891745090484618, AUC: 0.5691605000000001


Test set: Avg. loss: 0.0010772271553675333, AUC: 0.6205455833333334


Test set: Avg. loss: 0.0010581616560618083, AUC: 0.66594325


Test set: Avg. loss: 0.009650338172912597, AUC: 0.64924925


Test set: Avg. loss: 0.0010989543994267782, AUC: 0.5006625


Test set: Avg. loss: 0.0010988156000773111, AUC: 0.5036341666666666


Test set: Avg. loss: 0.001098307967185974, AUC: 0.511603


Test set: Avg. loss: 0.0014643325408299763, AUC: 0.5624135


Test set: Avg. loss: 0.0010988226731618246, AUC: 0.5028216666666666


Test set: Avg. loss: 0.0010955985387166342, AUC: 0.5535775


Test set: Avg. loss: 0.0010860007206598917, AUC: 0.6126334166666666


Test set: Avg. loss: 0.0017339369455973306, AUC: 0.6287725000000001


Test set: Avg. loss: 0.0010934834877649943, AUC: 0.5562849166666667


Test set: Avg. loss: 0.0010735339721043905, AUC: 0.6279554166666667


Test

In [7]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [11]:
#  3 class SMOTE

learning_rates = [5e-3]

learning_rate_aucs = []

loss_fn_args = {}
loss_fn_args['loss_cap'] = None

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax_with_smote(epoch, train_loader_smote, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["smote", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], None, norm, None]
    rows.append(row)
    
print(rows)


Test set: Avg. loss: 0.006379682540893555, AUC: 0.5555167499999999


Test set: Avg. loss: 0.0010770999590555828, AUC: 0.7082001666666667


Test set: Avg. loss: 0.0011475319465001424, AUC: 0.7300091666666667


Test set: Avg. loss: 0.0012237483263015747, AUC: 0.7375256666666666


Test set: Avg. loss: 0.0021883630752563476, AUC: 0.5853621666666666


Test set: Avg. loss: 0.0011258338292439779, AUC: 0.6908154166666667


Test set: Avg. loss: 0.0012150108416875203, AUC: 0.7028374166666667


Test set: Avg. loss: 0.0011469882726669312, AUC: 0.7570853333333334


Test set: Avg. loss: 0.002026644229888916, AUC: 0.49485124999999996


Test set: Avg. loss: 0.001054565707842509, AUC: 0.7094285833333333


Test set: Avg. loss: 0.001168736457824707, AUC: 0.707386


Test set: Avg. loss: 0.0012017017602920532, AUC: 0.6683456666666667


Test set: Avg. loss: 0.0018511176109313964, AUC: 0.54186175


Test set: Avg. loss: 0.0011469180583953858, AUC: 0.6809189999999999


Test set: Avg. loss: 0.00110062996546427

In [12]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [15]:
# 3 class capped smote 


momentum=0
learning_rates = [5e-3]


cap_aucs = []

caps = [1, 5, 10]

for cap in caps:
    
    loss_fn_args = {}
    loss_fn_args['loss_cap'] = cap
    
    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNet(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
            model_aucs.append(auc)
            for epoch in range(n_epochs):
                _, _ = train.train_softmax_with_smote(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for c in range(len(cap_aucs)):
    auc_mean = cap_aucs[c][0]
    auc_variance = cap_aucs[c][1]
    cap = caps[c]
    for i in range(len(learning_rates)): 
        row = ["capped_smote", NUM_CLASSES_REDUCED, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap, norm, None]
        rows.append(row)

print(rows)


Test set: Avg. loss: 0.004620168209075928, AUC: 0.5224699166666666


Test set: Avg. loss: 0.0032700355052947996, AUC: 0.65412675


Test set: Avg. loss: 0.0023182222843170165, AUC: 0.6341258333333334


Test set: Avg. loss: 0.0016645215352376302, AUC: 0.7060236666666667


Test set: Avg. loss: 0.00591958220799764, AUC: 0.505117


Test set: Avg. loss: 0.004119347174962362, AUC: 0.5946488333333333


Test set: Avg. loss: 0.001755180795987447, AUC: 0.7035093333333334


Test set: Avg. loss: 0.001964009126027425, AUC: 0.7295488333333333


Test set: Avg. loss: 0.0043713115056355795, AUC: 0.4085865833333333


Test set: Avg. loss: 0.0016593863169352214, AUC: 0.722733


Test set: Avg. loss: 0.0018062065442403157, AUC: 0.7048511666666665


Test set: Avg. loss: 0.0019965092738469443, AUC: 0.6980991666666667


Test set: Avg. loss: 0.013644606590270995, AUC: 0.4427893333333333


Test set: Avg. loss: 0.0019738929669062297, AUC: 0.7059536666666668


Test set: Avg. loss: 0.0017942049900690714, AUC: 0.692

In [18]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [48]:
# 3 class euclidean distance capped smote 


momentum=0
learning_rates = [1e-3, 5e-4, 1e-4]


cap_aucs = []

caps = [1, 5, 10]

loss_fn_args = {}
loss_fn_args['distance'] = 'euclidean'


for cap in caps:
    
    loss_fn_args['loss_cap'] = cap
    
    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True) 
            model_aucs.append(auc)
            for epoch in range(n_epochs):
                _, _ = train.train_softmax_with_embeddings(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for c in range(len(cap_aucs)):
    auc_mean = cap_aucs[c][0]
    auc_variance = cap_aucs[c][1]
    cap = caps[c]
    for i in range(len(learning_rates)): 
        row = ["distance_capped_smote", NUM_CLASSES_REDUCED, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap, norm, None]
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.0053888258934020996, AUC: 0.5083743333333334


Test set: Avg. loss: 0.0011163065830866496, AUC: 0.5555713333333334


Test set: Avg. loss: 0.0011519194841384888, AUC: 0.6339973333333333


Test set: Avg. loss: 0.0012265472412109374, AUC: 0.6482106666666666


Test set: Avg. loss: 0.004785251299540202, AUC: 0.45894758333333335


Test set: Avg. loss: 0.0010515480041503906, AUC: 0.6808616666666666


Test set: Avg. loss: 0.001097139040629069, AUC: 0.6735478333333332


Test set: Avg. loss: 0.001158262848854065, AUC: 0.6645963333333333


Test set: Avg. loss: 0.002081297715504964, AUC: 0.5468329166666667


Test set: Avg. loss: 0.0010992099046707154, AUC: 0.6545398333333333


Test set: Avg. loss: 0.001181641697883606, AUC: 0.6227119999999999


Test set: Avg. loss: 0.0012087234656016032, AUC: 0.6747756666666667


Test set: Avg. loss: 0.0029571340878804526, AUC: 0.42823191666666666


Test set: Avg. loss: 0.0010765260457992554, AUC: 0.6793964166666666


Test set: Avg. loss: 0


Test set: Avg. loss: 0.0011079335610071819, AUC: 0.6403586666666666


Test set: Avg. loss: 0.0011695301135381062, AUC: 0.602681


Test set: Avg. loss: 0.0012256291309992473, AUC: 0.6285350833333333


Test set: Avg. loss: 0.005145518620808919, AUC: 0.44634475


Test set: Avg. loss: 0.0010701054732004801, AUC: 0.6592354166666666


Test set: Avg. loss: 0.0011435633500417074, AUC: 0.6172679166666667


Test set: Avg. loss: 0.0011791830857594808, AUC: 0.64276475


Test set: Avg. loss: 0.005740523020426432, AUC: 0.41456291666666667


Test set: Avg. loss: 0.0011070729494094848, AUC: 0.545332


Test set: Avg. loss: 0.0011548689603805542, AUC: 0.5669795000000001


Test set: Avg. loss: 0.001200473427772522, AUC: 0.6261900833333334


Test set: Avg. loss: 0.0022632593313852947, AUC: 0.4380240833333333


Test set: Avg. loss: 0.0010723592837651571, AUC: 0.6887599999999999


Test set: Avg. loss: 0.0011568965911865235, AUC: 0.6591684999999999


Test set: Avg. loss: 0.0012437734206517538, AUC: 0.604858


Test set: Avg. loss: 0.0011914784510930379, AUC: 0.6240734166666666


Test set: Avg. loss: 0.023554405212402343, AUC: 0.4700285


Test set: Avg. loss: 0.0010850404898325602, AUC: 0.6233955


Test set: Avg. loss: 0.0011104260285695394, AUC: 0.6535688333333333


Test set: Avg. loss: 0.0011573739051818847, AUC: 0.6540090833333333


Test set: Avg. loss: 0.00530566676457723, AUC: 0.4907729166666667


Test set: Avg. loss: 0.0011098883152008056, AUC: 0.5446788333333333


Test set: Avg. loss: 0.001145491639773051, AUC: 0.5830515833333333


Test set: Avg. loss: 0.0011320642630259195, AUC: 0.6684396666666665


Test set: Avg. loss: 0.0012915862401326498, AUC: 0.5365515


Test set: Avg. loss: 0.0011070706844329833, AUC: 0.5431425


Test set: Avg. loss: 0.0011233591238657633, AUC: 0.6475516666666667


Test set: Avg. loss: 0.0011704811652501425, AUC: 0.6520456666666666


Test set: Avg. loss: 0.009043893178304037, AUC: 0.3958669166666666


Test set: Avg. loss: 0.0010925205945968629, AUC: 0.606327833

In [49]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [10]:
# 3 class cosine distance capped smote 


momentum=0
learning_rates = [5e-4, 5e-3, 1e-3, 1e-4]


cap_aucs = []

caps = [10, 5, 1]

loss_fn_args = {}
loss_fn_args['distance'] = 'cosine'


start_epoch = 2

for cap in caps:
    
    #loss_fn_args['loss_cap'] = cap
    
    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True) 
            model_aucs.append(auc)
            for epoch in range(start_epoch):
                loss_fn_args['loss_cap'] = None
                loss_fn_args['print_capped'] = False
                _, _ = train.train_softmax_with_embeddings(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
            for epoch in range(start_epoch, n_epochs + 1):
                loss_fn_args['print_capped'] = False
                loss_fn_args['loss_cap'] = cap
                _, _ = train.train_softmax_with_embeddings(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for c in range(len(cap_aucs)):
    auc_mean = cap_aucs[c][0]
    auc_variance = cap_aucs[c][1]
    cap = caps[c]
    for i in range(len(learning_rates)): 
        row = ["cosine_distance_capped_smote", NUM_CLASSES_REDUCED, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap, norm, 'start_epoch=2']
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.0022090151309967043, AUC: 0.482327


Test set: Avg. loss: 0.00109909454981486, AUC: 0.5001666666666668


Test set: Avg. loss: 0.0010986895163853964, AUC: 0.5001666666666668


Test set: Avg. loss: 0.0010929495890935263, AUC: 0.5460208333333334


Test set: Avg. loss: 0.001519055922826131, AUC: 0.4967521666666667


Test set: Avg. loss: 0.0010438531239827474, AUC: 0.6816228333333333


Test set: Avg. loss: 0.0010392544269561767, AUC: 0.6899668333333332


Test set: Avg. loss: 0.001037814458211263, AUC: 0.6980486666666667


Test set: Avg. loss: 0.005044939676920573, AUC: 0.6125878333333334


Test set: Avg. loss: 0.0010688435236612956, AUC: 0.6570639166666666


Test set: Avg. loss: 0.0010312530597050985, AUC: 0.697778


Test set: Avg. loss: 0.0010634669462839762, AUC: 0.6862102499999999


Test set: Avg. loss: 0.007575435161590576, AUC: 0.5238689166666667


Test set: Avg. loss: 0.00107310418287913, AUC: 0.6662355


Test set: Avg. loss: 0.001072569489479065, AUC: 0.674617


Test set: Avg. loss: 0.0011008405685424805, AUC: 0.5001766666666667


Test set: Avg. loss: 0.0011000505685806274, AUC: 0.5063233333333333


Test set: Avg. loss: 0.0010945692857106527, AUC: 0.5684515


Test set: Avg. loss: 0.002201924006144206, AUC: 0.5847720000000001


Test set: Avg. loss: 0.0010677912235260009, AUC: 0.6394275833333335


Test set: Avg. loss: 0.0010587460597356161, AUC: 0.6679229166666666


Test set: Avg. loss: 0.0010557381709416708, AUC: 0.6791149166666667


Test set: Avg. loss: 0.005102400302886963, AUC: 0.4865385


Test set: Avg. loss: 0.0010583461920420328, AUC: 0.6661760833333333


Test set: Avg. loss: 0.001045379360516866, AUC: 0.6912495


Test set: Avg. loss: 0.0010401338736216227, AUC: 0.7031131666666667


Test set: Avg. loss: 0.004726005872090658, AUC: 0.5488045


Test set: Avg. loss: 0.001022370219230652, AUC: 0.6852906666666666


Test set: Avg. loss: 0.0010080135663350423, AUC: 0.6974095833333335


Test set: Avg. loss: 0.000960826595624288, AUC: 0.7158985000


Test set: Avg. loss: 0.0010980440775553385, AUC: 0.7122550833333333


Test set: Avg. loss: 0.0029827739397684733, AUC: 0.49234633333333333


Test set: Avg. loss: 0.0010995362997055053, AUC: 0.5006681666666667


Test set: Avg. loss: 0.001045853098233541, AUC: 0.701257


Test set: Avg. loss: 0.0010761226812998454, AUC: 0.6972056666666666


Test set: Avg. loss: 0.004204115390777588, AUC: 0.5671574166666666


Test set: Avg. loss: 0.0010754772822062174, AUC: 0.6596719166666667


Test set: Avg. loss: 0.0011111609141031902, AUC: 0.6449171666666667


Test set: Avg. loss: 0.001150728146235148, AUC: 0.6672355


Test set: Avg. loss: 0.003342944860458374, AUC: 0.51839325


Test set: Avg. loss: 0.0010622838338216146, AUC: 0.6769604166666667


Test set: Avg. loss: 0.0010973263184229532, AUC: 0.674305


Test set: Avg. loss: 0.0011038169860839843, AUC: 0.6984254166666665


Test set: Avg. loss: 0.004540504614512126, AUC: 0.4444675


Test set: Avg. loss: 0.0010318257808685303, AUC: 0.7069801666666667




Test set: Avg. loss: 0.003582649310429891, AUC: 0.4832521666666667


Test set: Avg. loss: 0.0009886308113733928, AUC: 0.7111300000000002


Test set: Avg. loss: 0.0011806331078211467, AUC: 0.7298126666666667


Test set: Avg. loss: 0.0010766961177190145, AUC: 0.7630954166666667


Test set: Avg. loss: 0.0034387753009796143, AUC: 0.523011


Test set: Avg. loss: 0.0010720754861831665, AUC: 0.6975636666666668


Test set: Avg. loss: 0.0012911065816879272, AUC: 0.727802


Test set: Avg. loss: 0.0014056069453557331, AUC: 0.7328014999999999


Test set: Avg. loss: 0.0037713797092437744, AUC: 0.5320789166666667


Test set: Avg. loss: 0.0011224740346272787, AUC: 0.7285950833333334


Test set: Avg. loss: 0.0012205634514490763, AUC: 0.7380878333333335


Test set: Avg. loss: 0.00104001251856486, AUC: 0.6944266666666666


Test set: Avg. loss: 0.0021871374448140463, AUC: 0.5663385000000001


Test set: Avg. loss: 0.0010685537258783976, AUC: 0.72108675


Test set: Avg. loss: 0.0011838592290878296, AUC: 0

In [11]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [4]:
# 3 class cosine distance capped smote w/ average tensor 


momentum=0
learning_rates = [1e-3, 5e-4, 1e-4]


cap_aucs = []

caps = [1, 5]

loss_fn_args = {}
loss_fn_args['distance'] = 'cosine'


for cap in caps:
    
    loss_fn_args['loss_cap'] = cap
    
    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True) 
            model_aucs.append(auc)
            for epoch in range(n_epochs):
                loss_fn_args['avg_tensors'] = []
                for k in range(NUM_CLASSES_REDUCED):
                    _, avg_tensor = network(avg_tensors_list[k])
                    loss_fn_args['avg_tensors'].append(avg_tensor)
                _, _ = train.train_softmax_with_embeddings(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELossAvgDistance, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for c in range(len(cap_aucs)):
    auc_mean = cap_aucs[c][0]
    auc_variance = cap_aucs[c][1]
    cap = caps[c]
    for i in range(len(learning_rates)): 
        row = ["cosine_distance_capped_smote_avg", NUM_CLASSES_REDUCED, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap, norm, None]
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.0011114914417266846, AUC: 0.36460475000000003


Test set: Avg. loss: 0.0010337845484415691, AUC: 0.6661265833333333



KeyboardInterrupt: 

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [15]:
# 3 class triplet loss + cosine distance capped smote 


momentum=0
learning_rates = [(5e-6, 1e-4), (1e-5, 5e-5), (1e-3, 1e-2)]

cap_aucs = []
loss_caps = [1]

start_epoch = 5
loss_fn_args = {}
loss_fn_args['distance'] = 'cosine'



for cap in loss_caps:
    
    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            embed_network = models.ConvNetOnlyEmbeddings(3)
            linear_probe = models.ConvNetLinearProbe(3)
            complete_network = models.CompleteConvNet(embed_network, linear_probe)
            embed_optimizer = optim.SGD(embed_network.parameters(), lr=learning_rate[0], momentum=momentum)
            linear_optimizer = optim.SGD(complete_network.parameters(), lr=learning_rate[1], momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, complete_network, embeddings=True) 
            model_aucs.append(auc)
            for epoch in range(start_epoch):
                _, train_losses = train.train_triplet_loss_smote(epoch, train_loader_tripletloss_smote, embed_network, embed_optimizer, verbose=False)
                print("Train loss: " + str(np.mean(np.array(train_losses))))
            for epoch in range(start_epoch, n_epochs+1):
                loss_fn_args['loss_cap'] = cap
                loss_fn_args['avg_tensors'] = []
                for k in range(3):
                    avg_tensor = embed_network(avg_tensors_list[k])
                    loss_fn_args['avg_tensors'].append(avg_tensor)
                _, _ = train.train_softmax_with_embeddings(epoch, train_loader_smote, complete_network, linear_optimizer, verbose=False, loss_fn=loss_fns.CappedCELossAvgDistance, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, complete_network, embeddings=True)
                model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)


    cap_aucs.append([auc_mean, auc_variance])

    
for c in range(len(cap_aucs)):
    auc_mean = cap_aucs[c][0]
    auc_variance = cap_aucs[c][1]
    cap = loss_caps[c]
    for i in range(len(learning_rates)): 
        row = ["cosine_distance_capped_smote_with_smote_triplet_loss", NUM_CLASSES_REDUCED, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], None, norm, "start_epoch=5"]
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.0010977579752604167, AUC: 0.5471043333333333

Train loss: 0.22670290546214328
Train loss: 0.24609810189997897
Train loss: 0.2162285436975195
Train loss: 0.21934155656936322
Train loss: 0.2423914039388616

Test set: Avg. loss: 0.001086990475654602, AUC: 0.6655497499999999


Test set: Avg. loss: 0.0010696454842885335, AUC: 0.6809255833333334


Test set: Avg. loss: 0.0010534924666086832, AUC: 0.6828300833333333


Test set: Avg. loss: 0.001096967101097107, AUC: 0.5540400833333333

Train loss: 0.3043331108194716
Train loss: 0.30202328118872135
Train loss: 0.3150767704273792
Train loss: 0.3039932352431277
Train loss: 0.31126081943511963

Test set: Avg. loss: 0.0010940040747324626, AUC: 0.6071353333333332


Test set: Avg. loss: 0.0010892706712086996, AUC: 0.6661772500000002


Test set: Avg. loss: 0.0010844313303629558, AUC: 0.6989626666666666


Test set: Avg. loss: 0.0011072221994400024, AUC: 0.4322525

Train loss: 0.25621940759902306
Train loss: 0.25718629537744725
Tr

Train loss: 0.22865817369298733
Train loss: 0.2219101591313139
Train loss: 0.21008287921864935
Train loss: 0.2049694005479204
Train loss: 0.18186203621803446

Test set: Avg. loss: 0.0010946365594863893, AUC: 0.5833615


Test set: Avg. loss: 0.001084994633992513, AUC: 0.6356315


Test set: Avg. loss: 0.0010774826606114705, AUC: 0.65485025


Test set: Avg. loss: 0.001106550971666972, AUC: 0.39227341666666665

Train loss: 0.12493679219103875
Train loss: 0.03730655477402058
Train loss: 0.027834622657045404
Train loss: 0.017303673003582244
Train loss: 0.01561295656447715

Test set: Avg. loss: 0.0009611998597780863, AUC: 0.7801376666666666


Test set: Avg. loss: 0.0015269558827082316, AUC: 0.7485891666666666


Test set: Avg. loss: 0.0014714843034744263, AUC: 0.8214801666666666


Test set: Avg. loss: 0.001093711773554484, AUC: 0.6077783333333334

Train loss: 0.06810512289087824
Train loss: 0.025651184548722936
Train loss: 0.019604093977745544
Train loss: 0.012258205261636288
Train loss: 0.025

In [16]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [None]:
# 3 class triplet loss capped smote


momentum=0
learning_rates = [5e-2]

cap_aucs = []

start_epoch = 2

loss_caps = [1, 5, 10]
loss_fn_args = {}


for loss_cap in loss_caps:
    
    loss_fn_args['loss_cap'] = loss_cap
    
    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True) 
            model_aucs.append(auc)
            for epoch in range(start_epoch):
                loss_fn_args['loss_cap'] = None
                _, _ = train.train_softmax_with_embeddings(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
            for epoch in range(start_epoch, n_epochs + 1):
                loss_fn_args['loss_cap'] = loss_cap
                _, _ = train.train_triplet_capped_loss(epoch, train_loader_tripletloss_smote, network, optimizer, verbose=False, cap_calc=loss_fns.TripletLoss,loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for c in range(len(cap_aucs)):
    auc_mean = cap_aucs[c][0]
    auc_variance = cap_aucs[c][1]
    cap = loss_caps[c]
    for i in range(len(learning_rates)): 
        row = ["triplet_loss_capped_smote", NUM_CLASSES_REDUCED, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap, norm]
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.0010990204016367595, AUC: 0.5136136666666666


Test set: Avg. loss: 0.0019028458992640176, AUC: 0.7512252500000001


Test set: Avg. loss: 0.002086396058400472, AUC: 0.7691906666666668


Test set: Avg. loss: 0.002416961034138997, AUC: 0.7808670000000001


Test set: Avg. loss: 0.0010924596786499024, AUC: 0.5750945000000001


Test set: Avg. loss: 0.001673059900601705, AUC: 0.7794949999999999


Test set: Avg. loss: 0.002390830198923747, AUC: 0.7884782499999999


Test set: Avg. loss: 0.002485786517461141, AUC: 0.7885251666666666


Test set: Avg. loss: 0.0010973639885584513, AUC: 0.5168527500000001


Test set: Avg. loss: 0.001288370688756307, AUC: 0.7993951666666667


Test set: Avg. loss: 0.001963376998901367, AUC: 0.8154784999999999


Test set: Avg. loss: 0.0018575411240259806, AUC: 0.8272758333333333


Test set: Avg. loss: 0.0011002889474232991, AUC: 0.5800430833333333


Test set: Avg. loss: 0.0014540077050526937, AUC: 0.7614485000000001


Test set: Avg. loss: 0.002

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 