In [54]:
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os 
from warnings import simplefilter
import pandas as pd
from imblearn.over_sampling import SMOTE  


import models
import class_sampling
import train
import metric_utils
import inference
import loss_fns
import torchvision.ops 

# --- Experiment configuration ------------------------------------------------
NUM_CLASSES = 10          # size of the full CIFAR-10 label set (see CLASS_LABELS)
n_epochs = 30             # training epochs per run
batch_size_train = 64
batch_size_test = 1000
momentum = 0              # passed to optim.SGD in the experiment cells

# --- Reproducibility -----------------------------------------------------------
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)
# torch.manual_seed only seeds torch's generators. Seed NumPy's global RNG as
# well so that any NumPy-based randomness (presumably used by the sampling /
# SMOTE helpers -- verify in class_sampling) is repeatable across kernel restarts.
np.random.seed(random_seed)

# CIFAR-10 class name -> integer label.
CLASS_LABELS = {'airplane': 0,
                 'automobile': 1,
                 'bird': 2,
                 'cat': 3,
                 'deer': 4,
                 'dog': 5,
                 'frog': 6,
                 'horse': 7,
                 'ship': 8,
                 'truck': 9}


# Silence noisy library warnings for the whole notebook.
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)
simplefilter(action='ignore', category=DeprecationWarning)

# Column layout of results/convnet_aucs.csv: experiment metadata, then the
# mean/variance of the test AUC at each checkpoint epoch (0 = untrained
# network), then the loss cap and the normalization flag.
col_names = (
    ["name", "num_classes", "classes_used", "ratio", "learning_rate"]
    + [f"{stat}_{epoch}" for epoch in (0, 10, 20, 30) for stat in ("mean", "variance")]
    + ["cap", "normalization"]
)

# Result rows accumulated by the experiment cells; flushed to CSV and reset
# by the save cell that follows each experiment.
rows = []

In [55]:
# 3 classes
#
# Builds every dataset variant, sampler and DataLoader used by the 3-class
# experiments below: class-reduced, ratio-imbalanced, SMOTE-resampled and
# their triplet-loss wrappers.

NUM_CLASSES_REDUCED = 3
nums = (0, 3, 1)        # presumably the CIFAR-10 label ids to keep -- see class_sampling.Reduce
ratio = (200, 20, 1)    # per-class ratio handed to class_sampling.Ratio

norm = True

# Normalization constants are on the raw pixel scale (values well above 1).
transform = None
if norm:
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Normalize(mean=[134.1855, 122.7346, 118.3749],
                                         std=[70.5125, 64.4848, 66.5604]),
    ])


train_CIFAR10 = torchvision.datasets.CIFAR10('cifar10', train=True, download=True,
                                             transform=transform)

test_CIFAR10 = torchvision.datasets.CIFAR10('cifar10', train=False, download=True,
                                            transform=transform)

# NOTE(review): torchvision keeps CIFAR10.data channel-last, i.e. (N, 32, 32, 3).
# reshape() only reinterprets the buffer and does not move the channel axis, so
# the three "channels" produced here are not the R/G/B planes; a true
# channel-first view would be data.transpose(0, 3, 1, 2). Left unchanged because
# the normalization constants above and the results already accumulated in the
# CSV were presumably produced with this layout -- confirm and change together.
# The leading dimension is inferred instead of hard-coding 50000 / 10000.
train_CIFAR10.data = train_CIFAR10.data.reshape(-1, 3, 32, 32)
test_CIFAR10.data = test_CIFAR10.data.reshape(-1, 3, 32, 32)


# Class-reduced train / test sets.
reduced_train_CIFAR10 = class_sampling.Reduce(train_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True, transform=transform)
reduced_test_CIFAR10 = class_sampling.Reduce(test_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True, transform=transform)

# Imbalanced training set and its per-class sample counts.
ratio_train_CIFAR10 = class_sampling.Ratio(train_CIFAR10, NUM_CLASSES_REDUCED, ratio, nums=nums, transform=transform)
targets = ratio_train_CIFAR10.labels
class_count = np.unique(targets, return_counts=True)[1]

# SMOTE-resampled version of the imbalanced set, sized 5000 * number of classes.
smote_train_CIFAR10 = class_sampling.Smote(ratio_train_CIFAR10, 5000 * NUM_CLASSES_REDUCED, transform=transform)

# Triplet-loss wrappers; num_classes follows NUM_CLASSES_REDUCED instead of a literal 3.
triplet_train_CIFAR10 = class_sampling.ForTripletLoss(reduced_train_CIFAR10, smote=False, transform=transform, num_classes=NUM_CLASSES_REDUCED)
triplet_ratio_train_CIFAR10 = class_sampling.ForTripletLoss(ratio_train_CIFAR10, smote=False, transform=transform, num_classes=NUM_CLASSES_REDUCED)
triplet_smote_train_CIFAR10 = class_sampling.ForTripletLoss(smote_train_CIFAR10, smote=True, transform=transform, num_classes=NUM_CLASSES_REDUCED)

# Inverse-frequency weight per class, expanded to one weight per training sample.
weight = 1. / class_count
samples_weight = weight[targets]
samples_weight = torch.from_numpy(samples_weight)
# Oversample: draw (largest class size * classes) samples with replacement.
oversampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(max(class_count) * NUM_CLASSES_REDUCED), replacement=True)
# Re-weighted sampling at the original dataset size.
sampler = torch.utils.data.WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)
# Undersample: draw (smallest class size * classes) samples without replacement.
undersampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(min(class_count) * NUM_CLASSES_REDUCED), replacement=False)

# Rescale the class weights so the most frequent class has weight 1. This runs
# after samples_weight was derived, so the samplers above keep the unscaled
# inverse-frequency weights (assuming the indexing above produced a copy).
weight *= max(class_count)

# --- Training loaders ----------------------------------------------------------
train_loader_reduced = DataLoader(reduced_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_ratio = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_oversampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=oversampler)

train_loader_undersampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=undersampler)

train_loader_sampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=sampler)

train_loader_smote = DataLoader(smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss = DataLoader(triplet_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss_ratio = DataLoader(triplet_ratio_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss_smote = DataLoader(triplet_smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

# --- Evaluation loader -----------------------------------------------------------
# NOTE(review): shuffling the test set should not be needed for evaluation; it
# is kept so the torch RNG stream (and therefore run-to-run results) is unchanged.
test_loader_reduced = DataLoader(reduced_test_CIFAR10, batch_size=batch_size_test, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [12]:
# 3 class normal
#
# Baseline: plain softmax training on the class-reduced training set
# (train_loader_reduced). For every learning rate, n_runs freshly initialised
# ConvNets are trained; the test AUC is recorded before training and after
# every 10th epoch.

learning_rates = [1e-4, 1e-3, 5e-4]
n_runs = 10  # independent re-initialisations per learning rate

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for run in range(n_runs):
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        # AUC of the untrained network is the epoch-0 checkpoint.
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
        model_aucs = [auc]
        for epoch in range(n_epochs):
            train.train_softmax(epoch, train_loader_reduced, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0:
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

# Axes: (learning rate, run, checkpoint).
learning_rate_aucs = np.asarray(learning_rate_aucs)

# Aggregate over the runs axis; np.var uses its default ddof=0.
auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

# One result row per learning rate, in col_names order. The class count comes
# from NUM_CLASSES_REDUCED rather than a literal so it cannot drift from the
# model that was actually trained.
for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
    rows.append(["normal", NUM_CLASSES_REDUCED, nums, (1, 1, 1), lr,
                 means[0], variances[0],
                 means[1], variances[1],
                 means[2], variances[2],
                 means[3], variances[3], None, norm])


Test set: Avg. loss: 0.006297228495279948, AUC: 0.40847066666666665


Test set: Avg. loss: 0.0011013633012771607, AUC: 0.4996666666666667


Test set: Avg. loss: 0.001100907325744629, AUC: 0.4998333333333333


Test set: Avg. loss: 0.0011005379756291707, AUC: 0.5


Test set: Avg. loss: 0.0034327227274576822, AUC: 0.5633294166666666


Test set: Avg. loss: 0.0011028808355331421, AUC: 0.49883833333333333


Test set: Avg. loss: 0.001102209726969401, AUC: 0.49883683333333334


Test set: Avg. loss: 0.001101684848467509, AUC: 0.49950083333333334


Test set: Avg. loss: 0.001514667789141337, AUC: 0.4563873333333334


Test set: Avg. loss: 0.001086326042811076, AUC: 0.5949351666666667


Test set: Avg. loss: 0.0010733679135640462, AUC: 0.6530543333333334


Test set: Avg. loss: 0.0010519079367319743, AUC: 0.6970986666666668


Test set: Avg. loss: 0.006923106988271078, AUC: 0.46537175000000003


Test set: Avg. loss: 0.001098378618558248, AUC: 0.5143645000000001


Test set: Avg. loss: 0.00109715338548

In [13]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [14]:
#  3 class ratio
#
# Plain softmax training on the imbalanced training set (train_loader_ratio),
# without any re-balancing. For every learning rate, n_runs freshly
# initialised ConvNets are trained; the test AUC is recorded before training
# and after every 10th epoch.

learning_rates = [1e-4, 1e-3, 5e-4]
n_runs = 10  # independent re-initialisations per learning rate

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for run in range(n_runs):
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        # AUC of the untrained network is the epoch-0 checkpoint.
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
        model_aucs = [auc]
        for epoch in range(n_epochs):
            train.train_softmax(epoch, train_loader_ratio, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0:
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

# Axes: (learning rate, run, checkpoint).
learning_rate_aucs = np.asarray(learning_rate_aucs)

# Aggregate over the runs axis; np.var uses its default ddof=0.
auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

# One result row per learning rate, in col_names order. The class count comes
# from NUM_CLASSES_REDUCED rather than a literal so it cannot drift from the
# model that was actually trained.
for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
    rows.append(["ratio", NUM_CLASSES_REDUCED, nums, ratio, lr,
                 means[0], variances[0],
                 means[1], variances[1],
                 means[2], variances[2],
                 means[3], variances[3], None, norm])


Test set: Avg. loss: 0.0034308363596598306, AUC: 0.5116964166666667


Test set: Avg. loss: 0.0027943440278371174, AUC: 0.5888170833333333


Test set: Avg. loss: 0.002969173272450765, AUC: 0.6337191666666667


Test set: Avg. loss: 0.0026788889567057293, AUC: 0.6404518333333333


Test set: Avg. loss: 0.010200992584228515, AUC: 0.5203337499999999


Test set: Avg. loss: 0.0014580308596293132, AUC: 0.5384295


Test set: Avg. loss: 0.0013363298972447714, AUC: 0.628893


Test set: Avg. loss: 0.0014429691632588705, AUC: 0.66305925


Test set: Avg. loss: 0.00281195060412089, AUC: 0.5770663333333332


Test set: Avg. loss: 0.00220535675684611, AUC: 0.6147014166666667


Test set: Avg. loss: 0.0023105909824371338, AUC: 0.6349646666666667


Test set: Avg. loss: 0.0024558690388997396, AUC: 0.6477726666666667


Test set: Avg. loss: 0.0036946707566579183, AUC: 0.5016439999999999


Test set: Avg. loss: 0.0020329121748606364, AUC: 0.5190675833333334


Test set: Avg. loss: 0.0021662095387776693, AUC: 0.5

In [15]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [16]:
# 3 class oversampled
#
# Softmax training on the imbalanced set drawn through the weighted
# oversampler (train_loader_oversampled). For every learning rate, n_runs
# freshly initialised ConvNets are trained; the test AUC is recorded before
# training and after every 10th epoch.

learning_rates = [1e-3, 5e-4, 1e-4]
n_runs = 10  # independent re-initialisations per learning rate

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for run in range(n_runs):
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        # AUC of the untrained network is the epoch-0 checkpoint.
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
        model_aucs = [auc]
        for epoch in range(n_epochs):
            train.train_softmax(epoch, train_loader_oversampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0:
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

# Axes: (learning rate, run, checkpoint).
learning_rate_aucs = np.asarray(learning_rate_aucs)

# Aggregate over the runs axis; np.var uses its default ddof=0.
auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

# One result row per learning rate, in col_names order. The class count comes
# from NUM_CLASSES_REDUCED rather than a literal so it cannot drift from the
# model that was actually trained.
for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
    rows.append(["oversampled", NUM_CLASSES_REDUCED, nums, ratio, lr,
                 means[0], variances[0],
                 means[1], variances[1],
                 means[2], variances[2],
                 means[3], variances[3], None, norm])


Test set: Avg. loss: 0.007841265678405762, AUC: 0.4284085833333333


Test set: Avg. loss: 0.0010583848158518473, AUC: 0.687993


Test set: Avg. loss: 0.002482561747233073, AUC: 0.7706633333333333


Test set: Avg. loss: 0.003099634488423665, AUC: 0.7672075


Test set: Avg. loss: 0.013887118021647136, AUC: 0.42574649999999997


Test set: Avg. loss: 0.0010073497692743938, AUC: 0.7167925833333334


Test set: Avg. loss: 0.0014987698396046957, AUC: 0.7083903333333333


Test set: Avg. loss: 0.0026214383443196616, AUC: 0.7372626666666666


Test set: Avg. loss: 0.008022495905558268, AUC: 0.6410836666666667


Test set: Avg. loss: 0.0010992498795191446, AUC: 0.5000011666666667


Test set: Avg. loss: 0.001090564489364624, AUC: 0.6250315833333333


Test set: Avg. loss: 0.001974099357922872, AUC: 0.7543869166666667


Test set: Avg. loss: 0.011487732569376628, AUC: 0.5060690833333333


Test set: Avg. loss: 0.00105541463692983, AUC: 0.6406730833333333


Test set: Avg. loss: 0.00183162256081899, AUC: 

In [17]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [18]:
#  3 class SMOTE
#
# Softmax training on the SMOTE-resampled training set (train_loader_smote)
# with the training function's default loss. For every learning rate, n_runs
# freshly initialised ConvNets are trained; the test AUC is recorded before
# training and after every 10th epoch.
#
# No loss_fn_args dict is built here: this experiment never passes one to the
# training function.

learning_rates = [1e-4, 5e-4, 1e-3]
n_runs = 10  # independent re-initialisations per learning rate

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for run in range(n_runs):
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        # AUC of the untrained network is the epoch-0 checkpoint.
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
        model_aucs = [auc]
        for epoch in range(n_epochs):
            train.train_softmax_with_smote(epoch, train_loader_smote, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0:
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

# Axes: (learning rate, run, checkpoint).
learning_rate_aucs = np.asarray(learning_rate_aucs)

# Aggregate over the runs axis; np.var uses its default ddof=0.
auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

# One result row per learning rate, in col_names order. The class count comes
# from NUM_CLASSES_REDUCED rather than a literal so it cannot drift from the
# model that was actually trained.
for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
    rows.append(["smote", NUM_CLASSES_REDUCED, nums, ratio, lr,
                 means[0], variances[0],
                 means[1], variances[1],
                 means[2], variances[2],
                 means[3], variances[3], None, norm])


Test set: Avg. loss: 0.0031121970812479655, AUC: 0.5074040833333333


Test set: Avg. loss: 0.001012291153271993, AUC: 0.6870238333333333


Test set: Avg. loss: 0.0010018948117891948, AUC: 0.7024875833333333


Test set: Avg. loss: 0.0009718345602353414, AUC: 0.7123998333333333


Test set: Avg. loss: 0.0036430826981862386, AUC: 0.44375975000000006


Test set: Avg. loss: 0.0010793885389963785, AUC: 0.6463530000000001


Test set: Avg. loss: 0.001073374072710673, AUC: 0.6677694166666667


Test set: Avg. loss: 0.0010547064542770386, AUC: 0.6988854999999999


Test set: Avg. loss: 0.003707357088724772, AUC: 0.5160290833333333


Test set: Avg. loss: 0.0010790506998697917, AUC: 0.6308525


Test set: Avg. loss: 0.001070495843887329, AUC: 0.6627665


Test set: Avg. loss: 0.0010717769861221314, AUC: 0.6658920833333334


Test set: Avg. loss: 0.004282213370005289, AUC: 0.5589395833333334


Test set: Avg. loss: 0.0010035297473271689, AUC: 0.7068214999999999


Test set: Avg. loss: 0.001014583845933278

In [19]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [46]:
# 3 class capped smote
#
# Grid over loss caps x learning rates on the SMOTE loader, training with
# loss_fns.CappedCELoss. Ten fresh ConvNets per grid point; test AUC is taken
# before training and after every 10th epoch, then reduced to mean / variance
# across the ten runs.

momentum = 0
learning_rates = [1e-3, 5e-4, 1e-4]
caps = [1, 5, 10]

# One [mean, variance] pair per cap, in the same order as `caps`.
cap_aucs = []

for cap in caps:

    loss_fn_args = {'loss_cap': cap}

    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for run in range(10):
            network = models.ConvNet(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            # Checkpoint 0: untrained network.
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
            model_aucs = [auc]
            for epoch in range(n_epochs):
                _, _ = train.train_softmax_with_smote(
                    epoch, train_loader_smote, network, optimizer,
                    verbose=False,
                    loss_fn=loss_fns.CappedCELoss,
                    loss_fn_args=loss_fn_args,
                )
                if (epoch + 1) % 10 != 0:
                    continue
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    # Reduce over the runs axis.
    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)

    cap_aucs.append([auc_mean, auc_variance])


# Emit one row per (cap, learning rate) pair, cap-major.
for cap, (auc_mean, auc_variance) in zip(caps, cap_aucs):
    for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
        rows.append(["capped_smote", NUM_CLASSES_REDUCED, nums, ratio, lr,
                     means[0], variances[0],
                     means[1], variances[1],
                     means[2], variances[2],
                     means[3], variances[3], cap, norm])

print(rows)


Test set: Avg. loss: 0.004110835552215576, AUC: 0.5681081666666667


Test set: Avg. loss: 0.002226279099782308, AUC: 0.6757903333333334


Test set: Avg. loss: 0.002117084503173828, AUC: 0.6495369999999999


Test set: Avg. loss: 0.0017974026203155518, AUC: 0.6927331666666667


Test set: Avg. loss: 0.0031340863704681396, AUC: 0.512281


Test set: Avg. loss: 0.00269014310836792, AUC: 0.6720663333333333


Test set: Avg. loss: 0.002087400476137797, AUC: 0.6719768333333334


Test set: Avg. loss: 0.0021087907155354817, AUC: 0.7046515000000001


Test set: Avg. loss: 0.004356918017069499, AUC: 0.4547190833333333


Test set: Avg. loss: 0.002342305024464925, AUC: 0.6622166666666667


Test set: Avg. loss: 0.0020787399609883627, AUC: 0.6480883333333334


Test set: Avg. loss: 0.0017181493838628133, AUC: 0.6838915


Test set: Avg. loss: 0.00953905709584554, AUC: 0.4645290833333333


Test set: Avg. loss: 0.0023876633644104002, AUC: 0.6692125


Test set: Avg. loss: 0.0022252449989318846, AUC: 0.694723


Test set: Avg. loss: 0.0011222988367080688, AUC: 0.6803429166666667


Test set: Avg. loss: 0.0011310815811157227, AUC: 0.7075522500000001


Test set: Avg. loss: 0.00996462885538737, AUC: 0.6086563333333334


Test set: Avg. loss: 0.001101412057876587, AUC: 0.5254231666666667


Test set: Avg. loss: 0.001097700317700704, AUC: 0.6515381666666666


Test set: Avg. loss: 0.0011688193480173747, AUC: 0.6121513333333334


Test set: Avg. loss: 0.003925378719965617, AUC: 0.569997


Test set: Avg. loss: 0.0010989242792129516, AUC: 0.5


Test set: Avg. loss: 0.001098648428916931, AUC: 0.5


Test set: Avg. loss: 0.0010986142953236897, AUC: 0.5


Test set: Avg. loss: 0.003779754082361857, AUC: 0.5326775833333333


Test set: Avg. loss: 0.0010660351912180582, AUC: 0.6873114166666667


Test set: Avg. loss: 0.0010976146856943767, AUC: 0.6920904999999999


Test set: Avg. loss: 0.0010330626169840495, AUC: 0.7241146666666666


Test set: Avg. loss: 0.009398465474446614, AUC: 0.43843733333333335


Test set: A


Test set: Avg. loss: 0.0010924318631490072, AUC: 0.63493825


Test set: Avg. loss: 0.0011097737153371175, AUC: 0.6636036666666667


Test set: Avg. loss: 0.0010841543674468994, AUC: 0.6970113333333333


Test set: Avg. loss: 0.0032196237246195475, AUC: 0.410906


Test set: Avg. loss: 0.0010428651968638102, AUC: 0.7286165000000001


Test set: Avg. loss: 0.0011829702854156494, AUC: 0.7313481666666667


Test set: Avg. loss: 0.0013428943554560343, AUC: 0.7375265833333332


Test set: Avg. loss: 0.00943775240580241, AUC: 0.43502641666666664


Test set: Avg. loss: 0.001110884428024292, AUC: 0.56527575


Test set: Avg. loss: 0.0010660007397333782, AUC: 0.6873901666666667


Test set: Avg. loss: 0.001128097176551819, AUC: 0.6843335


Test set: Avg. loss: 0.010154059727986654, AUC: 0.4283825833333333


Test set: Avg. loss: 0.0010519113540649414, AUC: 0.7015693333333334


Test set: Avg. loss: 0.0011187071800231934, AUC: 0.6874246666666668


Test set: Avg. loss: 0.0011560682853062948, AUC: 0.7038700

In [47]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [48]:
# 3 class euclidean distance capped smote
#
# Same cap x learning-rate grid as the capped-SMOTE experiment, but with the
# embedding network and loss_fn_args['distance'] = 'euclidean'. Ten fresh
# networks per grid point; test AUC before training and after every 10th
# epoch, reduced to mean / variance across runs.

momentum = 0
learning_rates = [1e-3, 5e-4, 1e-4]
caps = [1, 5, 10]

# One [mean, variance] pair per cap, in the same order as `caps`.
cap_aucs = []

# A single dict is shared by all caps; only 'loss_cap' is updated per cap.
loss_fn_args = {'distance': 'euclidean'}

for cap in caps:

    loss_fn_args['loss_cap'] = cap

    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for run in range(10):
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            # Checkpoint 0: untrained network.
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
            model_aucs = [auc]
            for epoch in range(n_epochs):
                _, _ = train.train_softmax_with_embeddings(
                    epoch, train_loader_smote, network, optimizer,
                    verbose=False,
                    loss_fn=loss_fns.CappedCELoss,
                    loss_fn_args=loss_fn_args,
                )
                if (epoch + 1) % 10 != 0:
                    continue
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    # Reduce over the runs axis.
    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)

    cap_aucs.append([auc_mean, auc_variance])


# Emit one row per (cap, learning rate) pair, cap-major.
for cap, (auc_mean, auc_variance) in zip(caps, cap_aucs):
    for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
        rows.append(["distance_capped_smote", NUM_CLASSES_REDUCED, nums, ratio, lr,
                     means[0], variances[0],
                     means[1], variances[1],
                     means[2], variances[2],
                     means[3], variances[3], cap, norm])

print(rows)



Test set: Avg. loss: 0.0053888258934020996, AUC: 0.5083743333333334


Test set: Avg. loss: 0.0011163065830866496, AUC: 0.5555713333333334


Test set: Avg. loss: 0.0011519194841384888, AUC: 0.6339973333333333


Test set: Avg. loss: 0.0012265472412109374, AUC: 0.6482106666666666


Test set: Avg. loss: 0.004785251299540202, AUC: 0.45894758333333335


Test set: Avg. loss: 0.0010515480041503906, AUC: 0.6808616666666666


Test set: Avg. loss: 0.001097139040629069, AUC: 0.6735478333333332


Test set: Avg. loss: 0.001158262848854065, AUC: 0.6645963333333333


Test set: Avg. loss: 0.002081297715504964, AUC: 0.5468329166666667


Test set: Avg. loss: 0.0010992099046707154, AUC: 0.6545398333333333


Test set: Avg. loss: 0.001181641697883606, AUC: 0.6227119999999999


Test set: Avg. loss: 0.0012087234656016032, AUC: 0.6747756666666667


Test set: Avg. loss: 0.0029571340878804526, AUC: 0.42823191666666666


Test set: Avg. loss: 0.0010765260457992554, AUC: 0.6793964166666666


Test set: Avg. loss: 0


Test set: Avg. loss: 0.0011079335610071819, AUC: 0.6403586666666666


Test set: Avg. loss: 0.0011695301135381062, AUC: 0.602681


Test set: Avg. loss: 0.0012256291309992473, AUC: 0.6285350833333333


Test set: Avg. loss: 0.005145518620808919, AUC: 0.44634475


Test set: Avg. loss: 0.0010701054732004801, AUC: 0.6592354166666666


Test set: Avg. loss: 0.0011435633500417074, AUC: 0.6172679166666667


Test set: Avg. loss: 0.0011791830857594808, AUC: 0.64276475


Test set: Avg. loss: 0.005740523020426432, AUC: 0.41456291666666667


Test set: Avg. loss: 0.0011070729494094848, AUC: 0.545332


Test set: Avg. loss: 0.0011548689603805542, AUC: 0.5669795000000001


Test set: Avg. loss: 0.001200473427772522, AUC: 0.6261900833333334


Test set: Avg. loss: 0.0022632593313852947, AUC: 0.4380240833333333


Test set: Avg. loss: 0.0010723592837651571, AUC: 0.6887599999999999


Test set: Avg. loss: 0.0011568965911865235, AUC: 0.6591684999999999


Test set: Avg. loss: 0.0012437734206517538, AUC: 0.604858


Test set: Avg. loss: 0.0011914784510930379, AUC: 0.6240734166666666


Test set: Avg. loss: 0.023554405212402343, AUC: 0.4700285


Test set: Avg. loss: 0.0010850404898325602, AUC: 0.6233955


Test set: Avg. loss: 0.0011104260285695394, AUC: 0.6535688333333333


Test set: Avg. loss: 0.0011573739051818847, AUC: 0.6540090833333333


Test set: Avg. loss: 0.00530566676457723, AUC: 0.4907729166666667


Test set: Avg. loss: 0.0011098883152008056, AUC: 0.5446788333333333


Test set: Avg. loss: 0.001145491639773051, AUC: 0.5830515833333333


Test set: Avg. loss: 0.0011320642630259195, AUC: 0.6684396666666665


Test set: Avg. loss: 0.0012915862401326498, AUC: 0.5365515


Test set: Avg. loss: 0.0011070706844329833, AUC: 0.5431425


Test set: Avg. loss: 0.0011233591238657633, AUC: 0.6475516666666667


Test set: Avg. loss: 0.0011704811652501425, AUC: 0.6520456666666666


Test set: Avg. loss: 0.009043893178304037, AUC: 0.3958669166666666


Test set: Avg. loss: 0.0010925205945968629, AUC: 0.606327833

In [49]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [50]:
# 3 class cosine distance capped smote
#
# Same cap x learning-rate grid as the capped-SMOTE experiment, but with the
# embedding network and loss_fn_args['distance'] = 'cosine'. Ten fresh
# networks per grid point; test AUC before training and after every 10th
# epoch, reduced to mean / variance across runs.

momentum = 0
learning_rates = [1e-3, 5e-4, 1e-4]
caps = [1, 5, 10]

# One [mean, variance] pair per cap, in the same order as `caps`.
cap_aucs = []

# A single dict is shared by all caps; only 'loss_cap' is updated per cap.
loss_fn_args = {'distance': 'cosine'}

for cap in caps:

    loss_fn_args['loss_cap'] = cap

    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for run in range(10):
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            # Checkpoint 0: untrained network.
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
            model_aucs = [auc]
            for epoch in range(n_epochs):
                _, _ = train.train_softmax_with_embeddings(
                    epoch, train_loader_smote, network, optimizer,
                    verbose=False,
                    loss_fn=loss_fns.CappedCELoss,
                    loss_fn_args=loss_fn_args,
                )
                if (epoch + 1) % 10 != 0:
                    continue
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    # Reduce over the runs axis.
    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)

    cap_aucs.append([auc_mean, auc_variance])


# Emit one row per (cap, learning rate) pair, cap-major.
for cap, (auc_mean, auc_variance) in zip(caps, cap_aucs):
    for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
        rows.append(["cosine_distance_capped_smote", NUM_CLASSES_REDUCED, nums, ratio, lr,
                     means[0], variances[0],
                     means[1], variances[1],
                     means[2], variances[2],
                     means[3], variances[3], cap, norm])

print(rows)



Test set: Avg. loss: 0.009307427088419596, AUC: 0.5663711666666666


Test set: Avg. loss: 0.0010381158590316772, AUC: 0.6928776666666666


Test set: Avg. loss: 0.0010394632418950398, AUC: 0.7083165833333332


Test set: Avg. loss: 0.001089628259340922, AUC: 0.7163693333333333


Test set: Avg. loss: 0.003905921379725138, AUC: 0.4067528333333334


Test set: Avg. loss: 0.001106173316637675, AUC: 0.5916745833333333


Test set: Avg. loss: 0.0011440603733062745, AUC: 0.6577873333333334


Test set: Avg. loss: 0.0011451536019643149, AUC: 0.7221175


Test set: Avg. loss: 0.003082738240559896, AUC: 0.40130899999999997


Test set: Avg. loss: 0.0010982749462127685, AUC: 0.6372173333333332


Test set: Avg. loss: 0.0011223978598912557, AUC: 0.6647081666666667


Test set: Avg. loss: 0.0011434684197107951, AUC: 0.6945667499999999


Test set: Avg. loss: 0.009841351509094239, AUC: 0.44450175


Test set: Avg. loss: 0.0010697311162948609, AUC: 0.6591475


Test set: Avg. loss: 0.001145783543586731, AUC: 0.


Test set: Avg. loss: 0.0011165897448857625, AUC: 0.6467489999999999


Test set: Avg. loss: 0.001058436155319214, AUC: 0.7070731666666666


Test set: Avg. loss: 0.007202654838562012, AUC: 0.5093048333333333


Test set: Avg. loss: 0.0010309611161549886, AUC: 0.6877974166666667


Test set: Avg. loss: 0.0010366814136505127, AUC: 0.7073982500000001


Test set: Avg. loss: 0.0010591098070144654, AUC: 0.7130885


Test set: Avg. loss: 0.006620848178863525, AUC: 0.5186609166666667


Test set: Avg. loss: 0.0010601287682851155, AUC: 0.6800023333333334


Test set: Avg. loss: 0.0011205731630325318, AUC: 0.6398495833333334


Test set: Avg. loss: 0.0011403633753458659, AUC: 0.6908668333333333


Test set: Avg. loss: 0.0022953606446584067, AUC: 0.4950433333333333


Test set: Avg. loss: 0.0010856351852416993, AUC: 0.6545500833333334


Test set: Avg. loss: 0.0011217998266220093, AUC: 0.6681236666666667


Test set: Avg. loss: 0.0011196550925572713, AUC: 0.7141925000000001


Test set: Avg. loss: 0.00254433


Test set: Avg. loss: 0.001116778572400411, AUC: 0.6900645


Test set: Avg. loss: 0.01625508181254069, AUC: 0.4664030833333333


Test set: Avg. loss: 0.0010741564830144246, AUC: 0.6418016666666667


Test set: Avg. loss: 0.0010800315936406453, AUC: 0.6800916666666666


Test set: Avg. loss: 0.0011884698470433553, AUC: 0.63988025


Test set: Avg. loss: 0.008342554728190104, AUC: 0.42451666666666665


Test set: Avg. loss: 0.0011109998226165772, AUC: 0.5107818333333333


Test set: Avg. loss: 0.0011286404530207316, AUC: 0.6413061666666667


Test set: Avg. loss: 0.0012048654556274414, AUC: 0.6312376666666665


Test set: Avg. loss: 0.01494838809967041, AUC: 0.4806551666666667


Test set: Avg. loss: 0.0010929774443308512, AUC: 0.5900256666666666


Test set: Avg. loss: 0.0010437474648157755, AUC: 0.6801426666666667


Test set: Avg. loss: 0.0010652991930643718, AUC: 0.6801737500000001


Test set: Avg. loss: 0.00754862642288208, AUC: 0.48777675000000004


Test set: Avg. loss: 0.0010817113320032755

In [51]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []

In [None]:
# 3 class triplet loss capped smote
#
# Two-phase training of the embedding network on SMOTE data:
#   1. `start_epoch` warm-up epochs with CappedCELoss and loss_cap=None,
#   2. the remaining epochs with train_triplet_capped_loss, using
#      loss_fns.TripletLoss as cap_calc and the configured loss_cap.
# Test AUC is recorded before training and after every 10th epoch, then
# reduced to mean / variance across n_runs fresh networks.

momentum = 0
learning_rates = [5e-2]
n_runs = 10  # independent re-initialisations per grid point

cap_aucs = []

# Warm-up length. No AUC checkpoint is taken during warm-up, so this must stay
# below 10 for every run to yield the four checkpoints the result row expects.
start_epoch = 2

loss_caps = [0.5]
loss_fn_args = {}


for loss_cap in loss_caps:

    learning_rate_aucs = []

    for learning_rate in learning_rates:
        aucs = []
        for run in range(n_runs):
            network = models.ConvNetWithEmbeddings(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            # Checkpoint 0: untrained network.
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
            model_aucs = [auc]

            # Phase 1: warm-up with loss_cap=None.
            loss_fn_args['loss_cap'] = None
            for epoch in range(start_epoch):
                train.train_softmax_with_embeddings(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)

            # Phase 2: triplet-capped training. The range stops at n_epochs
            # (exclusive) so each run trains for exactly n_epochs epochs in
            # total, like the other experiments; an epoch with index n_epochs
            # would run after the last checkpoint and never be evaluated.
            loss_fn_args['loss_cap'] = loss_cap
            for epoch in range(start_epoch, n_epochs):
                train.train_triplet_capped_loss(epoch, train_loader_tripletloss_smote, network, optimizer, verbose=False, cap_calc=loss_fns.TripletLoss, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0:
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network, embeddings=True)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    # Axes: (learning rate, run, checkpoint).
    learning_rate_aucs = np.asarray(learning_rate_aucs)

    # Aggregate over the runs axis; np.var uses its default ddof=0.
    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)

    cap_aucs.append([auc_mean, auc_variance])


# Emit one row per (loss cap, learning rate) pair, cap-major.
for cap, (auc_mean, auc_variance) in zip(loss_caps, cap_aucs):
    for lr, means, variances in zip(learning_rates, auc_mean, auc_variance):
        rows.append(["triplet_loss_capped_smote", NUM_CLASSES_REDUCED, nums, ratio, lr,
                     means[0], variances[0],
                     means[1], variances[1],
                     means[2], variances[2],
                     means[3], variances[3], cap, norm])

print(rows)



Test set: Avg. loss: 0.0010990204016367595, AUC: 0.5136136666666666


Test set: Avg. loss: 0.0019028458992640176, AUC: 0.7512252500000001


Test set: Avg. loss: 0.002086396058400472, AUC: 0.7691906666666668


Test set: Avg. loss: 0.002416961034138997, AUC: 0.7808670000000001


Test set: Avg. loss: 0.0010924596786499024, AUC: 0.5750945000000001


Test set: Avg. loss: 0.001673059900601705, AUC: 0.7794949999999999


Test set: Avg. loss: 0.002390830198923747, AUC: 0.7884782499999999


Test set: Avg. loss: 0.002485786517461141, AUC: 0.7885251666666666


Test set: Avg. loss: 0.0010973639885584513, AUC: 0.5168527500000001


Test set: Avg. loss: 0.001288370688756307, AUC: 0.7993951666666667


Test set: Avg. loss: 0.001963376998901367, AUC: 0.8154784999999999


Test set: Avg. loss: 0.0018575411240259806, AUC: 0.8272758333333333


Test set: Avg. loss: 0.0011002889474232991, AUC: 0.5800430833333333


Test set: Avg. loss: 0.0014540077050526937, AUC: 0.7614485000000001


Test set: Avg. loss: 0.002

In [None]:
# Append the rows collected by the preceding experiment to the cumulative
# results file, then clear the buffer.
results_path = 'results/convnet_aucs.csv'

new_results = pd.DataFrame(rows, columns=col_names)

# On a fresh checkout the CSV (or its directory) does not exist yet; start a
# new file instead of failing inside read_csv.
if os.path.exists(results_path):
    df = pd.concat([pd.read_csv(results_path), new_results])
else:
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    df = new_results

df.to_csv(results_path, index=False)
# Reset so these rows are not written again by the next save cell.
rows = []