In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os 
from warnings import simplefilter
import pandas as pd
from imblearn.over_sampling import SMOTE  

In [2]:
import models
import class_sampling
import train
import metric_utils
import inference
import loss_fns
import torchvision.ops 

In [3]:
NUM_CLASSES = 10
n_epochs = 30
batch_size_train = 64
batch_size_test = 1000
momentum = 0

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

NUM_CLASSES_REDUCED = 2
nums = (0, 1)
ratio = (100, 1)

CLASS_LABELS = {'airplane': 0,
                 'automobile': 1,
                 'bird': 2,
                 'cat': 3,
                 'deer': 4,
                 'dog': 5,
                 'frog': 6,
                 'horse': 7,
                 'ship': 8,
                 'truck': 9}


simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)
simplefilter(action='ignore', category=DeprecationWarning)

In [4]:
col_names = ["name", 
            "num_classes", 
            "classes_used", 
            "ratio", 
            "learning_rate", 
            "mean_0", "variance_0",
            "mean_10", "variance_10",
            "mean_20", "variance_20",
            "mean_30", "variance_30",
          #   "mean_40", "variance_40",
          #   "mean_50", "variance_50",
             "cap"]

rows = []

In [5]:
train_CIFAR10 = torchvision.datasets.CIFAR10('cifar10', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor() ]))  


test_CIFAR10 = torchvision.datasets.CIFAR10('cifar10', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor()]))

train_CIFAR10.data = train_CIFAR10.data.reshape(50000, 3, 32, 32)
test_CIFAR10.data = test_CIFAR10.data.reshape(10000, 3, 32, 32)

    
reduced_train_CIFAR10 = class_sampling.Reduce(train_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True)
reduced_test_CIFAR10 = class_sampling.Reduce(test_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True)

ratio_train_CIFAR10 = class_sampling.Ratio(train_CIFAR10, NUM_CLASSES_REDUCED, ratio, nums=nums)

triplet_train_CIFAR10 = class_sampling.ForTripletLoss(reduced_train_CIFAR10, smote=False)

smote_train_CIFAR10 = class_sampling.Smote(ratio_train_CIFAR10, 5000 * NUM_CLASSES_REDUCED)
triplet_smote_train_CIFAR10= class_sampling.ForTripletLoss(smote_train_CIFAR10, smote=True)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
targets = ratio_train_CIFAR10.labels 

class_count = np.unique(targets, return_counts=True)[1]
print(class_count)

weight = 1. / class_count

samples_weight = weight[targets]
samples_weight = torch.from_numpy(samples_weight)
oversampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(max(class_count) * NUM_CLASSES_REDUCED), replacement=True)
sampler = torch.utils.data.WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)
undersampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(min(class_count) * NUM_CLASSES_REDUCED), replacement=False)
undersampler_smote = torch.utils.data.WeightedRandomSampler(samples_weight, int(min(class_count) * 50 * NUM_CLASSES_REDUCED), replacement=False)
weight *= class_count[0]

[5000   50]


In [7]:
beta = 0.999 

exp = np.empty_like(targets)
for i, count in enumerate(class_count):
    exp[targets==i] = count
effective_weights = (1 - beta) / ( 1 - (beta ** torch.from_numpy(exp)))

In [8]:
train_loader_reduced = DataLoader(reduced_train_CIFAR10, batch_size=batch_size_train, shuffle=True)  

train_loader_ratio = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, shuffle=True) 

train_loader_oversampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=oversampler)

train_loader_undersampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=undersampler)

train_loader_sampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=sampler)

train_loader_smote = DataLoader(smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_smote_undersampled = DataLoader(smote_train_CIFAR10, batch_size=batch_size_train, sampler=undersampler_smote)

train_loader_tripletloss = DataLoader(triplet_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

train_loader_tripletloss_smote = DataLoader(triplet_smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

test_loader_reduced = DataLoader(reduced_test_CIFAR10, batch_size=batch_size_test, shuffle=True)

In [64]:
# 2 CLASS normal

momentum=0
learning_rates = [5e-4, 1e-3]

learning_rate_aucs = []


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid(epoch, train_loader_reduced, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["normal", 2, nums, (1, 1), learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.001968830943107605, AUC: 0.6361665000000001


Test set: Avg. loss: 0.0005542738735675812, AUC: 0.885664


Test set: Avg. loss: 0.0005292963385581971, AUC: 0.8979130000000001


Test set: Avg. loss: 0.0004977731555700302, AUC: 0.9077430000000001


Test set: Avg. loss: 0.0032240989208221437, AUC: 0.38664699999999996


Test set: Avg. loss: 0.0006063661277294159, AUC: 0.855749


Test set: Avg. loss: 0.0005620519518852234, AUC: 0.8903450000000002


Test set: Avg. loss: 0.0005382097363471985, AUC: 0.898165


Test set: Avg. loss: 0.006873347997665405, AUC: 0.683021


Test set: Avg. loss: 0.0005775293409824372, AUC: 0.867693


Test set: Avg. loss: 0.0004782587885856628, AUC: 0.9038889999999999


Test set: Avg. loss: 0.00045187199115753177, AUC: 0.9145770000000001


Test set: Avg. loss: 0.001451125681400299, AUC: 0.47390000000000004


Test set: Avg. loss: 0.0006520158350467682, AUC: 0.7826169999999999


Test set: Avg. loss: 0.0006005813181400299, AUC: 0.869262


Test set:

In [65]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []


In [12]:
# 2 CLASS ratio

momentum=0
learning_rates = [1e-3, 1e-4, 5e-4, 5e-3]

learning_rate_aucs = []
learning_rate_train_aucs = []


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid(epoch, train_loader_ratio, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                _, train_auc = metric_utils.auc_sigmoid(train_loader_ratio, network) 
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["ratio", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.0020350258350372314, AUC: 0.6529435


Test set: Avg. loss: 0.0018120235204696655, AUC: 0.6530240000000002


Test set: Avg. loss: 0.0010293059689131113, AUC: 0.668872


Test set: Avg. loss: 0.001586657702922821, AUC: 0.7140299999999998


Test set: Avg. loss: 0.0010288638518293305, AUC: 0.736928


Test set: Avg. loss: 0.0013753380179405213, AUC: 0.739265


Test set: Avg. loss: 0.0011858412239811207, AUC: 0.7685119999999999


Test set: Avg. loss: 0.0007551872730255127, AUC: 0.6657620000000001


Test set: Avg. loss: 0.002787360668182373, AUC: 0.6875020000000001


Test set: Avg. loss: 0.0008929764428222091, AUC: 0.7381840000000001


Test set: Avg. loss: 0.0019024794697761536, AUC: 0.735419


Test set: Avg. loss: 0.0007972857817122251, AUC: 0.824272


Test set: Avg. loss: 0.001780514895915985, AUC: 0.8002449999999999


Test set: Avg. loss: 0.0007506171086489564, AUC: 0.867892


Test set: Avg. loss: 0.007656799077987671, AUC: 0.357271


Test set: Avg. loss: 0.002191634


Test set: Avg. loss: 0.002572161316871643, AUC: 0.5555669999999999


Test set: Avg. loss: 0.0009350464684833394, AUC: 0.54104


Test set: Avg. loss: 0.003061597228050232, AUC: 0.6727259999999999


Test set: Avg. loss: 0.0032712137699127198, AUC: 0.501175


Test set: Avg. loss: 0.0010851466581040974, AUC: 0.507844


Test set: Avg. loss: 0.0030594792366027833, AUC: 0.565909


Test set: Avg. loss: 0.0010099006753119797, AUC: 0.566432


Test set: Avg. loss: 0.0026028387546539306, AUC: 0.6164085


Test set: Avg. loss: 0.0009181819367453013, AUC: 0.605664


Test set: Avg. loss: 0.0032295290231704713, AUC: 0.5874085


Test set: Avg. loss: 0.0014447757601737975, AUC: 0.7830085


Test set: Avg. loss: 0.0009910186712104496, AUC: 0.8013079999999999


Test set: Avg. loss: 0.0012968756556510925, AUC: 0.8132029999999999


Test set: Avg. loss: 0.0010575488423651988, AUC: 0.838224


Test set: Avg. loss: 0.0014231621026992798, AUC: 0.83391


Test set: Avg. loss: 0.0008838533683873639, AUC: 0.857556





Test set: Avg. loss: 0.0009399736156262974, AUC: 0.86164


Test set: Avg. loss: 0.0015783169865608215, AUC: 0.8476490000000001


Test set: Avg. loss: 0.0007382010094968989, AUC: 0.900012


Test set: Avg. loss: 0.0022254493236541746, AUC: 0.45958600000000005


Test set: Avg. loss: 0.001722590446472168, AUC: 0.711756


Test set: Avg. loss: 0.000926684569014181, AUC: 0.7398999999999999


Test set: Avg. loss: 0.0012482515573501588, AUC: 0.8040189999999999


Test set: Avg. loss: 0.0011589160163213712, AUC: 0.9036


Test set: Avg. loss: 0.0013439211249351502, AUC: 0.8534299999999999


Test set: Avg. loss: 0.0007977260420523067, AUC: 0.939456


Test set: Avg. loss: 0.0016305903792381287, AUC: 0.399731


Test set: Avg. loss: 0.001686771333217621, AUC: 0.728855


Test set: Avg. loss: 0.0009271248459520907, AUC: 0.7390960000000001


Test set: Avg. loss: 0.0014608917236328125, AUC: 0.7759060000000002


Test set: Avg. loss: 0.0010075070543011815, AUC: 0.814392


Test set: Avg. loss: 0.00143611270

In [13]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [20]:
# 2 CLASS oversampled

momentum=0
learning_rates = [1e-3, 5e-3]

learning_rate_aucs = []


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid(epoch, train_loader_oversampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["oversampled", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)


Test set: Avg. loss: 0.005691136598587036, AUC: 0.7166170000000001


Test set: Avg. loss: 0.000588998019695282, AUC: 0.858235


Test set: Avg. loss: 0.0005738674998283386, AUC: 0.8568835


Test set: Avg. loss: 0.000547566682100296, AUC: 0.864155


Test set: Avg. loss: 0.0024350250959396364, AUC: 0.573198


Test set: Avg. loss: 0.0006682571172714234, AUC: 0.6593775


Test set: Avg. loss: 0.0006437829434871674, AUC: 0.686171


Test set: Avg. loss: 0.0006789233386516571, AUC: 0.644326


Test set: Avg. loss: 0.002518909811973572, AUC: 0.47361


Test set: Avg. loss: 0.0005086382031440735, AUC: 0.861209


Test set: Avg. loss: 0.0012186179161071777, AUC: 0.863998


Test set: Avg. loss: 0.0014082735776901245, AUC: 0.857443


Test set: Avg. loss: 0.0012410336136817932, AUC: 0.405966


Test set: Avg. loss: 0.0007035685777664185, AUC: 0.39492449999999996


Test set: Avg. loss: 0.0006275011897087097, AUC: 0.823426


Test set: Avg. loss: 0.00088487908244133, AUC: 0.843707


Test set: Avg. loss: 0.

In [22]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:14]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [33]:
# 2 CLASS undersampled

momentum=0
learning_rates = [1e-4, 1e-5]

learning_rate_aucs = []


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid(epoch, train_loader_undersampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["undersampled", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.002664048075675964, AUC: 0.35103100000000004


Test set: Avg. loss: 0.0008118733763694763, AUC: 0.39093300000000003


Test set: Avg. loss: 0.0007359330058097839, AUC: 0.42988400000000004


Test set: Avg. loss: 0.0007080410122871399, AUC: 0.476849


Test set: Avg. loss: 0.005757878303527832, AUC: 0.4101775


Test set: Avg. loss: 0.0008514148592948913, AUC: 0.663575


Test set: Avg. loss: 0.0006793743073940277, AUC: 0.6388240000000001


Test set: Avg. loss: 0.0006851064562797546, AUC: 0.599984


Test set: Avg. loss: 0.001174522042274475, AUC: 0.498127


Test set: Avg. loss: 0.0009026037454605102, AUC: 0.569188


Test set: Avg. loss: 0.0007478025853633881, AUC: 0.5202549999999999


Test set: Avg. loss: 0.0007039127349853516, AUC: 0.5297069999999999


Test set: Avg. loss: 0.0009038377702236175, AUC: 0.47197


Test set: Avg. loss: 0.0007971824407577515, AUC: 0.48862799999999995


Test set: Avg. loss: 0.0007626304626464844, AUC: 0.494027


Test set: Avg. loss: 0.00074

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [10]:
# 2 Class Weighted Loss 

momentum=0
learning_rates = [1e-3, 1e-4]

learning_rate_aucs = []

loss_fn_args = {}
loss_fn_args['pos_weight'] = torch.tensor([weight[1]])


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid(epoch, train_loader_ratio, network, optimizer, verbose=False, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                _, train_auc = metric_utils.auc_sigmoid(train_loader_ratio, network) 
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["weighted", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.005942921638488769, AUC: 0.567106


Test set: Avg. loss: 0.000693460613489151, AUC: 0.5014949999999999


Test set: Avg. loss: 0.01101493201633491, AUC: 0.498


Test set: Avg. loss: 0.0006932864785194397, AUC: 0.49949899999999997


Test set: Avg. loss: 0.010916043460959255, AUC: 0.49879999999999997


Test set: Avg. loss: 0.0006932151615619659, AUC: 0.49949999999999994


Test set: Avg. loss: 0.010851628886591089, AUC: 0.4994


Test set: Avg. loss: 0.002580095887184143, AUC: 0.52986


Test set: Avg. loss: 0.0005952333509922028, AUC: 0.8429355000000001


Test set: Avg. loss: 0.01112000550373946, AUC: 0.9091279999999999


Test set: Avg. loss: 0.0005518969893455505, AUC: 0.8701230000000001


Test set: Avg. loss: 0.009937099761301929, AUC: 0.9495319999999999


Test set: Avg. loss: 0.0005057350695133209, AUC: 0.883475


Test set: Avg. loss: 0.009651631164078665, AUC: 0.956386


Test set: Avg. loss: 0.0009529277384281159, AUC: 0.445674


Test set: Avg. loss: 0.0006936190


Test set: Avg. loss: 0.0006944212913513184, AUC: 0.5038005


Test set: Avg. loss: 0.0113616940054563, AUC: 0.5663979999999998


Test set: Avg. loss: 0.007881028413772582, AUC: 0.4730995


Test set: Avg. loss: 0.000692995399236679, AUC: 0.500478


Test set: Avg. loss: 0.010630261945252371, AUC: 0.4824239999999999


Test set: Avg. loss: 0.0006933453977108002, AUC: 0.5038505


Test set: Avg. loss: 0.010637967291444835, AUC: 0.503172


Test set: Avg. loss: 0.0006934205293655395, AUC: 0.5003850000000001


Test set: Avg. loss: 0.010647759791648033, AUC: 0.503188



In [11]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [9]:
# 2 CLASS SMOTE

momentum=0
learning_rates = [5e-3, 1e-4, 1e-3]

learning_rate_aucs = []

loss_fn_args = {}
loss_fn_args['loss_cap'] = None


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid_with_smote(epoch, train_loader_smote, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["smote", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.001968830943107605, AUC: 0.6361665000000001


Test set: Avg. loss: 0.0005302040874958038, AUC: 0.8983949999999999


Test set: Avg. loss: 0.000634994238615036, AUC: 0.825839


Test set: Avg. loss: 0.000631784051656723, AUC: 0.8181655


Test set: Avg. loss: 0.0032240989208221437, AUC: 0.38664699999999996


Test set: Avg. loss: 0.0006933937072753906, AUC: 0.4995


Test set: Avg. loss: 0.0006931578814983368, AUC: 0.4995


Test set: Avg. loss: 0.0006931533217430115, AUC: 0.4995


Test set: Avg. loss: 0.006873347997665405, AUC: 0.683021


Test set: Avg. loss: 0.0005754781067371369, AUC: 0.821701


Test set: Avg. loss: 0.0006926852166652679, AUC: 0.7723930000000001


Test set: Avg. loss: 0.0008418424427509307, AUC: 0.7373959999999999


Test set: Avg. loss: 0.001451125681400299, AUC: 0.47390000000000004


Test set: Avg. loss: 0.0006959674656391144, AUC: 0.4995


Test set: Avg. loss: 0.0006931877434253693, AUC: 0.5


Test set: Avg. loss: 0.0006931476294994354, AUC: 0.5



In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [46]:
# 2 CLASS SMOTE with undersampling

momentum=0
learning_rates = [5e-3, 1e-4, 1e-3]

learning_rate_aucs = []

loss_fn_args = {}
loss_fn_args['loss_cap'] = None


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid_with_smote(epoch, train_loader_smote_undersampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["undersampled_smote", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.001968830943107605, AUC: 0.6361665000000001



TypeError: 'RandomUnderSampler' object is not iterable

In [18]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [10]:
# 2 Class Capped SMOTE 

momentum=0
learning_rates = [1e-4, 5e-4, 1e-3]


cap_aucs = []

caps = [1, 5, 10]

for cap in caps:
    
    loss_fn_args = {}
    loss_fn_args['loss_cap'] = cap
    
    learning_rate_aucs = []
    

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNet(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
            model_aucs.append(auc)
            for epoch in range(n_epochs):
                _, _ = train.train_sigmoid_with_smote(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedBCELoss, loss_fn_args=loss_fn_args)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for i in range(len(cap_aucs)):
    auc_mean = cap_aucs[i][0]
    auc_variance = cap_aucs[i][1]
    cap = caps[i]
    for i in range(len(learning_rates)): 
        row = ["capped_smote", 2, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap]
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.000964056670665741, AUC: 0.520941


Test set: Avg. loss: 0.002801976799964905, AUC: 0.603904


Test set: Avg. loss: 0.0023315074443817137, AUC: 0.6346155000000001


Test set: Avg. loss: 0.001935368299484253, AUC: 0.6770020000000001


Test set: Avg. loss: 0.0015654934048652648, AUC: 0.5420925


Test set: Avg. loss: 0.0012076606750488282, AUC: 0.6018205


Test set: Avg. loss: 0.0006806973814964295, AUC: 0.745674


Test set: Avg. loss: 0.0006647063195705414, AUC: 0.8141400000000001


Test set: Avg. loss: 0.007722229480743408, AUC: 0.477479


Test set: Avg. loss: 0.002731645941734314, AUC: 0.535709


Test set: Avg. loss: 0.00225200891494751, AUC: 0.597371


Test set: Avg. loss: 0.0006625505685806275, AUC: 0.812553


Test set: Avg. loss: 0.0012214133143424988, AUC: 0.549326


Test set: Avg. loss: 0.002234416365623474, AUC: 0.6193310000000001


Test set: Avg. loss: 0.0015321336984634398, AUC: 0.7092240000000001


Test set: Avg. loss: 0.0010821450352668763, AUC: 0.7708


Test set: Avg. loss: 0.0006544892489910126, AUC: 0.7352115000000004


Test set: Avg. loss: 0.0006303447484970093, AUC: 0.807039


Test set: Avg. loss: 0.0006590245068073272, AUC: 0.7533475000000001


Test set: Avg. loss: 0.0007899287045001984, AUC: 0.670127


Test set: Avg. loss: 0.0006518673598766327, AUC: 0.773551


Test set: Avg. loss: 0.0006446815729141235, AUC: 0.7977499999999997


Test set: Avg. loss: 0.0006375725269317626, AUC: 0.8204609999999999


Test set: Avg. loss: 0.003895806074142456, AUC: 0.585514


Test set: Avg. loss: 0.00063050776720047, AUC: 0.8090909999999999


Test set: Avg. loss: 0.0006110207438468934, AUC: 0.831422


Test set: Avg. loss: 0.0006125293970108033, AUC: 0.8279915000000001


Test set: Avg. loss: 0.0012400510311126709, AUC: 0.5677885


Test set: Avg. loss: 0.0006437472999095917, AUC: 0.7166265


Test set: Avg. loss: 0.0006125587224960328, AUC: 0.8170925


Test set: Avg. loss: 0.0006116727888584137, AUC: 0.8358605000000001


Test set: Avg. loss: 0.000811


Test set: Avg. loss: 0.0006174682676792144, AUC: 0.8310925


Test set: Avg. loss: 0.007684100151062012, AUC: 0.4070105


Test set: Avg. loss: 0.0006202857792377472, AUC: 0.8066605


Test set: Avg. loss: 0.0006176858246326447, AUC: 0.816376


Test set: Avg. loss: 0.0006103067696094513, AUC: 0.830536


Test set: Avg. loss: 0.0013037946224212647, AUC: 0.638735


Test set: Avg. loss: 0.0005673413872718811, AUC: 0.789875


Test set: Avg. loss: 0.0005420814156532288, AUC: 0.822748


Test set: Avg. loss: 0.0005116104185581207, AUC: 0.8447049999999999


Test set: Avg. loss: 0.0007415176630020142, AUC: 0.5973919999999999


Test set: Avg. loss: 0.0006263005435466766, AUC: 0.8075709999999999


Test set: Avg. loss: 0.0005980682075023651, AUC: 0.8442180000000001


Test set: Avg. loss: 0.0006044952571392059, AUC: 0.84839


Test set: Avg. loss: 0.000744761973619461, AUC: 0.6458790000000001


Test set: Avg. loss: 0.0006299040615558624, AUC: 0.7886010000000001


Test set: Avg. loss: 0.0006408950388431

In [11]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = (col_names[0:14]))

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [None]:
# 2 CLASS Focal Loss

momentum=0
learning_rates = [1e-4, 1e-5]

learning_rate_aucs = []

loss_fn_args = {}


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid(epoch, train_loader_ratio, network, optimizer, verbose=False, loss_fn=loss_fns.SigmoidFocalLoss, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["focal_loss", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)


In [38]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

NameError: name 'df2' is not defined

In [12]:
# distance + capped loss
momentum=0
learning_rates = [1e-3, 5e-4, 1e-4]


    
loss_fn_args = {}
loss_fn_args['loss_cap'] = None

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNetWithEmbeddings(2)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid_euclidean_distance(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedBCELoss, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)


for i in range(len(learning_rates)): 
    row = ["distance_capped_smote_fixed1", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)


Test set: Avg. loss: 0.0008439761102199554, AUC: 0.458455


Test set: Avg. loss: 0.0005637407302856445, AUC: 0.8840095000000001


Test set: Avg. loss: 0.0004919547736644745, AUC: 0.9045030000000001


Test set: Avg. loss: 0.000604819118976593, AUC: 0.8537779999999999


Test set: Avg. loss: 0.009120811462402343, AUC: 0.447008


Test set: Avg. loss: 0.0006216726899147034, AUC: 0.834169


Test set: Avg. loss: 0.0005918767750263214, AUC: 0.8661705000000002


Test set: Avg. loss: 0.0006442052721977234, AUC: 0.8375685


Test set: Avg. loss: 0.0017059873938560486, AUC: 0.48120300000000005


Test set: Avg. loss: 0.0005866606831550598, AUC: 0.8596909999999999


Test set: Avg. loss: 0.0005112299025058746, AUC: 0.8946060000000001


Test set: Avg. loss: 0.0006069133877754211, AUC: 0.857534


Test set: Avg. loss: 0.0013419885039329528, AUC: 0.46782999999999997


Test set: Avg. loss: 0.0006542591452598572, AUC: 0.769544


Test set: Avg. loss: 0.000525863379240036, AUC: 0.8919025


Test set: Avg. los

In [13]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [9]:
# cosine distance + capped loss
momentum=0
learning_rates = [1e-3, 5e-4, 1e-4]


    
loss_fn_args = {}
loss_fn_args['loss_cap'] = None

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNetWithEmbeddings(2)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid_cosine_distance(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedBCELoss, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)


for i in range(len(learning_rates)): 
    row = ["cosine_distance_capped_smote", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], 5]
    rows.append(row)


Test set: Avg. loss: 0.0011436396837234497, AUC: 0.457495


Test set: Avg. loss: 0.0005700503587722778, AUC: 0.8682060000000001


Test set: Avg. loss: 0.0005558769106864929, AUC: 0.877864


Test set: Avg. loss: 0.0007304551005363465, AUC: 0.661598


Test set: Avg. loss: 0.00553279709815979, AUC: 0.2802325


Test set: Avg. loss: 0.0006947735548019409, AUC: 0.499513


Test set: Avg. loss: 0.0006955170035362244, AUC: 0.501002


Test set: Avg. loss: 0.0006960488855838776, AUC: 0.502988


Test set: Avg. loss: 0.0016392868161201478, AUC: 0.362875


Test set: Avg. loss: 0.0006916555464267731, AUC: 0.5249240000000001


Test set: Avg. loss: 0.0006274392902851104, AUC: 0.8069995


Test set: Avg. loss: 0.0005601176917552948, AUC: 0.8731455


Test set: Avg. loss: 0.0008223848342895508, AUC: 0.448507


Test set: Avg. loss: 0.0005955914556980133, AUC: 0.865737


Test set: Avg. loss: 0.0006267938613891601, AUC: 0.8331379999999999


Test set: Avg. loss: 0.0006058297157287598, AUC: 0.8535895000000001


In [11]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [None]:
# capped loss with everything capped + cosine distance 
momentum=0
learning_rates = [5e-4]

    
loss_fn_args = {}
loss_fn_args['loss_cap'] = None

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNetWithEmbeddings(2)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_sigmoid_cosine_distance(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.AllCappedBCELoss, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)


for i in range(len(learning_rates)): 
    row = ["cosine_distance_all_capped", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [26]:
# 2 class triplet loss no ratio 
# no smote 

# note: sometimes can get a very high accuracy but may diverge

momentum=0
learning_rates = [(1e-7, 1e-5)]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(3): 
        model_aucs = []
        embed_network = models.ConvNetOnlyEmbeddings(2)
        linear_probe = models.ConvNetLinearProbe(2)
        complete_network = models.CompleteConvNet(embed_network, linear_probe)
        embed_optimizer = optim.SGD(embed_network.parameters(), lr=learning_rate[0], momentum=momentum)
        linear_optimizer = optim.SGD(complete_network.parameters(), lr=learning_rate[1], momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network) 
        model_aucs.append(auc)
        for epoch in range(15):
            _, train_losses = train.train_triplet_loss(epoch, train_loader_tripletloss, embed_network, embed_optimizer, verbose=False)
            print("Train loss: " + str(np.mean(np.array(train_losses))))
        for epoch in range(50):
            _, _ = train.train_linear_probe(epoch, train_loader_reduced, complete_network, linear_optimizer, verbose=False)
            _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network)
            if (epoch + 1) % 10 == 0: 
                model_aucs.append(auc) 
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)


learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["triplet_loss", 2, nums, (1, 1), learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], 
           auc_mean[i][4], auc_variance[i][4],
           auc_mean[i][5], auc_variance[i][5],
           None]
    rows.append(row)

AttributeError: 'tuple' object has no attribute 'squeeze'

In [32]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [27]:
# triplet loss with ratio 
# need to make a new train loader if running this 

momentum=0
learning_rates = [(1e-7, 1e-3)]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(2):
        model_aucs = []
        embed_network = models.ConvNetOnlyEmbeddings(2)
        linear_probe = models.ConvNetLinearProbe(2)
        complete_network = models.CompleteConvNet(embed_network, linear_probe)
        embed_optimizer = optim.SGD(embed_network.parameters(), lr=learning_rate[0], momentum=momentum)
        linear_optimizer = optim.SGD(linear_probe.parameters(), lr=learning_rate[1], momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, train_losses = train.train_triplet_loss(epoch, train_loader_tripletloss, complete_network.embed_network, embed_optimizer, verbose=False)
            print("Train loss: " + str(np.mean(np.array(train_losses))))
            
    #    _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network)
        for epoch in range(50):
            _, _ = train.train_linear_probe(epoch, train_loader_reduced, complete_network.embed_network, complete_network.linear_probe, linear_optimizer, verbose=False)
            _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network)
            if (epoch + 1) % 10 == 0: 
                model_aucs.append(auc) 
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)
learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["triplet_loss", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], 
           auc_mean[i][4], auc_variance[i][4],
           auc_mean[i][5], auc_variance[i][5],
           None]
    rows.append(row)

AttributeError: 'tuple' object has no attribute 'squeeze'

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [30]:
# triplet loss with SMOTE and cosine distance capped loss 

momentum=0
learning_rates = [(1e-7, 1e-4)]

loss_fn_args = {}
loss_fn_args['loss_cap'] = None

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(3): 
        model_aucs = []
        embed_network = models.ConvNetOnlyEmbeddings(2)
        linear_probe = models.ConvNetLinearProbe(2)
        complete_network = models.CompleteConvNet(embed_network, linear_probe)
        embed_optimizer = optim.SGD(embed_network.parameters(), lr=learning_rate[0], momentum=momentum)
        linear_optimizer = optim.SGD(complete_network.parameters(), lr=learning_rate[1], momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network, embeddings=True) 
        model_aucs.append(auc)
        for epoch in range(15):
            _, train_losses = train.train_triplet_loss(epoch, train_loader_tripletloss_smote, embed_network, embed_optimizer, verbose=False)
            print("Train triplet loss: " + str(np.mean(np.array(train_losses))))
        for epoch in range(50):
            _, _ = train.train_sigmoid_cosine_distance(epoch, train_loader_smote, complete_network, linear_optimizer, verbose=False, loss_fn=loss_fns.CappedBCELoss)
            _, auc = metric_utils.auc_sigmoid(test_loader_reduced, complete_network, embeddings=True)
            if (epoch + 1) % 10 == 0: 
                model_aucs.append(auc) 
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["cosine_distance_capped_smote_triplet_loss", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3], 
           auc_mean[i][4], auc_variance[i][4],
           auc_mean[i][5], auc_variance[i][5],
           5.0]
    rows.append(row)


Test set: Avg. loss: 0.002908367872238159, AUC: 0.337541

Train triplet loss: 224.1131993069011
Train triplet loss: 14.503459689343812
Train triplet loss: 2.3045721103431314
Train triplet loss: 0.9209006834941306
Train triplet loss: 1.294924501021197
Train triplet loss: 0.7550809907305772
Train triplet loss: 0.9270113535747406
Train triplet loss: 0.7487862869432778
Train triplet loss: 0.2525857892006066
Train triplet loss: 0.45942818321240175
Train triplet loss: 0.1847958750785536
Train triplet loss: 0.10644819022743565
Train triplet loss: 0.2511055499884733
Train triplet loss: 0.08564935216478481
Train triplet loss: 0.05430815022462492

Test set: Avg. loss: 0.0006781926453113556, AUC: 0.6198600000000001


Test set: Avg. loss: 0.000667210340499878, AUC: 0.6874805


Test set: Avg. loss: 0.0006613142788410187, AUC: 0.727465


Test set: Avg. loss: 0.0006528576910495758, AUC: 0.7565990000000002


Test set: Avg. loss: 0.0006457682251930236, AUC: 0.7813000000000001


Test set: Avg. loss: 0.

Train triplet loss: 0.1939485741269057
Train triplet loss: 0.3785654617722627
Train triplet loss: 0.00914804229311123
Train triplet loss: 0.10145357042361217
Train triplet loss: 0.17515371901214502
Train triplet loss: 0.3923213834975176
Train triplet loss: 0.014378716231911046
Train triplet loss: 0.0
Train triplet loss: 0.0

Test set: Avg. loss: 0.0006661825478076935, AUC: 0.681546


Test set: Avg. loss: 0.0006567904353141785, AUC: 0.7220775000000001


Test set: Avg. loss: 0.0006522080004215241, AUC: 0.728313


Test set: Avg. loss: 0.0006458030641078949, AUC: 0.7563310000000001


Test set: Avg. loss: 0.0006477517187595367, AUC: 0.7530370000000001


Test set: Avg. loss: 0.000641948789358139, AUC: 0.7699455000000002


Test set: Avg. loss: 0.0006373808979988099, AUC: 0.783147


Test set: Avg. loss: 0.0006401232182979584, AUC: 0.7701199999999999


Test set: Avg. loss: 0.0006400469243526459, AUC: 0.78073


Test set: Avg. loss: 0.0006371627151966095, AUC: 0.7924995


Test set: Avg. loss: 0.0

In [31]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [None]:
# capped smote using triplet loss

momentum=0
learning_rates = [1e-3, 1e-4, 1e-5]

loss_fn_args = {}
loss_fn_args['loss_cap'] = None

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(5): 
        model_aucs = []
        network = models.ConvNetWithEmbeddings(2)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_triplet_capped_loss(epoch, train_loader_tripletloss_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedBCELoss, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_sigmoid(test_loader_reduced, network, embeddings=True)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["triplet_loss_capped_smote", 2, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)


Test set: Avg. loss: 0.003393930196762085, AUC: 0.500865



In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [39]:
NUM_CLASSES_REDUCED = 3
nums = (0, 3, 1)
ratio = (20, 2, 1)


reduced_train_CIFAR10 = class_sampling.Reduce(train_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True)
reduced_test_CIFAR10 = class_sampling.Reduce(test_CIFAR10, NUM_CLASSES_REDUCED, nums=nums, CIFAR=True)

ratio_train_CIFAR10 = class_sampling.Ratio(train_CIFAR10, NUM_CLASSES_REDUCED, ratio, nums=nums)
targets = ratio_train_CIFAR10.labels 
class_count = np.unique(targets, return_counts=True)[1]

smote_train_CIFAR10 = class_sampling.Smote(ratio_train_CIFAR10, 5000 * NUM_CLASSES_REDUCED)


weight = 1. / class_count
samples_weight = weight[targets]
samples_weight = torch.from_numpy(samples_weight)
oversampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(max(class_count) * NUM_CLASSES_REDUCED), replacement=True)
sampler = torch.utils.data.WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)
undersampler = torch.utils.data.WeightedRandomSampler(samples_weight, int(min(class_count) * NUM_CLASSES_REDUCED), replacement=False)

weight *= max(class_count)

train_loader_reduced = DataLoader(reduced_train_CIFAR10, batch_size=batch_size_train, shuffle=True)  

train_loader_ratio = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, shuffle=True) 

train_loader_oversampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=oversampler)

train_loader_undersampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=undersampler)

train_loader_sampled = DataLoader(ratio_train_CIFAR10, batch_size=batch_size_train, sampler=sampler)

train_loader_smote = DataLoader(smote_train_CIFAR10, batch_size=batch_size_train, shuffle=True)

test_loader_reduced = DataLoader(reduced_test_CIFAR10, batch_size=batch_size_test, shuffle=True)

In [8]:
# 3 class normal

learning_rates = [1e-4, 1e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_reduced, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["normal", 3, nums, (1, 1, 1), learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)


Test set: Avg. loss: 0.004159178098042806, AUC: 0.5



KeyboardInterrupt: 

In [11]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [15]:
#  3 class ratio

learning_rates = [1e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_ratio, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["ratio", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)


Test set: Avg. loss: 0.0028148902257283527, AUC: 0.5499715000000001


Test set: Avg. loss: 0.0012899534304936728, AUC: 0.5


Test set: Avg. loss: 0.0011812520821889241, AUC: 0.5


Test set: Avg. loss: 0.0011003198623657227, AUC: 0.5


Test set: Avg. loss: 0.0012160149812698365, AUC: 0.496695


Test set: Avg. loss: 0.0012602541049321493, AUC: 0.5


Test set: Avg. loss: 0.0011723772684733072, AUC: 0.5


Test set: Avg. loss: 0.0012640552123387655, AUC: 0.5


Test set: Avg. loss: 0.001084180474281311, AUC: 0.47396475


Test set: Avg. loss: 0.0012727203766504925, AUC: 0.4995


Test set: Avg. loss: 0.0010816088120142618, AUC: 0.5045000000000001


Test set: Avg. loss: 0.0011214701334635417, AUC: 0.5035


Test set: Avg. loss: 0.0026806603272755943, AUC: 0.49925


Test set: Avg. loss: 0.0012301311095555623, AUC: 0.5


Test set: Avg. loss: 0.001151394208272298, AUC: 0.5


Test set: Avg. loss: 0.00107947838306427, AUC: 0.5


Test set: Avg. loss: 0.0020958302021026613, AUC: 0.44825000000000004




In [16]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [17]:
# 3 class oversampled 

learning_rates = [1e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_oversampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["oversampled", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.003038533369700114, AUC: 0.5


Test set: Avg. loss: 0.0010433460474014282, AUC: 0.574998


Test set: Avg. loss: 0.0011814462741216024, AUC: 0.7483075


Test set: Avg. loss: 0.001038165827592214, AUC: 0.7227574999999999


Test set: Avg. loss: 0.007914267698923746, AUC: 0.49925


Test set: Avg. loss: 0.0010645556449890136, AUC: 0.37466975


Test set: Avg. loss: 0.001031708836555481, AUC: 0.6914997500000001


Test set: Avg. loss: 0.0011311002572377523, AUC: 0.75428325


Test set: Avg. loss: 0.006719241301218669, AUC: 0.5075000000000001


Test set: Avg. loss: 0.0010695095856984456, AUC: 0.5922592499999999


Test set: Avg. loss: 0.0010078495343526204, AUC: 0.66097125


Test set: Avg. loss: 0.0009656443595886231, AUC: 0.68574


Test set: Avg. loss: 0.002060540755589803, AUC: 0.5615957500000001


Test set: Avg. loss: 0.0010401540199915568, AUC: 0.3627755


Test set: Avg. loss: 0.0009678711692492167, AUC: 0.6725415


Test set: Avg. loss: 0.0009192776083946228, AUC: 0.67

In [18]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)
rows = [] 

In [19]:
# 3 class undersampled  

learning_rates = [1e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_undersampled, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["undersampled", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.0020289310614267984, AUC: 0.4756965


Test set: Avg. loss: 0.001091553012530009, AUC: 0.48456375


Test set: Avg. loss: 0.0010886570612589517, AUC: 0.4860155


Test set: Avg. loss: 0.0010853137572606406, AUC: 0.5097382500000001


Test set: Avg. loss: 0.0019787512222925823, AUC: 0.57222475


Test set: Avg. loss: 0.0011532610654830932, AUC: 0.49576575000000006


Test set: Avg. loss: 0.001145702044169108, AUC: 0.4937625


Test set: Avg. loss: 0.001140582799911499, AUC: 0.49350675


Test set: Avg. loss: 0.005084774017333984, AUC: 0.5


Test set: Avg. loss: 0.0010802105665206909, AUC: 0.50175


Test set: Avg. loss: 0.0010755322376887005, AUC: 0.502


Test set: Avg. loss: 0.0010662730137507121, AUC: 0.5022505


Test set: Avg. loss: 0.009249690055847169, AUC: 0.49975


Test set: Avg. loss: 0.0010924884875615438, AUC: 0.50599775


Test set: Avg. loss: 0.0010882760683695474, AUC: 0.5044992500000001


Test set: Avg. loss: 0.0010864359935124715, AUC: 0.50449775


Test set:

In [20]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = [] 

In [21]:
#  3 class weighted

learning_rates = [1e-3]

learning_rate_aucs = []

loss_fn_args={}
loss_fn_args['weight'] = torch.from_numpy(weight).float()


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_ratio, network, optimizer, verbose=False, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["weighted", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.00608756939570109, AUC: 0.501


Test set: Avg. loss: 0.0010771948893864949, AUC: 0.5007499999999999


Test set: Avg. loss: 0.001078349749247233, AUC: 0.49949999999999994


Test set: Avg. loss: 0.0010784133275349936, AUC: 0.50025


Test set: Avg. loss: 0.00372534704208374, AUC: 0.50023525


Test set: Avg. loss: 0.0010370986064275106, AUC: 0.49925


Test set: Avg. loss: 0.0010473136504491171, AUC: 0.49649774999999996


Test set: Avg. loss: 0.0010565840800603231, AUC: 0.5224875


Test set: Avg. loss: 0.0020539817810058592, AUC: 0.50625


Test set: Avg. loss: 0.0010878965854644776, AUC: 0.50025


Test set: Avg. loss: 0.001087069312731425, AUC: 0.49999999999999994


Test set: Avg. loss: 0.0010845698912938435, AUC: 0.50025


Test set: Avg. loss: 0.006659661134084066, AUC: 0.49401975000000004


Test set: Avg. loss: 0.0010928993225097657, AUC: 0.53216375


Test set: Avg. loss: 0.0010782272815704345, AUC: 0.54923675


Test set: Avg. loss: 0.0010686718225479126, AUC: 0.55

In [22]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = [] 

In [23]:
#  3 class focal loss

learning_rates = [1e-3]

learning_rate_aucs = []

loss_fn_args={}
loss_fn_args['reduction'] = 'mean'


for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_ratio, network, optimizer, verbose=False, loss_fn=loss_fns.SoftmaxFocalLoss, loss_fn_args=loss_fn_args)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["focal_loss", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.0011305590867996215, AUC: 0.4987425


Test set: Avg. loss: 0.001101138710975647, AUC: 0.5


Test set: Avg. loss: 0.0011463310718536376, AUC: 0.5


Test set: Avg. loss: 0.0011780770619710286, AUC: 0.5


Test set: Avg. loss: 0.0038984591166178386, AUC: 0.5


Test set: Avg. loss: 0.0011603130102157593, AUC: 0.5


Test set: Avg. loss: 0.0011915287176767985, AUC: 0.5


Test set: Avg. loss: 0.0012100194692611695, AUC: 0.5


Test set: Avg. loss: 0.006762343406677246, AUC: 0.50025


Test set: Avg. loss: 0.001145007332166036, AUC: 0.5


Test set: Avg. loss: 0.0011761978069941203, AUC: 0.5


Test set: Avg. loss: 0.0011957573493321736, AUC: 0.5


Test set: Avg. loss: 0.0017103184858957927, AUC: 0.4808077500000001


Test set: Avg. loss: 0.0011710822582244873, AUC: 0.50025


Test set: Avg. loss: 0.0012000585397084554, AUC: 0.50025


Test set: Avg. loss: 0.0012189826170603433, AUC: 0.50025


Test set: Avg. loss: 0.003416938861211141, AUC: 0.5


Test set: Avg. loss: 0.00113592

In [24]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [34]:
#  3 class SMOTE

learning_rates = [1e-3]

learning_rate_aucs = []

for learning_rate in learning_rates:
    aucs = []
    for i in range(10):
        model_aucs = []
        network = models.ConvNet(NUM_CLASSES_REDUCED)
        optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
        _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
        model_aucs.append(auc)
        for epoch in range(n_epochs):
            _, _ = train.train_softmax(epoch, train_loader_smote, network, optimizer, verbose=False)
            if (epoch + 1) % 10 == 0: 
                _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                model_aucs.append(auc)
        aucs.append(model_aucs)
    learning_rate_aucs.append(aucs)

learning_rate_aucs = np.asarray(learning_rate_aucs)

auc_mean = np.mean(learning_rate_aucs, axis=1)
auc_variance = np.var(learning_rate_aucs, axis=1)

for i in range(len(learning_rates)): 
    row = ["smote", 3, nums, ratio, learning_rates[i],
            auc_mean[i][0], auc_variance[i][0], 
            auc_mean[i][1], auc_variance[i][1],
            auc_mean[i][2], auc_variance[i][2],
            auc_mean[i][3], auc_variance[i][3]]
    rows.append(row)



Test set: Avg. loss: 0.004159178098042806, AUC: 0.5


Test set: Avg. loss: 0.0010713002681732179, AUC: 0.49825


Test set: Avg. loss: 0.0010870064496994018, AUC: 0.49875


Test set: Avg. loss: 0.0010937161048253376, AUC: 0.4980015


Test set: Avg. loss: 0.001217594623565674, AUC: 0.5495857500000001


Test set: Avg. loss: 0.0010883294343948364, AUC: 0.54002475


Test set: Avg. loss: 0.0009588491717974345, AUC: 0.6908785


Test set: Avg. loss: 0.0010892333984375, AUC: 0.6341857500000001


Test set: Avg. loss: 0.0012967588504155477, AUC: 0.44993225000000003


Test set: Avg. loss: 0.0010879942576090494, AUC: 0.49974900000000005


Test set: Avg. loss: 0.0010939101775487264, AUC: 0.4984995


Test set: Avg. loss: 0.0010963983138402302, AUC: 0.4980035


Test set: Avg. loss: 0.009516664822896322, AUC: 0.4979995


Test set: Avg. loss: 0.0010874385436375935, AUC: 0.502996


Test set: Avg. loss: 0.0010935383637746174, AUC: 0.50224575


Test set: Avg. loss: 0.0010963813463846842, AUC: 0.50149875



In [35]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names[0:13]) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []

In [None]:
# 3 class capped loss 

momentum=0
learning_rates = [1e-2]


cap_aucs = []

caps = [1, 5, 10]

for cap in caps:
    
    loss_fn_args = {}
    loss_fn_args['loss_cap'] = cap
    
    learning_rate_aucs = []
    

    for learning_rate in learning_rates:
        aucs = []
        for i in range(10):
            model_aucs = []
            network = models.ConvNet(NUM_CLASSES_REDUCED)
            optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
            _, auc = metric_utils.auc_softmax(test_loader_reduced, network) 
            model_aucs.append(auc)
            for epoch in range(n_epochs):
                _, _ = train.train_softmax(epoch, train_loader_smote, network, optimizer, verbose=False, loss_fn=loss_fns.CappedCELoss, loss_fn_args=loss_fn_args, smote=True)
                if (epoch + 1) % 10 == 0: 
                    _, auc = metric_utils.auc_softmax(test_loader_reduced, network)
                    model_aucs.append(auc)
            aucs.append(model_aucs)
        learning_rate_aucs.append(aucs)

    learning_rate_aucs = np.asarray(learning_rate_aucs)

    auc_mean = np.mean(learning_rate_aucs, axis=1)
    auc_variance = np.var(learning_rate_aucs, axis=1)
    
    
    cap_aucs.append([auc_mean, auc_variance])

    
    
for i in range(len(cap_aucs)):
    auc_mean = cap_aucs[i][0]
    auc_variance = cap_aucs[i][1]
    cap = caps[i]
    for i in range(len(learning_rates)): 
        row = ["capped_smote", 3, nums, ratio, learning_rates[i],
                auc_mean[i][0], auc_variance[i][0], 
                auc_mean[i][1], auc_variance[i][1],
                auc_mean[i][2], auc_variance[i][2],
                auc_mean[i][3], auc_variance[i][3], cap]
        rows.append(row)

print(rows)



Test set: Avg. loss: 0.005159725666046142, AUC: 0.47875


Test set: Avg. loss: 0.0013107466697692871, AUC: 0.5


Test set: Avg. loss: 0.0014245965083440144, AUC: 0.5


Test set: Avg. loss: 0.0013205681641896565, AUC: 0.5


Test set: Avg. loss: 0.0036626233259836835, AUC: 0.5045


Test set: Avg. loss: 0.0012807045380274454, AUC: 0.55175


Test set: Avg. loss: 0.0008384430607159932, AUC: 0.6527735


Test set: Avg. loss: 0.0013193607727686565, AUC: 0.58771125


Test set: Avg. loss: 0.001882831374804179, AUC: 0.568451


Test set: Avg. loss: 0.0014629533290863037, AUC: 0.5


Test set: Avg. loss: 0.0013556772470474244, AUC: 0.5


Test set: Avg. loss: 0.0012844537496566773, AUC: 0.5


Test set: Avg. loss: 0.005783709685007731, AUC: 0.5153915


Test set: Avg. loss: 0.0014496474663416544, AUC: 0.68025


Test set: Avg. loss: 0.0013263502518335978, AUC: 0.72725


Test set: Avg. loss: 0.0005901109576225281, AUC: 0.6950000000000001


Test set: Avg. loss: 0.015779575983683267, AUC: 0.47575


Test s

In [None]:
df1 = pd.read_csv('results/convnet_aucs.csv')

df2 = pd.DataFrame(rows, columns = col_names) 

df = pd.concat([df1, df2])

df.to_csv('results/convnet_aucs.csv', index=False)

rows = []