In [25]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import time
import os
import argparse
import utils
import torchvision
from torchvision import datasets, models, transforms
from glob import glob
# import apex.amp as amp
import torch.backends.cudnn as cudnn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from PIL import Image
from torch.utils.data import TensorDataset, DataLoader, Dataset

import pandas as pd
from typing import Any, Optional, Tuple
import multiprocessing
import tensorflow as tf
from tqdm import tqdm_notebook as tqdm
from random import random

In [67]:
MIN_SAMPLES_PER_CLASS = 25

# 36, 82, 34, 74
SELECT_CATEGORIES = [36, 74, 82]
IMAGES_DIR = "/home/daniel/projects/transfer_learning/sample_files"
NUMBER_OF_TRAIN_IMAGES_PER_CLASS =25
ORIGINAL_IMAGE_SIZE = 299
IMAGE_SIZE = 299
BATCH_SIZE = 256
NUM_WORKERS = int(multiprocessing.cpu_count() / 2)
BATCH_SIZE = 25
RESNET_SIZE = 18
PREDICT_ONLY = False
LEARNING_RATE = 1e-3
LR_STEP = 3
LR_FACTOR = 0.5
USE_PARALLEL = False
NUM_EPOCHS = 40
LOG_FREQ = 500
NUM_TOP_PREDICTS = 1
MAX_STEPS_PER_EPOCH = 2 ** 32
PREDICT_ONLY = False
TIME_LIMIT = 500 * 60 * 60
IN_KERNEL = False

In [68]:
def has_time_run_out() -> bool:
    return time.time() - global_start_time > TIME_LIMIT - 500


class AverageMeter:
    ''' Computes and stores the average and current value '''
    def __init__(self) -> None:
        self.reset()

    def reset(self) -> None:
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val: float, n: int = 1) -> None:
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

        
class ImageDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe: pd.DataFrame, mode: str) -> None:
        print(f'creating data loader - {mode}')
        assert mode in ['train', 'val', 'test']

        self.df = dataframe
        self.mode = mode

        if self.mode == "test":
            transforms_list = [
                transforms.CenterCrop(ORIGINAL_IMAGE_SIZE),
                transforms.Resize(IMAGE_SIZE)
            ]
        elif self.mode == "train":
            transforms_list = [
                transforms.CenterCrop(ORIGINAL_IMAGE_SIZE),
                transforms.Resize(IMAGE_SIZE)
                
#                 transforms.Resize(ORIGINAL_IMAGE_SIZE)
            ]
        else:
            transforms_list = [
                transforms.CenterCrop(ORIGINAL_IMAGE_SIZE),
                transforms.Resize(IMAGE_SIZE)
            ]

        if self.mode == 'train':
            transforms_list.extend([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(IMAGE_SIZE),
                transforms.RandomChoice([
                    transforms.RandomChoice([
                        transforms.RandomResizedCrop(IMAGE_SIZE),
                        transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
                        transforms.RandomAffine(degrees=15, translate=(0.2, 0.2),
                                                scale=(0.8, 1.2), shear=15,
                                                resample=Image.BILINEAR)
                    ]),
                    transforms.RandomChoice([
                        transforms.Grayscale(num_output_channels=3),
                        transforms.RandomRotation(degrees=90),
                        transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
                        transforms.RandomAffine(degrees=15, translate=(0.2, 0.2),
                                                scale=(0.8, 1.2), shear=15,
                                                resample=Image.BILINEAR)
                    ])
                ])
            ])


        transforms_list.extend([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
        ])
        self.transforms = transforms.Compose(transforms_list)

    def __getitem__(self, index: int) -> Any:
        ''' Returns: tuple (sample, target) '''
#         filename = self.df.image_id.values[index]
        filename = "COCO_train2014_{}.jpg".format(str(self.df.image_id.values[index]).zfill(12))

        part = 1 if self.mode == 'test' or filename[0] in '01234567' else 2
#         directory = 'test' if self.mode == 'test' else 'train_' + filename[0]
        # sample = Image.open(f'../input/google-landmarks-2019-64x64-part{part}/{directory}/{self.mode}_64/{filename}.jpg')
        if self.mode == "train":
            sample = Image.open(f'{IMAGES_DIR}/{filename}')
        elif self.mode == "val":
            sample = Image.open(f'{IMAGES_DIR}/{filename}')
        else:
            sample = Image.open(f'{IMAGES_DIR}/{filename}')
        assert sample.mode == 'RGB'

        image = self.transforms(sample)

        if self.mode == 'test':
            return image
        else:
            return image, self.df["category_id"].values[index]

    def __len__(self) -> int:
        return self.df.shape[0]

In [69]:
def load_data(checkpoint: any = None) -> 'Tuple[DataLoader[np.ndarray], DataLoader[np.ndarray], LabelEncoder, int]':
    label_column = "category_id"
    torch.multiprocessing.set_sharing_strategy('file_system')
    cudnn.benchmark = True

    # only use classes which have at least MIN_SAMPLES_PER_CLASS samples
    print('loading data...')
    df = pd.read_csv("sample_images_labels.txt")
    image_files = ["COCO_train2014_{}.jpg".format(str(x).zfill(12)) for x in df["image_id"].tolist()]

    counts = df[label_column].value_counts()
    selected_classes = counts[counts >= MIN_SAMPLES_PER_CLASS].index
    num_classes = selected_classes.shape[0]
    print('classes with at least N samples:', num_classes)
    train_df = df.loc[df["category_id"].isin(selected_classes)].copy()
    print('train_df', train_df.shape)
    train_exists = lambda img: os.path.exists(f'{IMAGES_DIR}/COCO_train2014_{str(img).zfill(12)}.jpg')
    train_df = train_df.loc[train_df["image_id"].apply(train_exists)].copy()
    print('train_df after filtering', train_df.shape)
    train_df = train_df.loc[df[label_column].isin(SELECT_CATEGORIES)].copy()
    print("Train shape after filtering classes: ", train_df.shape)
    new_counts = train_df[label_column].value_counts()
    num_classes = new_counts.shape[0]


    if checkpoint != None:
        print("Loading label encoder from checkpoint...")
        label_encoder = checkpoint["label_encoder"]
    else:
        label_encoder = LabelEncoder()
        label_encoder.fit(train_df.category_id.values)

    y = train_df.pop(label_column)
    x = train_df

    train_size = NUMBER_OF_TRAIN_IMAGES_PER_CLASS * y.nunique()
    train_x, val_x, train_y, val_y = train_test_split(x, y, train_size=train_size, random_state=42, stratify=y)
    train_x[label_column] = train_y
    val_x[label_column] = val_y

    train_df = train_x
    val_df = val_x
    
    y = val_df.pop(label_column)
    x = val_df
    val_x, test_x, val_y, test_y = train_test_split(x, y, test_size=0.5, random_state=42, stratify=y)
    val_x[label_column] = val_y
    test_x[label_column] = test_y

    val_df = val_x
    test_df = test_x

    print(f"Train length: {len(train_df)} Val length: {len(val_df)} Test length: {len(test_df)}")

#     test_df = pd.read_csv(csv_dir + 'test2.csv', dtype=str)
#     print('test_df', test_df.shape)

#     # filter non-existing test images
#     exists = lambda img: os.path.exists(f'/hdd/kaggle/landmarks/test_images2/{img}.jpg')

#     test_df = test_df.loc[test_df.id.apply(exists)].copy()
#     print('test_df after filtering', test_df.shape)
#     assert test_df.shape[0] > 112000
#     # assert test_df.shape[0] > 117703
    if PREDICT_ONLY:
        num_classes = len(label_encoder.classes_)
    print('found classes', len(label_encoder.classes_))
    assert len(label_encoder.classes_) == num_classes

    train_df[label_column] = label_encoder.transform(train_df[label_column])
    val_df[label_column] = label_encoder.transform(val_df[label_column])
    test_df[label_column] = label_encoder.transform(test_df[label_column])

    train_dataset = ImageDataset(train_df, mode='train')
    val_dataset = ImageDataset(val_df, mode='val')
    test_dataset = ImageDataset(test_df, mode='test')
    
    dataset_sizes = [len(train_dataset), len(test_dataset), len(val_dataset)]

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

    return train_loader, val_loader, test_loader, label_encoder, num_classes, dataset_sizes

In [70]:
def accuracy(predicts, targets, confs):
    assert len(predicts.shape) == 1
    assert len(confs.shape) == 1
    assert len(targets.shape) == 1
    assert predicts.shape == confs.shape and confs.shape == targets.shape

    _, indices = torch.sort(confs, descending=True)
    confs = confs.cpu().numpy()
    predicts = predicts[indices].cpu().numpy()
    targets = targets[indices].cpu().numpy()

    num_correct = 0
    for i, (c, p, t) in enumerate(zip(confs, predicts, targets)):
        if p == t:
            num_correct += 1

    return num_correct / len(predicts)
            
def train(train_loader: Any, model: Any, criterion: Any, optimizer: Any,
          epoch: int, lr_scheduler: Any, tensorboard: Any, label_encoder:Any) -> None:
    print(f'epoch {epoch}')
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_score = AverageMeter()

    model.train()
    num_steps = min(len(train_loader), MAX_STEPS_PER_EPOCH)

    print(f'total batches: {num_steps}')

    end = time.time()
    lr_str = ''

    global_step = (epoch - 1) * len(train_loader)

    running_corrects = 0
    for i, (input_, target) in enumerate(train_loader):
        global_step += 1
        if i >= num_steps:
            break

        output = model(input_.cuda())
        loss = criterion(output, target.cuda())

        confs, predicts = torch.max(output.detach(), dim=1)
        avg_score.update(accuracy(predicts, target, confs))

        losses.update(loss.data.item(), input_.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % LOG_FREQ == 0:
            tensorboard.log_scalar("train_step_loss", losses.val, global_step)
            tensorboard.log_scalar("train_step_accuracy", avg_score.val, global_step)

            print(f'{epoch} [{i}/{num_steps}]\t'
                        f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'loss {losses.val:.4f} ({losses.avg:.4f})\t'
                        f'Accuracy {avg_score.val:.4f} ({avg_score.avg:.4f})'
                        + lr_str)

        if has_time_run_out():
            break

    avg_epoch_loss = losses.avg
    avg_accuracy = avg_score.avg
#     avg_epoch_gap = avg_score.avg

#     tensorboard.log_scalar("train_epoch_loss", avg_epoch_loss, epoch)
#     tensorboard.log_scalar("train_epoch_gap", avg_epoch_gap, epoch)

#     torch.save({
#         'epoch': epoch,
#         'classifier': model.fc,
#         'model_state_dict': model.state_dict(),
#         'optimizer': optimizer,
#         'optimizer_state_dict': optimizer.state_dict(),
#         'loss': losses.avg,
#         'gap': avg_score.avg,
#         'global_step': global_step,
#         'label_encoder': label_encoder,
#         'resnet_size': RESNET_SIZE,
#         'image_size': IMAGE_SIZE
#     }, CHECKPOINT_PATH + "checkpoints_{}".format(epoch))

#     print(f' * average GAP on train {avg_score.avg:.4f}')
    
def inference(data_loader: Any, model: Any) -> Tuple[torch.Tensor, torch.Tensor,
                                                     Optional[torch.Tensor]]:
    ''' Returns predictions and targets, if any. '''
    model.eval()

    activation = nn.Softmax(dim=1)
    all_predicts, all_confs, all_targets, all_predicts_gap, all_confs_gap = [], [], [], [], []

    print("Data loader length", len(data_loader))
    with torch.no_grad():
        for i, data in enumerate(tqdm(data_loader, disable=IN_KERNEL)):
            if data_loader.dataset.mode != 'test':
                input_, target = data
            else:
                input_, target = data, None

            output = model(input_.cuda())
            output = activation(output)

            confs, predicts = torch.topk(output, NUM_TOP_PREDICTS)
            confs_gap, predicts_gap = torch.max(output.detach(), 1)
            all_confs.append(confs)
            all_predicts.append(predicts)
            all_predicts_gap.append(predicts_gap)
            all_confs_gap.append(confs_gap)

            if target is not None:
                all_targets.append(target)

    predicts = torch.cat(all_predicts)
    confs = torch.cat(all_confs)
    targets = torch.cat(all_targets) if len(all_targets) else None
    predicts_gap = torch.cat(all_predicts_gap)
    confs_gap = torch.cat(all_confs_gap)

    return predicts, confs, targets, predicts_gap, confs_gap

def eval(val_loader: Any, train_loader: Any, model: Any, tensorboard: Any, epoch: int) -> np.ndarray:
    predicts_gpu, confs_gpu, targets_gpu, predicts_gap_gpu, confs_gap_gpu = inference(val_loader, model)
    val_gap = accuracy(predicts_gap_gpu, targets_gpu, confs_gap_gpu)
    num_correct = torch.sum(predicts_gap_gpu.cpu() == targets_gpu.cpu())
    predicts, confs, targets = predicts_gpu.cpu().numpy(), confs_gpu.cpu().numpy(), targets_gpu.cpu().numpy()


    labels = [label_encoder.inverse_transform(pred) for pred in predicts]

    assert len(labels) == len(val_loader.dataset.df)

    print(f"Val Accuracy: {val_gap}, Num correct: {num_correct}")

    tensorboard.log_scalar("val_num_correct", num_correct, epoch)
    tensorboard.log_scalar("val_gap", val_gap, epoch)

    val_df = val_loader.dataset.df
    train_df = train_loader.dataset.df
    rand_idx = int(random() * len(val_df))

    sample_row = val_df.iloc[rand_idx]
    sample_target = sample_row["category_id"]
    sample_prediction = int(predicts[rand_idx])

#     sample_predict_image_name = train_df[train_df["category_id"] == sample_prediction]["image_id"].tolist()[0]
#     sample_predict_image_path = f"{IMAGES_DIR}/{sample_predict_image_name}.jpg"
#     sample_correct_label_image_path = f"{IMAGES_DIR}/{sample_row['image_id'].zfill()}.jpg"

    # images = [image_append_text(sample_predict_image_path,sample_prediction), image_append_text(sample_correct_label_image_path,sample_target)]

#     widths, heights = zip(*(i.size for i in images))

#     total_width = sum(widths)
#     max_height = max(heights)

#     new_im = Image.new('RGB', (total_width, max_height))

#     x_offset = 0
#     for im in images:
#         new_im.paste(im, (x_offset,0))
#         x_offset += im.size[0]

#     tensorboard.log_image("predicted_and_target_image", np.asarray(new_im), epoch)

class Tensorboard:
    def __init__(self, logdir):
        self.writer = tf.summary.FileWriter(logdir)

    def close(self):
        self.writer.close()

    def log_scalar(self, tag, value, global_step):
        summary = tf.Summary()
        summary.value.add(tag=tag, simple_value=value)
        self.writer.add_summary(summary, global_step=global_step)
        self.writer.flush()
        
    def log_histogram(self, tag, values, global_step, bins):
        counts, bin_edges = np.histogram(values, bins=bins)

        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        bin_edges = bin_edges[1:]

        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        summary = tf.Summary()
        summary.value.add(tag=tag, histo=hist)
        self.writer.add_summary(summary, global_step=global_step)
        self.writer.flush()

    def log_image(self, tag, img, global_step):
        s = io.BytesIO()
        Image.fromarray(img).save(s, format='png')

        img_summary = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                   height=img.shape[0],
                                   width=img.shape[1])

        summary = tf.Summary()
        summary.value.add(tag=tag, image=img_summary)
        self.writer.add_summary(summary, global_step=global_step)
        self.writer.flush()

    def log_plot(self, tag, figure, global_step):
        plot_buf = io.BytesIO()
        figure.savefig(plot_buf, format='png')
        plot_buf.seek(0)
        img = Image.open(plot_buf)
        img_ar = np.array(img)

        img_summary = tf.Summary.Image(encoded_image_string=plot_buf.getvalue(),
                                   height=img_ar.shape[0],
                                   width=img_ar.shape[1])

        summary = tf.Summary()
        summary.value.add(tag=tag, image=img_summary)
        self.writer.add_summary(summary, global_step=global_step)
        self.writer.flush()

In [71]:
train_loader, val_loader, test_loader, label_encoder, n_classes, dataset_sizes = load_data()

loading data...
classes with at least N samples: 4
train_df (7214, 2)
train_df after filtering (7214, 2)
Train shape after filtering classes:  (5352, 2)
Train length: 75 Val length: 2638 Test length: 2639

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



found classes 3
creating data loader - train
creating data loader - val
creating data loader - test


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [72]:
# data_dir = "data/"

# model_conv = torchvision.models.resnet50(pretrained="imagenet").cuda()

In [73]:
# freeze_layers = True
# # n_class = 17143
# # n_class = len(glob(data_dir + "train/*"))
# # Stage-1 Freezing all the layers 
# if freeze_layers:
#     for i, param in model_conv.named_parameters():
#         param.requires_grad = False

# # Since imagenet as 1000 classes , We need to change our last layer according to the number of classes we have,
# num_ftrs = model_conv.fc.in_features
# model_conv.fc = nn.Linear(num_ftrs, n_classes).cuda()

# input_shape = 512
# batch_size = 100
# mean = [0.5, 0.5, 0.5]
# std = [0.5, 0.5, 0.5]
# scale = 360
# use_parallel = False
# use_gpu = True
# epochs = 100

# data_transforms = {
#         'train': transforms.Compose([
#         transforms.Resize(scale),
#         transforms.RandomResizedCrop(input_shape),
#         transforms.RandomHorizontalFlip(),
# #         transforms.RandomVerticalFlip(),
#         transforms.RandomRotation(degrees=90),
#         transforms.ToTensor(),
#         transforms.Normalize(mean, std)]),
#         'val': transforms.Compose([
#         transforms.Resize(scale),
#         transforms.CenterCrop(input_shape),
#         transforms.ToTensor(),
#         transforms.Normalize(mean, std)]),
#         }

In [74]:
# image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
#                                       data_transforms[x]) for x in ['train', 'val']}
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
#                                          shuffle=True, num_workers=4) for x in ['train', 'val']}


# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# class_names = image_datasets['train'].classes


# dataloaders = {
#     "train": train_loader,
#     "val": val_loader,
#     "test": test_loader
# }

# class_names = label_encoder.classes_

In [75]:
# if use_parallel:
#     print("[Using all the available GPUs]")
#     model_conv = nn.DataParallel(model_conv, device_ids=[0, 1])

# print("[Using CrossEntropyLoss...]")
# criterion = nn.CrossEntropyLoss()

# print("[Using small learning rate with momentum...]")
# optimizer_conv = optim.SGD(list(filter(lambda p: p.requires_grad, model_conv.parameters())), lr=0.001, momentum=0.9)

# # model_conv, optimizer_conv = amp.initialize(model_conv, optimizer_conv, opt_level="O1")
# print("[Creating Learning rate scheduler...]")
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

In [76]:
# print("[Training the model begun ....]")
# model_ft = utils.train_model(model_conv, dataloaders, dataset_sizes, criterion, optimizer_conv, exp_lr_scheduler, use_gpu,
#                      num_epochs=epochs)

In [None]:
CHECKPOINT_PATH = "checkpoints"
CHECKPOINT_NAME = None
tensorboard = Tensorboard(CHECKPOINT_PATH + "logdir")
epoch = 1

if CHECKPOINT_NAME != None:
    checkpoint = torch.load(CHECKPOINT_PATH + CHECKPOINT_NAME)
    train_loader, val_loader, test_loader, label_encoder, num_classes,_ = load_data(checkpoint)
else:
    train_loader, val_loader, test_loader, label_encoder, num_classes, _ = load_data()

if RESNET_SIZE == 50:
    model = torchvision.models.resnet50(pretrained=True)
elif RESNET_SIZE == 101:
    model = torchvision.models.resnet101(pretrained=True)
elif RESNET_SIZE == 18:
    model = torchvision.models.resnet18(pretrained=True)
else:
    raise ValueError("Invalid resnet size: ", RESNET_SIZE)
model.avg_pool = nn.AdaptiveAvgPool2d(1)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.cuda()

if CHECKPOINT_NAME != None:
    model.load_state_dict(checkpoint["model_state_dict"])
    epoch = int(checkpoint["epoch"]) + 1
    global_step = int(checkpoint["global_step"])

global_start_time = time.time()



if not PREDICT_ONLY:
    print("Training...")
    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    if CHECKPOINT_NAME != None:
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    # model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=LR_STEP,
                                                   gamma=LR_FACTOR)

    if USE_PARALLEL:
        print("[Using all the available GPUs]")
        model = nn.DataParallel(model, device_ids=[0, 1])

    for epoch in range(epoch, NUM_EPOCHS + 1):
        print('-' * 50)
        train(train_loader, model, criterion, optimizer, epoch, lr_scheduler, tensorboard, label_encoder)
        eval(val_loader, train_loader, model, tensorboard, epoch)
        lr_scheduler.step()

        if has_time_run_out():
            break


loading data...
classes with at least N samples: 4
train_df (7214, 2)
train_df after filtering (7214, 2)
Train shape after filtering classes:  (5352, 2)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

Train length: 75 Val length: 2638 Test length: 2639
found classes 3
creating data loader - train
creating data loader - val
creating data loader - test
Training...
--------------------------------------------------
epoch 1
total batches: 3
1 [0/3]	time 0.360 (0.360)	loss 1.3200 (1.3200)	Accuracy 0.2000 (0.2000)
Data loader length 106



Val Accuracy: 0.7539802880970432, Num correct: 1989
--------------------------------------------------
epoch 2
total batches: 3
2 [0/3]	time 0.346 (0.346)	loss 0.4835 (0.4835)	Accuracy 0.7200 (0.7200)
Data loader length 106



Val Accuracy: 0.525018953752843, Num correct: 1385
--------------------------------------------------
epoch 3
total batches: 3
3 [0/3]	time 0.345 (0.345)	loss 0.1818 (0.1818)	Accuracy 0.8800 (0.8800)
Data loader length 106



Val Accuracy: 0.5856709628506445, Num correct: 1545
--------------------------------------------------
epoch 4
total batches: 3
4 [0/3]	time 0.342 (0.342)	loss 1.2428 (1.2428)	Accuracy 0.6400 (0.6400)
Data loader length 106



Val Accuracy: 0.48142532221379836, Num correct: 1270
--------------------------------------------------
epoch 5
total batches: 3
5 [0/3]	time 0.343 (0.343)	loss 0.5899 (0.5899)	Accuracy 0.6800 (0.6800)
Data loader length 106



Val Accuracy: 0.46739954510993176, Num correct: 1233
--------------------------------------------------
epoch 6
total batches: 3
6 [0/3]	time 0.341 (0.341)	loss 0.6689 (0.6689)	Accuracy 0.7600 (0.7600)
Data loader length 106



Val Accuracy: 0.643669446550417, Num correct: 1698
--------------------------------------------------
epoch 7
total batches: 3
7 [0/3]	time 0.398 (0.398)	loss 0.4650 (0.4650)	Accuracy 0.8400 (0.8400)
Data loader length 106



Val Accuracy: 0.675890826383624, Num correct: 1783
--------------------------------------------------
epoch 8
total batches: 3
8 [0/3]	time 0.344 (0.344)	loss 0.3909 (0.3909)	Accuracy 0.8400 (0.8400)
Data loader length 106



Val Accuracy: 0.6830932524639879, Num correct: 1802
--------------------------------------------------
epoch 9
total batches: 3
9 [0/3]	time 0.354 (0.354)	loss 0.2868 (0.2868)	Accuracy 0.8400 (0.8400)
Data loader length 106



Val Accuracy: 0.6884003032600455, Num correct: 1816
--------------------------------------------------
epoch 10
total batches: 3
10 [0/3]	time 0.346 (0.346)	loss 0.0768 (0.0768)	Accuracy 1.0000 (1.0000)
Data loader length 106



Val Accuracy: 0.721379833206975, Num correct: 1903
--------------------------------------------------
epoch 11
total batches: 3
11 [0/3]	time 0.355 (0.355)	loss 0.1811 (0.1811)	Accuracy 1.0000 (1.0000)
Data loader length 106



Val Accuracy: 0.7426080363912054, Num correct: 1959
--------------------------------------------------
epoch 12
total batches: 3
12 [0/3]	time 0.345 (0.345)	loss 0.1499 (0.1499)	Accuracy 0.9200 (0.9200)
Data loader length 106



Val Accuracy: 0.768385140257771, Num correct: 2027
--------------------------------------------------
epoch 13
total batches: 3
13 [0/3]	time 0.350 (0.350)	loss 0.2471 (0.2471)	Accuracy 0.9200 (0.9200)
Data loader length 106



Val Accuracy: 0.775587566338135, Num correct: 2046
--------------------------------------------------
epoch 14
total batches: 3
14 [0/3]	time 0.353 (0.353)	loss 0.1604 (0.1604)	Accuracy 0.9600 (0.9600)
Data loader length 106



Val Accuracy: 0.7767247915087188, Num correct: 2049
--------------------------------------------------
epoch 15
total batches: 3
15 [0/3]	time 0.368 (0.368)	loss 0.1975 (0.1975)	Accuracy 0.9600 (0.9600)
Data loader length 106



Val Accuracy: 0.7763457164518575, Num correct: 2048
--------------------------------------------------
epoch 16
total batches: 3
16 [0/3]	time 0.345 (0.345)	loss 0.1180 (0.1180)	Accuracy 0.9600 (0.9600)
Data loader length 106



Val Accuracy: 0.7808946171341926, Num correct: 2060
--------------------------------------------------
epoch 17
total batches: 3
17 [0/3]	time 0.358 (0.358)	loss 0.2014 (0.2014)	Accuracy 0.9600 (0.9600)
Data loader length 106



Val Accuracy: 0.7824109173616376, Num correct: 2064
--------------------------------------------------
epoch 18
total batches: 3
18 [0/3]	time 0.343 (0.343)	loss 0.1298 (0.1298)	Accuracy 0.9200 (0.9200)
Data loader length 106



Val Accuracy: 0.785822592873389, Num correct: 2073
--------------------------------------------------
epoch 19
total batches: 3
19 [0/3]	time 0.351 (0.351)	loss 0.1117 (0.1117)	Accuracy 0.9600 (0.9600)
Data loader length 106



Val Accuracy: 0.7884761182714177, Num correct: 2080
--------------------------------------------------
epoch 20
total batches: 3
20 [0/3]	time 0.336 (0.336)	loss 0.2662 (0.2662)	Accuracy 0.8400 (0.8400)
Data loader length 106



Val Accuracy: 0.7907505686125853, Num correct: 2086
--------------------------------------------------
epoch 21
total batches: 3
21 [0/3]	time 0.347 (0.347)	loss 0.1061 (0.1061)	Accuracy 0.9600 (0.9600)
Data loader length 106
