# In [2]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import copy

import cv2


"""
The commented-out block below is for loading the data locally only.
"""
# im_names = os.listdir('/home/clint/project2/images')
# path = '/home/clint/project2/files/'
# name = 'Xa_test.csv'
# names = os.listdir(path)
# df1 = pd.DataFrame()
# for name in names:
#     print(name)
#     df2 = pd.read_csv(f'{path}{name}')
#     df1 = df1.append(df2, ignore_index=True)
 
# df_select = df1[df1['Image File'].isin(im_names)]
# df_select2 = df_select.dropna()

# df_val = df_select2.sample(int(len(df_select2)*.2/200))
# df_train = df_select2[~df_select2.index.isin(df_val.index.tolist())]
# df_train = df_train.sample(int(len(df_train)/200))

# data_val = P2DataLoader(df_val, root='/home/clint/project2/images')
# data_train = P2DataLoader(df_train, root='/home/clint/project2/images')
# dataload_val = DataLoader(data_val, batch_size=4, shuffle=True, num_workers=0)
# dataload_train = DataLoader(data_train, batch_size=4, shuffle=True, num_workers=0)
# dataloaders = {'train':dataload_train, 'val': dataload_val}
# dataset_sizes = {'train':len(df_train),'val':len(df_val)}


class P2DataLoader(Dataset):
    """Map-style dataset of cropped image regions described by a DataFrame.

    Each row of ``df`` describes one sample. The columns read are
    ``'Image File'``, ``'Sex (subj)'`` (the label), ``'Image Height'``,
    ``'Image Width'`` and the bounding box columns ``'X (top left)'``,
    ``'Y (top left)'``, ``'X (bottom right)'``, ``'Y (bottom right)'``.

    Args:
        df: pandas DataFrame holding the columns listed above.
        root: directory that contains the image files.
        train: stored on the instance as ``self.train``; not otherwise used
            by this class.
        transform: optional callable applied to the 3 x 64 x 64 image tensor
            before it is returned.
    """

    def __init__(self, df, root='', train=True, transform=None,):
        self.root = root
        self.train = train
        # Previously accepted but never stored, so it could not be applied.
        self.transform = transform
        self.csv_data = df
        self.target = np.array(self.csv_data['Sex (subj)'])  # label of image
        self.im_file = np.array(self.csv_data['Image File'])  # image file name
        self.h = np.array(self.csv_data['Image Height'])
        self.w = np.array(self.csv_data['Image Width'])
        self.x1 = np.array(self.csv_data['X (top left)'])
        self.x2 = np.array(self.csv_data['X (bottom right)'])
        self.y1 = np.array(self.csv_data['Y (top left)'])
        self.y2 = np.array(self.csv_data['Y (bottom right)'])

    @staticmethod
    def _to_chw_tensor(img):
        """Convert an H x W or H x W x C array to a float 3 x H x W tensor.

        A 2-D (single channel) image is replicated to three channels and any
        channel beyond the third (e.g. alpha) is dropped.
        """
        if img.ndim == 2:
            img = np.stack([img, img, img], axis=-1)
        elif img.shape[2] > 3:
            img = img[:, :, :3]
        tensor = torch.from_numpy(np.ascontiguousarray(img)).float()
        # permute moves the channel axis to the front; reshape(3, H, W) would
        # only reinterpret the buffer and mix pixels across channels.
        return tensor.permute(2, 0, 1).contiguous()

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            dict: {'image': 3 x 64 x 64 float tensor,
                   'target': int label,
                   'meta': {'im_size': (h, w), 'index': index,
                            'class_ID': label as stored in the DataFrame}}
        """
        # os.path.join keeps the path relative when root is '' (the previous
        # f'{root}/{file}' turned it into '/file').
        img = io.imread(os.path.join(self.root, str(self.im_file[index])))
        target, h, w = self.target[index], self.h[index], self.w[index]

        # Crop to the bounding box; cast because CSV columns may be parsed as
        # floats, which are not valid slice indices.
        top, bottom = int(self.y1[index]), int(self.y2[index])
        left, right = int(self.x1[index]), int(self.x2[index])
        img = img[top:bottom, left:right]

        # resize to a standard size
        img = cv2.resize(img, (64, 64))
        img = self._to_chw_tensor(img)

        if self.transform is not None:
            img = self.transform(img)

        out = {'image': img,
               'target': int(target),
               'meta': {'im_size': (h, w), 'index': index, 'class_ID': target}}

        return out

    def get_image(self, index):
        """Return ``index`` unchanged (placeholder; no image is loaded here)."""
        img = index
        return img

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.csv_data)
    

def _module_default(name, value):
    """Return ``value`` unless it is None, else the module-level global ``name``."""
    if value is not None:
        return value
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            "'{0}' was not passed to train_model and no module-level "
            "'{0}' is defined".format(name)) from None


def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                dataloaders=None, dataset_sizes=None, device=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase. The weights
    giving the highest validation accuracy are kept and restored at the end.

    Args:
        model: torch module to optimise (modified in place).
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: number of epochs to run.
        dataloaders: dict with 'train' and 'val' iterables yielding dicts
            with 'image' and 'target' tensors.
        dataset_sizes: dict with the sample count for 'train' and 'val',
            used to average loss and accuracy.
        device: torch device the batches are moved to.

        ``dataloaders``, ``dataset_sizes`` and ``device`` fall back to
        module-level variables of the same name when left as None, which
        keeps older notebook-style calls working.

    Returns:
        The same ``model`` object, loaded with the best weights found.

    Raises:
        NameError: if one of the three optional arguments is None and no
            module-level variable of that name exists.
    """
    dataloaders = _module_default('dataloaders', dataloaders)
    dataset_sizes = _module_default('dataset_sizes', dataset_sizes)
    device = _module_default('device', device)

    start = time.time()
    gold_acc, gold_model_wts = 0.0, copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            current_corrects, current_loss = 0, 0.0
            # Here's where the training happens
            print('Iterating through data...')

            for data in dataloaders[phase]:
                inputs = data['image'].to(device)
                labels = data['target'].to(device)

                # We need to zero the gradients, don't forget it
                optimizer.zero_grad()

                # forward; gradients are tracked only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss statistics (loss is a batch mean, so weight by batch size)
                current_loss += loss.item() * inputs.size(0)
                current_corrects += int(torch.sum(preds == labels).item())

            if is_train:
                # Step the schedule after the optimizer updates of this epoch;
                # stepping first would skip the initial learning rate.
                scheduler.step()

            e_loss = current_loss / dataset_sizes[phase]
            epoch_ac = current_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, e_loss, epoch_ac))

            # copy only if the model improved
            if phase == 'val' and epoch_ac > gold_acc:
                gold_acc = epoch_ac
                gold_model_wts = copy.deepcopy(model.state_dict())

        print()
    end = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        end // 60, end % 60))
    print('Best val Acc: {:4f}'.format(gold_acc))

    # Now we'll load in the best model weights and return it
    model.load_state_dict(gold_model_wts)
    return model


def Tain_ResNet(path2trainCSV, val_fract, path2ims, num_epochs=25):
    """Fine-tune a ResNet-34 with a 2-class head on the images listed in a CSV.

    Args:
        path2trainCSV: path of the CSV file read with ``pd.read_csv``; it
            must provide the columns ``P2DataLoader`` reads.
        val_fract: fraction of the rows sampled at random for validation;
            the remaining rows are used for training.
        path2ims: directory containing the image files.
        num_epochs: number of training epochs.

    Returns:
        The trained model as returned by ``train_model``.
    """
    # load data
    df = pd.read_csv(path2trainCSV)
    # was len(sample): an undefined name, so the function failed here
    df_val = df.sample(int(len(df) * val_fract))
    df_train = df[~df.index.isin(df_val.index)]

    data_val = P2DataLoader(df_val, root=path2ims)
    data_train = P2DataLoader(df_train, root=path2ims)
    dataload_val = DataLoader(data_val, batch_size=4, shuffle=True, num_workers=0)
    dataload_train = DataLoader(data_train, batch_size=4, shuffle=True, num_workers=0)
    dataloaders = {'train': dataload_train, 'val': dataload_val}
    dataset_sizes = {'train': len(df_train), 'val': len(df_val)}

    # build res
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    res_mod = models.resnet34(pretrained=True)
    num_ftrs = res_mod.fc.in_features
    res_mod.fc = nn.Linear(num_ftrs, 2)  # replace the head with a 2-class layer
    res_mod = res_mod.to(device)
    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(res_mod.parameters(), lr=0.001, momentum=0.9)
    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    # train; the loaders, sizes and device are locals here, so they are passed
    # explicitly rather than looked up as globals inside train_model
    base_model = train_model(res_mod, criterion, optimizer_ft, exp_lr_scheduler,
                             num_epochs=num_epochs, dataloaders=dataloaders,
                             dataset_sizes=dataset_sizes, device=device)
    return base_model