In [1]:
import pandas as pd
import cv2
import numpy as np

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
from torchvision import transforms
import torch.utils.data as data
from torch.utils.data.sampler import SubsetRandomSampler

import pickle

In [7]:
def preprocess_data(directory:str, batch_size:int, test_size:float, rand_num:int, worker:int):
    '''
        Build train and test DataLoaders from a folder of images.

        directory: the directory of processed images with one sub-folder per class
        batch_size: size of batch for training
        test_size: fraction of the dataset (between 0 and 1) held out for test
        rand_num: seed used to shuffle the indices, for a reproducible split
        worker: number of worker processes used by each DataLoader

        return (trainloader, testloader), both reading from the same dataset
        through disjoint index samplers

        raise ValueError when test_size is not within [0, 1]
    '''
    # Validate before touching the disk: a negative fraction would make the
    # slices below count from the end of the list, and a value such as 30
    # (meant as "30 percent") would silently leave the train split empty.
    if not 0 <= test_size <= 1:
        raise ValueError(
            f"test_size must be a fraction between 0 and 1, got {test_size!r}"
        )

    # pipeline to resize images, center-crop to 224x224, convert to tensor, and
    # normalize each channel (presumably the ImageNet mean/std - confirm)
    trans = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # read images from the class folders; labels come from the folder names
    dataset = torchvision.datasets.ImageFolder(root=directory, transform=trans)

    n_samples = len(dataset)  # size of the whole dataset
    ind = list(range(n_samples))  # indices of the whole dataset
    spl = int(np.floor(test_size * n_samples))  # number of samples held out for test

    # reproducibility and shuffle step
    # NOTE: this seeds NumPy's global RNG, so it also affects later np.random calls
    np.random.seed(rand_num)
    np.random.shuffle(ind)

    # the first `spl` shuffled indices form the test split, the rest the train split
    train_id, test_id = ind[spl:], ind[:spl]
    tr_sampl = SubsetRandomSampler(train_id)
    te_sampl = SubsetRandomSampler(test_id)

    # use data loader to get train and test set ready for training
    trainloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=tr_sampl, num_workers=worker)
    testloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=te_sampl, num_workers=worker)
    return (trainloader, testloader)

In [8]:
# Root folder holding one sub-folder of processed images per class.
dire = "./Data/Processed"

# Hold out 30% of the images for testing; batches of 4, shuffle seed 40,
# and 4 worker processes per loader.
trainloader, testloader = preprocess_data(
    directory=dire, batch_size=4, test_size=0.3, rand_num=40, worker=4
)

In [None]:
def train_model(net, trainloader, epochs=10, lr=0.001, momentum=0.9, device=None):
    '''
        Train a classifier with SGD and cross-entropy loss.

        net: model to train (an nn.Module returning one score per class)
        trainloader: iterable yielding (inputs, labels) batches
        epochs: number of passes over trainloader
        lr: learning rate passed to SGD
        momentum: momentum passed to SGD
        device: device to train on; None selects CUDA when available, else CPU

        return list holding, for each epoch, the loss averaged over its batches

        raise ValueError when trainloader yields no batches
    '''
    # Fall back to the CPU instead of failing on machines without a GPU.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    epoch_losses = []
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        n_batches = 0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)  # move the batch to the training device
            labels = labels.to(device)
            optimizer.zero_grad()  # zero out the gradients
            scores = net(inputs)  # forward pass
            loss = criterion(scores, labels)  # calculate the loss
            loss.backward()  # backpropagation step
            optimizer.step()  # update the parameters
            running_loss += loss.item()  # accumulate the loss
            n_batches += 1
        # An empty loader would otherwise end in a division by zero.
        if n_batches == 0:
            raise ValueError("trainloader produced no batches")
        epoch_losses.append(running_loss / n_batches)  # average batch loss of this epoch
    return epoch_losses