In [36]:
# CODE REFERENCES
# https://towardsdatascience.com/how-to-build-an-image-classifier-for-waste-sorting-6d11d3c9c478
# https://github.com/danny95333/Trash-Classification-based-on-CNN
from linear_classifier import *
from linear_svm import *
from softmax import *
import time
import random
import shutil
import re
import os
from pathlib import Path
# Resolve the current user's home directory; the dataset paths below are built from it.
userPath = os.path.expanduser("~")
# Echo the home directory with forward slashes and a trailing "/" as a quick sanity check.
print(userPath.replace("\\", "/") + "/")

C:/Users/Anand Natu/


In [40]:
# DATA PROCESSING
# Take the resized images and copy them into train, val, and test folders.
# See the helper functions below.
## helper functions ##

## splits indices for a folder into train, validation, and test indices with random sampling
def split_indices(folder,seed1,seed2):
    """Randomly partition the 1-based entry indices of ``folder`` into three groups.

    The number of entries in ``folder`` (every directory entry is counted,
    whatever its type or extension) defines the index range ``1..n``.
    Half of the indices (rounded down) are sampled for training; half of
    what is left (rounded down) is sampled for validation; the rest is test.

    input:
        folder: path of the directory whose entries are counted
        seed1:  seed for the training sample
        seed2:  seed for the validation sample
    output:
        (train, valid, test) tuple of index lists; the three lists are
        disjoint and together cover ``1..n``. ``test`` is in ascending order.
    """
    n = len(os.listdir(folder))
    full_set = list(range(1, n + 1))

    ## train indices
    # A private generator gives the same draw as seeding the module-level one,
    # without clobbering the global random state used elsewhere in the notebook.
    train = random.Random(seed1).sample(full_set, n // 2)

    ## remaining indices, kept in ascending order so that the validation draw
    ## does not depend on set iteration order
    train_lookup = set(train)
    remain = [i for i in full_set if i not in train_lookup]

    ## separate remaining into validation and test
    valid = random.Random(seed2).sample(remain, len(remain) // 2)
    valid_lookup = set(valid)
    test = [i for i in remain if i not in valid_lookup]
    return (train, valid, test)

## builds the .jpg file names for one waste category from a collection of indices
def get_names(waste_type,indices):
    """Return ``<waste_type><index>.jpg`` for every index, in the given order.

    input:  waste category (string prefix) and an iterable of indices
    output: list of file names
    """
    file_names = []
    for index in indices:
        file_names.append(waste_type + str(index) + ".jpg")
    return file_names

## places a group of source files into another folder
def move_files(source_files,destination_folder):
    """Copy each file in ``source_files`` into ``destination_folder``.

    Despite the name, the files are copied rather than moved: the originals
    stay in the source folder, so the operation can be repeated or changed
    without having to re-download the source data.

    input:  list of source file paths and the destination folder
    no output
    """
    for source_path in source_files:
        shutil.copy(source_path, destination_folder)

In [41]:
# DATA PROCESSING
# Make train, val, and test folders for the data. train and val preserve the
# child folder structure which tells us the waste type; test is a single flat folder.
## paths will be train/cardboard, train/glass, etc...
subsets = ['train','val']
waste_types = ['cardboard','glass','metal','paper','plastic','trash']


inBase = "Documents/GitHub/CS231n-Project-2019/datasets/trashnet/dataset-resized"
inDataPath = os.path.join(userPath, inBase)
outBase = "Documents/GitHub/CS231n-Project-2019/datasets/trashnet/data"
outDataPath = os.path.join(userPath, outBase)

## create destination folders for each data subset and waste type
## (exist_ok makes the cell safe to re-run)
for subset in subsets:
    for waste_type in waste_types:
        os.makedirs(os.path.join(outDataPath, subset, waste_type), exist_ok=True)

## the test images share one folder because they can be mixed up
test_dest = os.path.join(outDataPath, 'test')
os.makedirs(test_dest, exist_ok=True)

## copy files to the destination folders for each waste type
for waste_type in waste_types:
    source_folder = os.path.join(inDataPath, waste_type)
    train_ind, valid_ind, test_ind = split_indices(source_folder,1,1)

    # Destinations are derived from outDataPath, i.e. exactly the folders
    # created above, instead of being rebuilt by hand from userPath.
    destinations = (
        (train_ind, os.path.join(outDataPath, 'train', waste_type)),
        (valid_ind, os.path.join(outDataPath, 'val', waste_type)),
        (test_ind, test_dest),
    )
    for indices, destination in destinations:
        names = get_names(waste_type, indices)
        source_files = [os.path.join(source_folder, name) for name in names]
        move_files(source_files, destination)

In [43]:
# DATA AUGMENTATION
# Loading the dataset into memory and applying transforms
import torch
from torchvision import transforms, datasets


# Training images get random augmentation (random resized crop + horizontal flip);
# validation images only get a deterministic resize and center crop.
# Both are converted to tensors and normalized with the same per-channel mean/std.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}


# One ImageFolder dataset per split, each rooted at <outDataPath>/<split>
# See https://pytorch.org/docs/stable/torchvision/datasets.html#imagefolder
image_datasets = {
    x: datasets.ImageFolder(root=os.path.join(outDataPath, x), transform=data_transforms[x])
    for x in ["train", "val"]
}

# One shuffled DataLoader (batches of 64, 4 worker processes) per dataset
# See https://pytorch.org/docs/stable/data.html
dataloaders = {
    x: torch.utils.data.DataLoader(
        dataset=image_datasets[x],
        batch_size=64,
        shuffle=True,
        num_workers=4,
    )
    for x in ['train', 'val']
}


# Number of images in each split
datasets_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

# Class labels as reported by the training dataset
class_names = image_datasets['train'].classes



In [46]:
# Show the split sizes, the class labels, and the dataset descriptions, one per line
print(datasets_sizes, class_names, image_datasets, sep="\n")

{'train': 1262, 'val': 630}
['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
{'train': Dataset ImageFolder
    Number of datapoints: 1262
    Root Location: C:\Users\Anand Natu\Documents/GitHub/CS231n-Project-2019/datasets/trashnet/data\train
    Transforms (if any): Compose(
                             RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                             RandomHorizontalFlip(p=0.5)
                             ToTensor()
                             Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                         )
    Target Transforms (if any): None, 'val': Dataset ImageFolder
    Number of datapoints: 630
    Root Location: C:\Users\Anand Natu\Documents/GitHub/CS231n-Project-2019/datasets/trashnet/data\val
    Transforms (if any): Compose(
                             Resize(size=256, interpolation=PIL.Image.BILINEAR)
                             CenterCrop(si

In [3]:
# TRAIN THE MODEL
# Fits a LinearSVM on X_train / y_train and reports the wall-clock training time.
# NOTE(review): X_train and y_train are not defined in any cell visible here, and the
# dataloaders built above are not used by this cell -- TODO confirm where they come from.
# LinearSVM is presumably provided by one of the star imports at the top -- verify.
svm = LinearSVM()
# Start of the timed span (the progress print below is included in it)
tic = time.time()
print("Training model with {} rows of training data".format(X_train.shape[0]))
# The value returned by train() is kept in loss_hist (presumably the per-iteration loss -- confirm)
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('Training Complete. That took %fs' % (toc - tic))

In [None]:
# HYPERPARAMETER TUNING


In [5]:
# EVALUATE TEST PERFORMANCE
