Skip to content

Commit

Permalink
Merge pull request NVIDIA#1262 from lukeyeager/fix-batch-accumulation
Browse files Browse the repository at this point in the history
Fix batch accumulation
  • Loading branch information
lukeyeager committed Nov 16, 2016
2 parents 75ec4a0 + b8e905a commit f7cf695
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 24 deletions.
16 changes: 6 additions & 10 deletions digits/dataset/images/classification/test_imageset_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,18 @@
import PIL.Image


IMAGE_SIZE = 10
IMAGE_COUNT = 10 # per category


def create_classification_imageset(folder, image_size=None, image_count=None, add_unbalanced_category=False):
def create_classification_imageset(
folder,
image_size=10,
image_count=10,
add_unbalanced_category=False,
):
"""
Creates a folder of folders of images for classification
If requested to add an unbalanced category then a category is added with
half the number of samples of other categories
"""
if image_size is None:
image_size = IMAGE_SIZE
if image_count is None:
image_count = IMAGE_COUNT

# Stores the relative path of each image of the dataset
paths = defaultdict(list)

Expand Down
20 changes: 12 additions & 8 deletions digits/dataset/images/classification/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from bs4 import BeautifulSoup
import PIL.Image

from .test_imageset_creator import create_classification_imageset, IMAGE_COUNT as DUMMY_IMAGE_COUNT
from .test_imageset_creator import create_classification_imageset
from digits import test_utils
import digits.test_views

Expand Down Expand Up @@ -64,6 +64,7 @@ class BaseViewsTestWithImageset(BaseViewsTest):
Provides an imageset and some functions
"""
# Inherited classes may want to override these default attributes
IMAGE_COUNT = 10 # per class
IMAGE_HEIGHT = 10
IMAGE_WIDTH = 10
IMAGE_CHANNELS = 3
Expand All @@ -78,8 +79,11 @@ def setUpClass(cls):
super(BaseViewsTestWithImageset, cls).setUpClass()
cls.imageset_folder = tempfile.mkdtemp()
# create imageset
cls.imageset_paths = create_classification_imageset(cls.imageset_folder,
add_unbalanced_category=cls.UNBALANCED_CATEGORY)
cls.imageset_paths = create_classification_imageset(
cls.imageset_folder,
image_count=cls.IMAGE_COUNT,
add_unbalanced_category=cls.UNBALANCED_CATEGORY,
)
cls.created_datasets = []

@classmethod
Expand Down Expand Up @@ -363,7 +367,7 @@ def check_image_count(self, type):
assert parse_info['val_count'] == 0
image_count = parse_info['test_count']
assert self.categoryCount() == parse_info['label_count']
assert image_count == DUMMY_IMAGE_COUNT * parse_info['label_count'], 'image count mismatch'
assert image_count == self.IMAGE_COUNT * parse_info['label_count'], 'image count mismatch'
assert self.delete_dataset(job_id) == 200, 'delete failed'
assert not self.dataset_exists(job_id), 'dataset exists after delete'

Expand All @@ -375,9 +379,9 @@ def test_max_per_class(self):
yield self.check_max_per_class, type

def check_max_per_class(self, type):
# create dataset, asking for at most DUMMY_IMAGE_COUNT/2 images per class
assert DUMMY_IMAGE_COUNT % 2 == 0
max_per_class = DUMMY_IMAGE_COUNT / 2
# create dataset, asking for at most IMAGE_COUNT/2 images per class
assert self.IMAGE_COUNT % 2 == 0
max_per_class = self.IMAGE_COUNT / 2
data = {'folder_pct_val': 0}
if type == 'train':
data['folder_train_max_per_class'] = max_per_class
Expand Down Expand Up @@ -418,7 +422,7 @@ def test_min_per_class(self):
def check_min_per_class(self, type):
# create dataset, asking for one more image per class
# than available in the "unbalanced" category
min_per_class = DUMMY_IMAGE_COUNT / 2 + 1
min_per_class = self.IMAGE_COUNT / 2 + 1
data = {'folder_pct_val': 0}
if type == 'train':
data['folder_train_min_per_class'] = min_per_class
Expand Down
38 changes: 38 additions & 0 deletions digits/model/images/classification/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import itertools
import json
import math
import os
import shutil
import tempfile
Expand All @@ -16,13 +17,16 @@
from StringIO import StringIO

from bs4 import BeautifulSoup
from google.protobuf import text_format

from digits.config import config_value
import digits.dataset.images.classification.test_views
from digits.frameworks import CaffeFramework
import digits.test_views
from digits import test_utils
import digits.webapp

import caffe_pb2

# May be too short on a slow system
TIMEOUT_DATASET = 45
Expand Down Expand Up @@ -101,6 +105,10 @@ def model_exists(cls, job_id):
def model_status(cls, job_id):
return cls.job_status(job_id, 'models')

# Convenience wrapper: returns whatever cls.job_info yields for this job id under the 'models' job type.
@classmethod
def model_info(cls, job_id):
return cls.job_info(job_id, 'models')

@classmethod
def abort_model(cls, job_id):
return cls.abort_job(job_id, job_type='models')
Expand Down Expand Up @@ -1254,3 +1262,33 @@ def test_sweep(self):
assert self.model_wait_completion(job_id) == 'Done', 'create failed'
assert self.delete_model(job_id) == 200, 'delete failed'
assert not self.model_exists(job_id), 'model exists after delete'


@unittest.skipIf(
    not CaffeFramework().can_accumulate_gradients(),
    'This version of Caffe cannot accumulate gradients')
class TestBatchAccumulationCaffe(BaseViewsTestWithDataset, test_utils.CaffeMixin):
    """
    Checks the solver file written for a model trained with batch accumulation
    Skipped when the installed Caffe reports it cannot accumulate gradients
    """
    TRAIN_EPOCHS = 1
    IMAGE_COUNT = 10  # per class

    def test_batch_accumulation_calculations(self):
        """
        Trains a model with batch accumulation, then parses the generated
        solver file and verifies iter_size and max_iter
        """
        batch_size = 10
        batch_accumulation = 2

        job_id = self.create_model(
            batch_size=batch_size,
            batch_accumulation=batch_accumulation,
        )
        assert self.model_wait_completion(job_id) == 'Done', 'create failed'

        # Load the solver prototxt that the job wrote to its directory
        info = self.model_info(job_id)
        solver = caffe_pb2.SolverParameter()
        with open(os.path.join(info['directory'], info['solver file']), 'r') as infile:
            text_format.Merge(infile.read(), solver)

        assert solver.iter_size == batch_accumulation, \
            'iter_size is %d instead of %d' % (solver.iter_size, batch_accumulation)

        # Mirror the rounding used when the solver is generated: the number
        # of iterations per epoch is rounded up first and only then
        # multiplied by the epoch count. Rounding after multiplying gives a
        # different answer as soon as TRAIN_EPOCHS > 1.
        # NOTE(review): the factor 3 is presumably the number of categories
        # that end up in the training set -- confirm against the imageset
        # creator.
        iterations_per_epoch = int(math.ceil(
            float(self.IMAGE_COUNT * 3) /
            (batch_size * batch_accumulation)
        ))
        max_iter = self.TRAIN_EPOCHS * iterations_per_epoch
        assert solver.max_iter == max_iter, \
            'max_iter is %d instead of %d' % (solver.max_iter, max_iter)
16 changes: 10 additions & 6 deletions digits/model/tasks/caffe_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,10 @@ def save_files_classification(self):
solver.iter_size = self.batch_accumulation

# Epochs -> Iterations
train_iter = int(math.ceil(float(self.dataset.get_entry_count(
constants.TRAIN_DB)) / train_data_layer.data_param.batch_size))
train_iter = int(math.ceil(
float(self.dataset.get_entry_count(constants.TRAIN_DB)) /
(train_data_layer.data_param.batch_size * solver.iter_size)
))
solver.max_iter = train_iter * self.train_epochs
snapshot_interval = self.snapshot_interval * train_iter
if 0 < snapshot_interval <= 1:
Expand Down Expand Up @@ -598,7 +600,7 @@ def save_files_classification(self):
# Display 8x per epoch, or once per 5000 images, whichever is more frequent
solver.display = max(1, min(
int(math.floor(float(solver.max_iter) / (self.train_epochs * 8))),
int(math.ceil(5000.0 / train_data_layer.data_param.batch_size))
int(math.ceil(5000.0 / (train_data_layer.data_param.batch_size * solver.iter_size)))
))

if self.random_seed is not None:
Expand Down Expand Up @@ -753,8 +755,10 @@ def save_files_generic(self):
solver.iter_size = self.batch_accumulation

# Epochs -> Iterations
train_iter = int(math.ceil(float(self.dataset.get_entry_count(constants.TRAIN_DB)) /
train_image_data_layer.data_param.batch_size))
train_iter = int(math.ceil(
float(self.dataset.get_entry_count(constants.TRAIN_DB)) /
(train_image_data_layer.data_param.batch_size * solver.iter_size)
))
solver.max_iter = train_iter * self.train_epochs
snapshot_interval = self.snapshot_interval * train_iter
if 0 < snapshot_interval <= 1:
Expand Down Expand Up @@ -821,7 +825,7 @@ def save_files_generic(self):
# Display 8x per epoch, or once per 5000 images, whichever is more frequent
solver.display = max(1, min(
int(math.floor(float(solver.max_iter) / (self.train_epochs * 8))),
int(math.ceil(5000.0 / train_image_data_layer.data_param.batch_size))
int(math.ceil(5000.0 / (train_image_data_layer.data_param.batch_size * solver.iter_size)))
))

if self.random_seed is not None:
Expand Down

0 comments on commit f7cf695

Please sign in to comment.