New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Mean image for VGG-16 net #11
Comments
You should apply the following to your inputs: https://github.com/fchollet/deep-learning-models/blob/master/imagenet_utils.py#L11 (or equivalent) |
# @fchollet I've tried to test the performance of the VGG-16 and 19
# pre-trained models, however the performance does not match my expectation
# to VGG models. I randomly sampled 50,000 samples from the training dataset
# (I didn't run the code on full dataset because of my own purpose). However
# I got only 67.2%. Note that when I load the dataset, instead of randomly
# cropping the image, I resized them to (224, 224) and used your way in


def sample_imagenet(origin_path, target_path, num_samples=50):
    """Randomly sample images from ImageNet Training Dataset.

    This function randomly samples images from each class of ImageNet
    Training Dataset for normalizing pretrained ImageNet model. Every
    sub-folder of ``origin_path`` is treated as one class; the chosen
    images are copied into a sub-folder of the same name below
    ``target_path``.

    In case you don't have permission to the data folder, make sure
    you are running this function with super user.

    Parameters
    ----------
    origin_path : string
        The path to training dataset
    target_path : string
        The path to target destination (created when it does not exist)
    num_samples : int
        Number of images to copy per class folder. A folder holding fewer
        than ``num_samples`` matching images is copied completely.

    Raises
    ------
    ValueError
        If ``origin_path`` is not an existing directory.
    """
    if not os.path.isdir(origin_path):
        raise ValueError("The source folder is not existed!")

    if not os.path.isdir(target_path):
        os.makedirs(target_path)
        print("[MESSAGE] WARNING! The target path is not existed, "
              "The path is created automatically.")

    print("[MESSAGE] Start Copying.")
    folder_list = [f for f in os.listdir(origin_path)
                   if os.path.isdir(join(origin_path, f))]

    for folder_name in folder_list:
        target_folder = join(target_path, folder_name)
        if not os.path.isdir(target_folder):
            os.makedirs(target_folder)

        folder_path = join(origin_path, folder_name)
        file_list = [f for f in os.listdir(folder_path)
                     if os.path.isfile(join(folder_path, f)) and ".JPEG" in f]

        # random.sample raises when asked for more items than available,
        # so small folders are taken as a whole instead.
        if num_samples > len(file_list):
            chosen = file_list
        else:
            chosen = random.sample(file_list, num_samples)

        for file_name in chosen:
            shutil.copy(join(folder_path, file_name), target_folder)
            print("[MESSAGE] Image %s is copied to %s" %
                  (file_name, target_folder))

    print("[MESSAGE] Images are sampled! Stored at %s" % (target_path))


def _caffe_preprocess(batch):
    """Convert an RGB image batch to mean-centred BGR.

    The channel is indexed on axis 1, exactly as the original code did
    (assumes batches shaped (N, 3, H, W) -- TODO confirm the Keras image
    dim ordering in use).

    The flip to BGR has to happen BEFORE the mean subtraction: the three
    constants are the per-channel means in B, G, R order. Subtracting
    first and flipping afterwards removes the blue mean from the red
    channel and the red mean from the blue channel.
    """
    batch = batch[:, ::-1, :, :]  # 'RGB' -> 'BGR'
    batch[:, 0, :, :] -= 103.939  # B
    batch[:, 1, :, :] -= 116.779  # G
    batch[:, 2, :, :] -= 123.68   # R
    return batch


def _load_batch(datagen, path, classes):
    """Draw one batch of 1000 images resized to 224x224 and preprocess it."""
    dataflow = datagen.flow_from_directory(path,
                                           target_size=(224, 224),
                                           classes=classes,
                                           batch_size=1000)
    images, labels = next(dataflow)
    # The original kept a "/= 255." rescaling commented out; it stays off.
    return _caffe_preprocess(images), labels


def get_imagenet(train_path, test_path, save_path, class_idx_path,
                 filename=None):
    """Draw one preprocessed batch from the train and test folders and save both.

    One batch of 1000 images is read from ``train_path`` and one from
    ``test_path`` through a Keras ``ImageDataGenerator``, resized to
    (224, 224), converted to mean-centred BGR and stored as float32 in four
    compressed archives: ``X_norm``/``Y_norm`` (training batch) and
    ``X_test``/``Y_test`` (testing batch).

    Parameters
    ----------
    train_path : string
        The path to training dataset
    test_path : string
        The path to testing dataset
    save_path : string
        Folder that receives the archives (created when it does not exist)
    class_idx_path : string
        JSON file mapping "0", "1", ... to a sequence whose first element is
        the class folder name; it fixes the class order of the labels.
    filename : string, optional
        Prefix glued in front of the four archive names; no prefix if None.

    Raises
    ------
    ValueError
        If ``train_path`` or ``test_path`` is not a directory, or
        ``class_idx_path`` is not a file.
    """
    if not os.path.isdir(train_path):
        raise ValueError("Training dataset is not found!")
    if not os.path.isdir(test_path):
        raise ValueError("Testing dataset is not found!")
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.isfile(class_idx_path):
        raise ValueError("The class idx file is not existed!")

    # Context manager so the handle is closed even if parsing fails.
    with open(class_idx_path, "r") as class_idx_file:
        class_idx = json.load(class_idx_file)
    # range (not the Python 2 only xrange) keeps this runnable on Python 3.
    classes = [class_idx[str(idx)][0] for idx in range(len(class_idx))]

    datagen = ImageDataGenerator()
    X_train, Y_train = _load_batch(datagen, train_path, classes)
    X_test, Y_test = _load_batch(datagen, test_path, classes)

    if filename is None:
        filename = ''
    filepath = os.path.join(save_path, filename)

    np.savez_compressed(filepath + 'X_norm', X_train.astype('float32'))
    np.savez_compressed(filepath + 'Y_norm', Y_train.astype('float32'))
    np.savez_compressed(filepath + 'X_test', X_test.astype('float32'))
    np.savez_compressed(filepath + 'Y_test', Y_test.astype('float32'))
Are the weight files here the same as those of the original VGG-16 net?
There is a mean image file with VGG-16's Caffe Model.
Should I still apply it for the best result?
The text was updated successfully, but these errors were encountered: