Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mean image for VGG-16 net #11

Closed
duguyue100 opened this issue Aug 23, 2016 · 2 comments
Closed

Mean image for VGG-16 net #11

duguyue100 opened this issue Aug 23, 2016 · 2 comments

Comments

@duguyue100
Copy link

Are the weight files here the same as those of the original VGG-16 net?
There is a mean image file with VGG-16's Caffe Model.
Should I still apply it for the best result?

@fchollet
Copy link
Owner

You should apply the following to your inputs: https://github.com/fchollet/deep-learning-models/blob/master/imagenet_utils.py#L11 (or equivalent)

@duguyue100
Copy link
Author

@fchollet I've tried to test the performance of the VGG-16 and VGG-19 pre-trained models; however, the performance does not match what I expected from the VGG models. I randomly sampled 50,000 samples from the training dataset (I didn't run the code on the full dataset, for reasons specific to my own use case).

However, I got only 67.2%. Note that when I load the dataset, instead of randomly cropping the images, I resize them to (224, 224) and use your approach from preprocess_input for the preprocessing. I wonder whether this performance drop comes from a mistake in my data handling or from the pre-trained model itself. I have attached my code for sampling the images and my data loading function. Please take a look if you can. 😄

def sample_imagenet(origin_path, target_path, num_samples=50):
    """Randomly sample images from each class of an ImageNet training set.

    Every immediate sub-folder of ``origin_path`` is treated as one class.
    For each class, up to ``num_samples`` files whose names end in ``.JPEG``
    are copied into a folder of the same name under ``target_path``. When a
    class holds fewer than ``num_samples`` such files, all of them are copied.

    If you do not have permission to access the data folders, make sure you
    run this function as a super user.

    Parameters
    ----------
    origin_path : string
        The path to the training dataset (one sub-folder per class).
    target_path : string
        The path to the target destination; created if it does not exist.
    num_samples : int
        Maximum number of images to copy from each class folder.

    Raises
    ------
    ValueError
        If ``origin_path`` is not an existing directory.
    """
    if not os.path.isdir(origin_path):
        raise ValueError("The source folder is not existed!")
    if not os.path.isdir(target_path):
        os.makedirs(target_path)
        print("[MESSAGE] WARNING! The target path is not existed, "
              "The path is created automatically.")

    print("[MESSAGE] Start Copying.")
    folder_list = [f for f in os.listdir(origin_path)
                   if os.path.isdir(join(origin_path, f))]

    for folder_name in folder_list:
        source_dir = join(origin_path, folder_name)
        dest_dir = join(target_path, folder_name)
        if not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)

        # Match on the suffix, not on a substring: a name such as
        # "img.JPEG.bak" merely contains ".JPEG" and is not an image.
        file_list = [f for f in os.listdir(source_dir)
                     if f.endswith(".JPEG")
                     and os.path.isfile(join(source_dir, f))]

        if num_samples > len(file_list):
            chosen = file_list
        else:
            chosen = random.sample(file_list, num_samples)

        for file_name in chosen:
            shutil.copy(join(source_dir, file_name), dest_dir)
            print("[MESSAGE] Image %s is copied to %s" %
                  (file_name, dest_dir))

    print("[MESSAGE] Images are sampled! Stored at %s" % (target_path))
def _rgb_to_centered_bgr(batch):
    """Turn an RGB image batch into mean-centred BGR input.

    Axis 1 is indexed as the channel axis, so this assumes the batch is laid
    out as (samples, 3, height, width) in RGB order -- TODO confirm against
    the Keras image dimension ordering in use.
    """
    # Flip to BGR *before* subtracting: 103.939 / 116.779 / 123.68 are the
    # blue / green / red means, so they must meet channels 0 / 1 / 2 of the
    # already-flipped array. Subtracting first pairs red with the blue mean.
    batch = batch[:, ::-1, :, :]
    batch[:, 0, :, :] -= 103.939
    batch[:, 1, :, :] -= 116.779
    batch[:, 2, :, :] -= 123.68
    return batch


def get_imagenet(train_path, test_path, save_path, class_idx_path,
                 filename=None):
    """Load one batch of train and test images, preprocess and save them.

    A single batch of up to 1000 images, resized to 224x224, is drawn from
    each of ``train_path`` and ``test_path``. The images are converted to
    mean-centred BGR and written, together with their labels, as compressed
    ``.npz`` files named ``X_norm``, ``Y_norm``, ``X_test`` and ``Y_test``.

    Parameters
    ----------
    train_path : string
        Directory of training images, one sub-folder per class.
    test_path : string
        Directory of testing images, one sub-folder per class.
    save_path : string
        Directory for the output files; created if it does not exist.
    class_idx_path : string
        JSON file mapping the keys "0", "1", ... to sequences whose first
        element is the class folder name.
    filename : string, optional
        Prefix prepended to each output file name. Defaults to no prefix.

    Raises
    ------
    ValueError
        If ``train_path`` or ``test_path`` is not a directory, or if
        ``class_idx_path`` is not a file.
    """
    if not os.path.isdir(train_path):
        raise ValueError("Training dataset is not found!")
    if not os.path.isdir(test_path):
        raise ValueError("Testing dataset is not found!")
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.isfile(class_idx_path):
        raise ValueError("The class idx file is not existed!")

    # Close the index file deterministically instead of leaking the handle.
    with open(class_idx_path, "r") as idx_file:
        class_idx = json.load(idx_file)

    # Walk the keys in numeric order so label i maps to class_idx["i"].
    # `range` (not `xrange`) keeps this runnable on Python 2 and Python 3.
    classes = [class_idx[str(idx)][0] for idx in range(len(class_idx))]

    datagen = ImageDataGenerator()
    train_dataflow = datagen.flow_from_directory(train_path,
                                                 target_size=(224, 224),
                                                 classes=classes,
                                                 batch_size=1000)
    X_train, Y_train = train_dataflow.next()
    X_train = _rgb_to_centered_bgr(X_train)

    test_dataflow = datagen.flow_from_directory(test_path,
                                                target_size=(224, 224),
                                                classes=classes,
                                                batch_size=1000)
    X_test, Y_test = test_dataflow.next()
    X_test = _rgb_to_centered_bgr(X_test)

    if filename is None:
        filename = ''
    # The prefix is glued directly onto the array name, e.g. "<prefix>X_norm".
    filepath = os.path.join(save_path, filename)
    np.savez_compressed(filepath + 'X_norm', X_train.astype('float32'))
    np.savez_compressed(filepath + 'Y_norm', Y_train.astype('float32'))
    np.savez_compressed(filepath + 'X_test', X_test.astype('float32'))
    np.savez_compressed(filepath + 'Y_test', Y_test.astype('float32'))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants