# Use Caffe to extract features from pre-trained CaffeNet.


In [2]:
caffe_root = '/home/ubuntu/caffe/'
data_root = '/mnt/data/'

In [3]:
import numpy as np
import sys
import caffe
import os

# make sure caffe is on the python path
sys.path.insert(0, caffe_root + 'python')

Download pre-trained Alexnet model

In [4]:
if not os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
    print("Downloading pre-trained CaffeNet model...")
    !/home/ubuntu/caffe/scripts/download_model_binary.py /home/ubuntu/caffe/models/bvlc_reference_caffenet

In [5]:
# set caffe to cpu mode
#caffe.set_mode_cpu()

# set caffe to gpu mode
caffe.set_device(0)
caffe.set_mode_gpu()

Define function to extract features from specified layer.

In [6]:
def extract_features(images):
    """
    INPUT
    images - a list of the names of images (including full directory path)
    
    RETURNS
    net.blobs - all blobs from forward pass through net
    """
    
    # Load the net in test phase
    net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',
                caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',
                caffe.TEST)
    
    # input preprocessing: 'data' is the name of the input blob == net.inputs[0]
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2,0,1))
    transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel
    transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
    transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB]]

    # Set batch size to num_images
    num_images = len(images)
    net.blobs['data'].reshape(num_images,3,227,227)
    
    # Feed in the image (with some preprocessing) and classify with a forward pass.
    net.blobs['data'].data[...] = map(lambda x: transformer.preprocess('data', caffe.io.load_image(x)), images)
    out = net.forward()
    
    return net.blobs

Define function that calls on `extract_features` for a set of images

In [11]:
def extract_features_from_dataset(image_names, outfile_prefix="train_", layers=['fc6','fc7','prob'], batch_size=500):
    """Extract features in batches from a set of images.  Write to HDF5 file.
    INPUT:
    layers - list, layer names from which activations/features should be extracted
    batch_size - numeric, number of images to process at a time
    image_names - list, names of images to be processed (including full directory path)
    outfile_prefix - string, prefix for outfile name, e.g. "train"
    """
    n_images = len(image_names)
    
    # extract features in batches
    for i in xrange(0, n_images, batch_size):
        image_batch = image_names[i: min(i+batch_size, n_images)]
        net_blobs = extract_features(images = image_batch)
        num_done = i + net_blobs[layers[0]].shape[0]
        
        # write out features, one file per layer
        for l in layers:
            fname = data_root + outfile_prefix + l + '_features.h5'
            try:
                f = h5py.File(fname, 'r+')
            except IOError:
                initialize_h5files(layer=l, filename=fname, layer_size=net_blobs[l].shape[1])
                f = h5py.File(fname, 'r+')
            f['photo_id'].resize((num_done,))
            f['photo_id'][i: num_done] = np.array(image_batch)
            f['feature'].resize((num_done, net_blobs[l].shape[1]))
            f['feature'][i: num_done, :] = net_blobs[l].data
            f.close()
            
        if i%20000==0 or num_done==n_images:
            print "Images processed: ", num_done
            sys.stdout.flush()

# Extract features

In [8]:
# extract image features and save it to .h5

# Initialize files
import h5py

def initialize_h5files(layer, filename, layer_size):
    """
    INPUT:
    layer - string, name of layer in net
    fname - string, initialized file name
    layer_size - integer, size of layer output (assumed to be a vector)
    """
    f = h5py.File(filename, 'w')
    _ = f.create_dataset('photo_id', (0,), maxshape=(None,), dtype='|S54')
    _ = f.create_dataset('feature',(0, layer_size), maxshape = (None, layer_size))
    f.close()

## Extract features from training set

In [8]:
import pandas as pd 
train_photos = pd.read_csv(data_root+'train_photo_to_biz_ids.csv')
train_folder = data_root+'train_photos/'
train_images = [os.path.join(train_folder, str(x)+'.jpg') for x in train_photos['photo_id']]  # get full filename

In [9]:
num_train = len(train_images)
print "Number of training images: ", num_train

Number of training images:  234842


In [10]:
import time
tic = time.time()
extract_features_from_dataset(image_names=train_images, outfile_prefix='train_', layers=['fc6','fc7','prob'], batch_size=256)
toc = time.time()
print 'computed in {0}s'.format(toc-tic)

Images processed:  160000
Images processed:  234842
computed in 6741.17514801s


In [11]:
### Check the file content

f = h5py.File(data_root + 'train_fc7_features.h5','r')
print 'training image features:'
for key in f.keys():
    print key, f[key].shape
    
print "\nA photo:", f['photo_id'][0]
print "Its feature vector (first 10-dim): ", f['feature'][0][0:10], " ..."
f.close()

training image features:
feature (234842, 4096)
photo_id (234842,)

A photo: /mnt/data/train_photos/204149.jpg
Its feature vector (first 10-dim):  [ 0.          0.          0.          0.          0.          0.10489148
  0.          0.          0.          0.        ]  ...


## Extract features from test set

In [9]:
import pandas as pd 
test_photos = pd.read_csv(data_root+'test_photo_to_biz.csv')
test_folder = data_root+'test_photos/'
test_images = [os.path.join(test_folder, str(x)+'.jpg') for x in test_photos['photo_id'].unique()]  # get full filename

In [10]:
num_test = len(test_images)
print "Number of training images: ", num_test

Number of training images:  237152


In [12]:
extract_features_from_dataset(image_names=test_images, outfile_prefix='test_', layers=['fc6','fc7','prob'], batch_size=256)

Images processed:  256
Images processed:  160256
Images processed:  237152


In [13]:
f = h5py.File(data_root+'test_fc6_features.h5','r')
for key in f.keys():
    print key, f[key].shape
print "\nA photo:", f['photo_id'][0]
print "feature vector: (first 10-dim)", f['feature'][0][0:10], " ..."
f.close()

feature (237152, 4096)
photo_id (237152,)

A photo: /mnt/data/test_photos/317818.jpg
feature vector: (first 10-dim) [  1.11734688   0.           0.           0.          18.62758636   0.
   6.45451498   0.           9.16341496   0.        ]  ...
