### 1. Setup

* First, set up Python, `numpy`, and `matplotlib`.
* Import the `caffe` Python module.
* Load the trained caffemodel weights.
* Load the dataset mean image used for mean subtraction.

In [1]:
# set up Python environment: numpy for numerical routines, and matplotlib for plotting
import numpy as np
import matplotlib.pyplot as plt
# display plots in this notebook
%matplotlib inline

# display defaults, applied in a single update of matplotlib's rcParams
plt.rcParams.update({
    'figure.figsize': (10, 10),         # large images
    'image.interpolation': 'nearest',   # no interpolation: show square pixels
    'image.cmap': 'gray',               # grayscale output rather than a (potentially misleading) color heatmap
})



In [2]:
# Make pycaffe importable by putting {caffe_root}/python at the front of the module search path.
# This notebook is expected to run from {caffe_root}/examples; edit caffe_root otherwise.
import sys
caffe_root = '../'
pycaffe_dir = caffe_root + 'python'
sys.path.insert(0, pycaffe_dir)

# A "No module named _caffe" error here means pycaffe has not been built or caffe_root is wrong.
import caffe

  from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \
  from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \
  from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \


In [3]:
import os
if os.path.isfile(caffe_root + 'examples/imagenet/caffenet_train_iter_49074.caffemodel'):
    print 'CaffeNet found.'
else:
    print 'Downloading pre-trained CaffeNet model...'
    !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet

CaffeNet found.


In [4]:
# Convert the mean image from Caffe's binaryproto format into a .npy file
# that np.load can read.
blob = caffe.proto.caffe_pb2.BlobProto()
# Read through a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
with open(caffe_root+'data/ilsvrc12/imagenet_mean.binaryproto', 'rb') as mean_file:
    data = mean_file.read()
blob.ParseFromString(data)
arr = np.array( caffe.io.blobproto_to_array(blob) )
# Keep only the first entry along the leading axis of the converted array.
out = arr[0]
np.save(caffe_root+'examples/imagenet/tree_mean.npy', out )

In [5]:
# Build the network on the CPU from its definition file and the trained weights.
caffe.set_mode_cpu()

# model_def: the structure of the model; model_weights: the trained weights.
model_def = caffe_root + 'examples/imagenet/deploy.prototxt'
model_weights = caffe_root + 'examples/imagenet/caffenet_train_iter_49074.caffemodel'

# caffe.TEST selects test mode (e.g., dropout is not performed).
net = caffe.Net(model_def, model_weights, caffe.TEST)

In [6]:
# Load the mean image saved as tree_mean.npy and average it over axis 1 twice
# (i.e. over the pixels) to obtain the mean (BGR) pixel values used for subtraction.
mean_image = np.load(caffe_root + 'examples/imagenet/tree_mean.npy')
mu = mean_image.mean(1).mean(1)

# Build the preprocessing transformer for the network input called 'data',
# sized from the current shape of that input blob.
input_shape = net.blobs['data'].data.shape
transformer = caffe.io.Transformer({'data': input_shape})

# move image channels to the outermost dimension
transformer.set_transpose('data', (2,0,1))
# subtract the dataset-mean value in each channel
transformer.set_mean('data', mu)
# rescale from [0, 1] to [0, 255]
transformer.set_raw_scale('data', 255)
# swap channels from RGB to BGR
transformer.set_channel_swap('data', (2,1,0))

# Extract features:
* extract the features of a chosen network layer for every image
* convert them to the form of train_X, train_Y, test_X, test_Y
* save the result to a .mat file

In [27]:

def extract(path,str):
    """Run one image through the network and return the activations of one blob.

    Parameters
    ----------
    path : path of the image file, read with caffe.io.load_image.
    str  : name of the blob in net.blobs to read back after the forward pass.
           (The name shadows the builtin; it is kept so existing callers
           continue to work.)

    Returns
    -------
    A copy of the first batch entry of net.blobs[str].data.
    """
    image = caffe.io.load_image(path)
    transformed_image = transformer.preprocess('data', image)
    # Assigning through [...] writes this image into every batch slot of the input blob.
    net.blobs['data'].data[...] = transformed_image
    # The return value of forward() is not needed; the blob is read directly below.
    net.forward()
    # Return a copy: the blob's data array belongs to the network and is
    # overwritten by the next forward pass, which would silently change a
    # result the caller is still holding.
    return net.blobs[str].data[0].copy()

def DFSDATA(path,str):
    """Extract features for every file found under a directory tree.

    Parameters
    ----------
    path : root directory, traversed recursively with os.walk.
    str  : name of the blob in net.blobs whose activations are extracted.

    Returns
    -------
    (data, label): data has one flattened feature row per file; label holds,
    for each file, the integer parsed from the second character of the name
    of the directory that contains it.
    """
    # First pass: count the files so the output arrays can be preallocated.
    total = 0
    for _, _, names in os.walk(path):
        total += len(names)

    feature_shape = net.blobs[str].data[0].shape
    data = np.zeros((total,) + feature_shape)
    label = np.zeros(total)

    # Second pass: fill one row per file, in os.walk order.
    row = 0
    for folder, _, names in os.walk(path):
        folder_name = os.path.split(folder)[-1]
        for name in names:
            data[row] = extract(os.path.join(folder, name), str)
            # Only the single character at index 1 of the directory name is used as the label.
            label[row] = int(folder_name[1])
            row += 1

    # Flatten each feature to a single row.
    return data.reshape((total, -1)), label
    
def ExtractFeatureToMat(InPath,OutPath,str):
    """Extract features for a train and a test image tree and save them to a .mat file.

    Parameters
    ----------
    InPath  : sequence of two directories; InPath[0] is the training tree and
              InPath[1] is the test tree.
    OutPath : destination file passed to scipy.io.savemat.
    str     : name of the blob in net.blobs whose activations are saved.

    The saved file contains the variables train_X, train_Y, test_X and test_Y.

    Side effects: switches Caffe to GPU mode on device 0 and reshapes the
    network's 'data' input blob.
    """
    import scipy.io as sio

    caffe.set_device(0)  # if we have multiple GPUs, pick the first one
    caffe.set_mode_gpu()
    # extract() feeds exactly one image per forward pass, so use a batch of one.
    # A larger batch would only hold the same image in every slot and multiply
    # the work of each forward pass without changing the extracted features.
    net.blobs['data'].reshape(1,         # batch size: one image per forward pass
                              3,         # 3-channel (BGR) images
                              227, 227)  # image size is 227x227
    train_X,train_Y = DFSDATA(InPath[0],str)
    test_X,test_Y   = DFSDATA(InPath[1],str)
    sio.savemat(OutPath,mdict={'train_X':train_X,'train_Y':train_Y,'test_X':test_X,'test_Y':test_Y})
    
# input image trees (source pictures are 256*256)
train_path = '/home/jiangyy/Desktop/tree/train'
test_path = '/home/jiangyy/Desktop/tree/val'

# one output .mat file per layer, written in this order
for matpath, layer in [('/home/jiangyy/Desktop/tree/pool5.mat', 'pool5'),
                       ('/home/jiangyy/Desktop/tree/fc6.mat', 'fc6')]:
    ExtractFeatureToMat([train_path,test_path],matpath,layer)


