In [1]:
import os
import re

import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile
import numpy as np
import pandas as pd
import sklearn
from sklearn import cross_validation
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC, LinearSVC
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

In [7]:
model_dir = '/home/king/DeepLearning/tensorflow/tensorflow/models/image/imagenet/TUTORIAL_DIR/imagenet'
images_dir = 'images/'
key_images = [images_dir+f for f in os.listdir(images_dir) if re.search('jpg|JPG|jpeg|JPEG', f)]

#the feature image is in the list_image


In [8]:
print key_images

['images/11642004616.jpeg', 'images/1482807.jpeg', 'images/10376619775.jpeg', 'images/1788049237.jpeg', 'images/1242365.jpeg', 'images/865539.jpeg', 'images/1736799718.jpeg', 'images/1262564.jpeg', 'images/10728874810.jpeg', 'images/11135259511.jpeg', 'images/11126663072.jpeg', 'images/10782385282.jpeg', 'images/1290675.jpeg', 'images/1074470.jpeg', 'images/10745344355.jpeg', 'images/1655191167.jpeg', 'images/1017376.jpeg', 'images/11286525792.jpeg', 'images/10417533589.jpeg', 'images/4078174.jpeg', 'images/11067511404.jpeg', 'images/1574438.jpeg', 'images/2676488.jpeg', 'images/10454678150.jpeg', 'images/11286365975.jpeg', 'images/2201988.jpeg', 'images/11608246865.jpeg', 'images/1086966.jpeg']


In [9]:
def create_graph(model_path):
    """
    create_graph loads the inception model to memory, should be called before
    calling extract_features.
 
    model_path: path to inception model in protobuf form.
    """
    with gfile.FastGFile(model_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')

def extract_features(image_paotths, verbose=False):
    """
    extract_features computed the inception bottleneck feature for a list of images
 
    image_paths: array of image path
    return: 2-d array in the shape of (len(image_paths), 2048)
    """
    feature_dimension = 2048
    features = np.empty((len(image_paths), feature_dimension))
 
    with tf.Session() as sess:
        flattened_tensor = sess.graph.get_tensor_by_name('pool_3:0')
 
        for i, image_path in enumerate(image_paths):
            if verbose:
                print('Processing %s...' % (image_path))
 
            if not gfile.Exists(image_path):
                tf.logging.fatal('File does not exist %s', image)
 
            image_data = gfile.FastGFile(image_path, 'rb').read()
            feature = sess.run(flattened_tensor, {
                'DecodeJpeg/contents:0': image_data
            })
            features[i, :] = np.squeeze(feature)
 
    return features 

In [10]:
def create_graph():
    with gfile.FastGFile(os.path.join( model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
    
def extract_features(list_images):
    nb_features = 2048
    features = np.empty((len(list_images),nb_features))
    labels = []

    create_graph()

    with tf.Session() as sess:

        next_to_last_tensor = sess.graph.get_tensor_by_name('pool_3:0')

        for ind, image in enumerate(list_images):
            if (ind%1 == 0):
                print('Processing %s...' % (image))
            if not gfile.Exists(image):
                tf.logging.fatal('File does not exist %s', image)

            image_data = gfile.FastGFile(image, 'rb').read()
            predictions = sess.run(next_to_last_tensor,{'DecodeJpeg/contents:0': image_data})
            features[ind,:] = np.squeeze(predictions)
            labels.append(re.split('_\d+',image.split('/')[1])[0])

    return features, labels


In [11]:
features,labels = extract_features(key_images)
print labels

Processing images/11642004616.jpeg...
Processing images/1482807.jpeg...
Processing images/10376619775.jpeg...
Processing images/1788049237.jpeg...
Processing images/1242365.jpeg...
Processing images/865539.jpeg...
Processing images/1736799718.jpeg...
Processing images/1262564.jpeg...
Processing images/10728874810.jpeg...
Processing images/11135259511.jpeg...
Processing images/11126663072.jpeg...
Processing images/10782385282.jpeg...
Processing images/1290675.jpeg...
Processing images/1074470.jpeg...
Processing images/10745344355.jpeg...
Processing images/1655191167.jpeg...
Processing images/1017376.jpeg...
Processing images/11286525792.jpeg...
Processing images/10417533589.jpeg...
Processing images/4078174.jpeg...
Processing images/11067511404.jpeg...
Processing images/1574438.jpeg...
Processing images/2676488.jpeg...
Processing images/10454678150.jpeg...
Processing images/11286365975.jpeg...
Processing images/2201988.jpeg...
Processing images/11608246865.jpeg...
Processing images/1086

In [12]:
pickle.dump(features, open('features', 'wb'))
pickle.dump(labels, open('labels', 'wb'))


In [13]:

features = pickle.load(open('features'))
labels = pickle.load(open('labels'))

print features
print labels

[[0.41377807 0.59280455 0.32684758 ... 0.57435828 0.41353694 0.42008951]
 [0.69165319 0.29902676 0.01650494 ... 0.05479108 0.01516404 0.80479848]
 [0.01203115 0.56935561 0.11367688 ... 0.40356895 1.02387428 0.33352917]
 ...
 [0.87836444 0.06072858 0.20197085 ... 0.61062628 0.61770022 0.53088611]
 [0.11471878 0.10627836 0.24543177 ... 0.092321   0.42540067 0.61093289]
 [0.39185447 0.29624408 0.14939347 ... 0.21214782 0.07772984 0.57533085]]
['11642004616.jpeg', '1482807.jpeg', '10376619775.jpeg', '1788049237.jpeg', '1242365.jpeg', '865539.jpeg', '1736799718.jpeg', '1262564.jpeg', '10728874810.jpeg', '11135259511.jpeg', '11126663072.jpeg', '10782385282.jpeg', '1290675.jpeg', '1074470.jpeg', '10745344355.jpeg', '1655191167.jpeg', '1017376.jpeg', '11286525792.jpeg', '10417533589.jpeg', '4078174.jpeg', '11067511404.jpeg', '1574438.jpeg', '2676488.jpeg', '10454678150.jpeg', '11286365975.jpeg', '2201988.jpeg', '11608246865.jpeg', '1086966.jpeg']


In [14]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.0, random_state=42)

In [15]:
clf = LinearSVC(C=1.0, loss='squared_hinge', penalty='l2',multi_class='ovr')
clf.fit(X_train, y_train)
#y_pred = clf.predict(X_test)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [16]:
print X_train
print y_train

[[0.6519832  0.35863289 0.32619032 ... 1.31694627 0.13698654 0.62269157]
 [0.87836444 0.06072858 0.20197085 ... 0.61062628 0.61770022 0.53088611]
 [0.10503348 0.33225742 0.2427562  ... 0.15818107 0.54429334 0.23554236]
 ...
 [0.30283138 0.07817659 0.33446062 ... 0.35435283 0.16470085 0.20100775]
 [0.11686864 0.16477966 0.48402545 ... 0.37912226 0.01035391 0.10016323]
 [0.42933169 0.03833755 0.54119611 ... 0.36448148 0.46310416 0.88233185]]
['11135259511.jpeg', '2201988.jpeg', '10728874810.jpeg', '1574438.jpeg', '11642004616.jpeg', '1290675.jpeg', '11286525792.jpeg', '2676488.jpeg', '10782385282.jpeg', '1074470.jpeg', '1655191167.jpeg', '1482807.jpeg', '1242365.jpeg', '865539.jpeg', '10376619775.jpeg', '1017376.jpeg', '10454678150.jpeg', '1788049237.jpeg', '11608246865.jpeg', '11286365975.jpeg', '10417533589.jpeg', '1086966.jpeg', '11067511404.jpeg', '1262564.jpeg', '11126663072.jpeg', '10745344355.jpeg', '4078174.jpeg', '1736799718.jpeg']


In [17]:
#all the JD images
JD_images_dir = 'JDimage/'
JD_list_images = [JD_images_dir+f for f in os.listdir(JD_images_dir) if re.search('jpg|JPG|jpeg|JPEG', f)]
JD_features,JD_labels = extract_features(JD_list_images)
#how big the features and labels can be

Processing JDimage/1736742637.jpeg...
Processing JDimage/11492326213.jpeg...
Processing JDimage/11498545616.jpeg...
Processing JDimage/10782385276.jpeg...
Processing JDimage/10938079452.jpeg...
Processing JDimage/11252368724.jpeg...
Processing JDimage/10128713153.jpeg...
Processing JDimage/1643572925.jpeg...
Processing JDimage/11286525787.jpeg...
Processing JDimage/11247139684.jpeg...
Processing JDimage/10850537838.jpeg...
Processing JDimage/2536942.jpeg...
Processing JDimage/11403765874.jpeg...
Processing JDimage/10376619777.jpeg...
Processing JDimage/11282351602.jpeg...
Processing JDimage/10728874600.jpeg...
Processing JDimage/11454484604.jpeg...
Processing JDimage/2649433.jpeg...
Processing JDimage/10926714585.jpeg...
Processing JDimage/11010953083.jpeg...
Processing JDimage/10914042359.jpeg...
Processing JDimage/1789124252.jpeg...
Processing JDimage/11618665769.jpeg...
Processing JDimage/10866863024.jpeg...
Processing JDimage/10711231047.jpeg...
Processing JDimage/10745344345.jpeg.

In [18]:
y_pred = clf.predict(JD_features)
print len(JD_labels)
print len(y_pred)

3565
3565


In [19]:
for dirName in y_train:
    os.makedirs("/media/king/DeepVision/JDimage_result_1/"+dirName)

In [20]:
import shutil

In [21]:
for dirName in y_train:
    #os.makedirs("/media/king/DeepVision/JDimage_result/"+dirName)
    shutil.copy2('/media/king/DeepVision/JDimage/'+dirName,"/media/king/DeepVision/JDimage_result_1/"+dirName)

In [22]:
for JD_image, result in zip(JD_labels, y_pred):
     shutil.copy2('/media/king/DeepVision/JDimage/'+JD_image,"/media/king/DeepVision/JDimage_result_1/"+result)
#the results are putted in the folders