In [None]:
%matplotlib inline

# 第2部 特集4 Python で画像認識にチャレンジ (2)

## 第4章 猫顔検出に挑戦

In [None]:
import sys , os , re
import pickle
from glob import iglob
from xml.etree import ElementTree as et

import numpy as np
import matplotlib.pyplot as plt
import sklearn.svm
import sklearn.utils
from skimage import io, feature, color , transform

----

### 猫画像の準備

教科書の Cat Dataset はアクセス不可だったため、http://www.robots.ox.ac.uk/~vgg/data/pets/ で代用（犬もある！）

images.tar.gz に画像ファイル

annotations.tar.gz の xmls 内に顔の位置を示す ymin,ymax,xmin,xmax 座標あり

In [None]:
# Given an image file and the annotation file that holds the face coordinates,
# return the original image together with the cropped face region.
def loadImagesWithFace (imgFile,xmlFile):
    """Load an image and crop the annotated face rectangle out of it.

    Parameters:
        imgFile: path of the image, read with ``io.imread``.
        xmlFile: path of the XML annotation; the first ``ymin``, ``ymax``,
            ``xmin`` and ``xmax`` elements found anywhere in the tree are
            parsed as integers.

    Returns:
        A tuple ``(image, face)`` where ``face`` is the slice
        ``image[ymin:ymax, xmin:xmax]`` of the loaded image.
    """
    picture = io.imread(imgFile)
    root = et.parse(xmlFile).getroot()
    top, bottom, left, right = [
        int(root.findtext(path))
        for path in (".//ymin", ".//ymax", ".//xmin", ".//xmax")
    ]
    return picture, picture[top:bottom, left:right]

In [None]:
img, face = loadImagesWithFace('cats/Abyssinian_1.jpg','xmls/Abyssinian_1.xml')

In [None]:
# Visualize the result: original image, cropped face, grayscale face
fig, (ax1,ax2,ax3) = plt.subplots(ncols=3, figsize=(8,3))

ax1.imshow(img)
ax1.set_axis_off()
ax1.set_title('image')

ax2.imshow(face)
ax2.set_axis_off()
ax2.set_title('face')

# A 2-D array is otherwise drawn with the default pseudo-color map, which
# hides the fact that this panel is the grayscale version of the face.
ax3.imshow(color.rgb2gray(face), cmap='gray')
ax3.set_axis_off()
ax3.set_title('rgb2gray')

In [None]:
face.shape

しかし、顔サイズが画像によってバラバラ

なので、最初から指定サイズになるような関数を作成

In [None]:
# Given an image file and the annotation file that holds the face coordinates,
# return the original image and the face image.
# The returned face image is resized to the requested size.
def loadImagesWithFaceResize (imgFile,xmlFile,size):
    """Load an image and return it with a square, resized crop of the face.

    The annotated rectangle is turned into a square centred on the rectangle.
    The square's side is the longer rectangle side when that fits inside the
    image, otherwise the shorter side. The square is shifted back inside the
    image if it would overhang an edge.

    Parameters:
        imgFile: path of the image, read with ``io.imread``.
        xmlFile: path of the XML annotation providing ``ymin``, ``ymax``,
            ``xmin`` and ``xmax`` (first occurrence of each, parsed as int).
        size: side length passed to ``transform.resize`` as ``(size, size)``.

    Returns:
        A tuple ``(image, face)``: the loaded image and the square crop
        resized with ``transform.resize``.
    """
    image = io.imread(imgFile)
    elem  = et.parse(xmlFile).getroot()
    ymin = int(elem.findtext(".//ymin"))
    ymax = int(elem.findtext(".//ymax"))
    xmin = int(elem.findtext(".//xmin"))
    xmax = int(elem.findtext(".//xmax"))
    # Floor division keeps the centres integral so they can be used as slice
    # bounds on Python 3 as well as Python 2.
    ycenter = (ymin + ymax) // 2
    xcenter = (xmin + xmax) // 2
    width  = xmax - xmin
    height = ymax - ymin
    longSide = max(width,height)
    # Must be `and`: `&` binds tighter than `<=`, which turned the original
    # test into the chained comparison `a <= (h & a) <= w`.
    if longSide <= image.shape[0] and longSide <= image.shape[1]:
        orgSize = longSide
    else : # keep the squared face region from becoming larger than the image
        orgSize = min(width,height)
    # Clamp the top-left corner so the square never starts at a negative index
    # (which would wrap around) and never runs past the bottom/right edge.
    top  = max(0, min(ycenter - orgSize // 2, image.shape[0] - orgSize))
    left = max(0, min(xcenter - orgSize // 2, image.shape[1] - orgSize))
    face = image[top:top + orgSize, left:left + orgSize]
    return image,transform.resize(face,(size,size))

In [None]:
img, face = loadImagesWithFaceResize('cats/Abyssinian_1.jpg',
                                     'xmls/Abyssinian_1.xml',64)

In [None]:
# Visualize the result: original image, resized face, grayscale face
fig, (ax1,ax2,ax3) = plt.subplots(ncols=3, figsize=(8,3))

ax1.imshow(img)
ax1.set_axis_off()
ax1.set_title('image')

ax2.imshow(face)
ax2.set_axis_off()
ax2.set_title('face')

# A 2-D array is otherwise drawn with the default pseudo-color map, which
# hides the fact that this panel is the grayscale version of the face.
ax3.imshow(color.rgb2gray(face), cmap='gray')
ax3.set_axis_off()
ax3.set_title('rgb2gray')

In [None]:
face.shape

あとは画像をたくさん用意するだけ。
- cats フォルダに正例
- dogs フォルダに負例

- cats_test に正例テストデータ
- dogs_test に負例テストデータ

として用意した。

----

ここからようやく本来のテキストに戻る

◯リスト1 セルごとにLBP特徴量のヒストグラムを求める

In [None]:
# LBP constants
LBP_RADIUS = 3  # radius, in pixels, around the pixel of interest
LBP_POINTS = 24 # number of points whose brightness is compared
CELL_SIZE  = 4  # size of the cell over which each histogram is accumulated
WINDOW_SIZE= 64 # detection-window size = size of the training images

In [None]:
# Compute the LBP (Local Binary Pattern) histogram of every cell.
# The image passed in must already be grayscale.
def get_histogram(image):
    """Return per-cell histograms of uniform LBP codes.

    The image is divided into non-overlapping CELL_SIZE x CELL_SIZE cells;
    trailing pixels that do not fill a whole cell are ignored.

    Parameters:
        image: 2-D grayscale image.

    Returns:
        Integer array of shape
        ``(image.shape[0] // CELL_SIZE, image.shape[1] // CELL_SIZE,
        LBP_POINTS + 2)``; entry ``[r, c, k]`` is the number of pixels in
        cell ``(r, c)`` whose LBP code is ``k``.
    """
    lbp = feature.local_binary_pattern(image, LBP_POINTS, LBP_RADIUS, 'uniform')
    bins = LBP_POINTS + 2 # the two extra bins are 0 and non-uniform
    # Floor division: np.zeros needs an integer shape on Python 3 too.
    rows = image.shape[0] // CELL_SIZE
    cols = image.shape[1] // CELL_SIZE
    histogram = np.zeros(shape = (rows, cols, bins), dtype = int)
    # Every complete cell is counted, including the last row and column,
    # which the previous `range(0, dim - CELL_SIZE, CELL_SIZE)` loop skipped
    # whenever the dimension was a multiple of CELL_SIZE.
    codes = lbp[:rows * CELL_SIZE, :cols * CELL_SIZE].astype(int)
    cell_rows, cell_cols = np.indices(codes.shape) // CELL_SIZE
    # Unbuffered add, so repeated (cell, code) indices each contribute 1.
    np.add.at(histogram, (cell_rows, cell_cols, codes), 1)
    return histogram

In [None]:
# 顔画像とグレースケールした顔画像のshape
face.shape , color.rgb2gray(face).shape

In [None]:
# ヒストグラム化した値
get_histogram(color.rgb2gray(face)).shape

----

◯リスト2改 正例、負例それぞれの特徴量を計算（ただし、顔画像を抽出しながらやる点で改造した）

In [None]:
def get_features(ImgDirectory , XmlDirectory):
    """Build LBP feature vectors for every annotated JPEG in a directory.

    For each ``*.jpg`` in ``ImgDirectory`` that has a same-named ``.xml`` file
    in ``XmlDirectory``, the face is cropped and resized to WINDOW_SIZE,
    converted to grayscale, and two flattened histograms are appended: one
    for the face and one for its left-right mirror image. Images without an
    annotation file are skipped.

    Parameters:
        ImgDirectory: directory containing the ``.jpg`` images.
        XmlDirectory: directory containing the ``.xml`` annotations; a
            trailing separator is optional.

    Returns:
        List of 1-D feature arrays (two per annotated image).
    """
    features = []

    for imgFile in iglob(os.path.join(ImgDirectory, '*.jpg')):
        # Derive the bare file name with os.path instead of a regex: works
        # with any path separator and with nested image directories.
        name = os.path.splitext(os.path.basename(imgFile))[0]
        xmlFile = os.path.join(XmlDirectory, name + ".xml")
        if not os.path.exists(xmlFile):
            continue
        print("found " + name)
        _, face = loadImagesWithFaceResize(imgFile,xmlFile,WINDOW_SIZE)
        image = color.rgb2gray(face)
        features.append(get_histogram(image).reshape(-1))
        # Mirrored copy doubles the number of training samples.
        features.append(get_histogram(np.fliplr(image)).reshape(-1))
    return features

In [None]:
def get_featuresNega(ImgDirectory):
    """Build LBP feature vectors for every PNG in a directory of negatives.

    Each ``*.png`` in ``ImgDirectory`` is read, converted to grayscale and
    contributes two flattened histograms: one for the image and one for its
    left-right mirror image. No cropping or resizing is done here.

    Parameters:
        ImgDirectory: directory containing the ``.png`` images.

    Returns:
        List of 1-D feature arrays (two per image).
    """
    features = []

    for imgFile in iglob(os.path.join(ImgDirectory, '*.png')):
        name = os.path.splitext(os.path.basename(imgFile))[0]
        print("found " + name)
        # Read the path glob returned rather than rebuilding it from the name.
        image = color.rgb2gray(io.imread(imgFile))
        features.append(get_histogram(image).reshape(-1))
        features.append(get_histogram(np.fliplr(image)).reshape(-1))
    return features

◯リスト2改 LBP化main

In [None]:
# Directories: annotations, positive examples (cats), negative examples (dogs).
xml_dir, positive_dir, negative_dir = 'xmls/', 'cats/', 'dogs/'
# Alternative negatives: negative_dir = 'negatives/' read with
# get_featuresNega(negative_dir) instead of get_features.
positive_samples = get_features(positive_dir, xml_dir)
negative_samples = get_features(negative_dir, xml_dir)
n_positives, n_negatives = len(positive_samples), len(negative_samples)
# Feature matrix with positives first, and matching labels (1 = cat, 0 = not).
X = np.array(positive_samples + negative_samples)
y = np.array([1] * n_positives + [0] * n_negatives)

In [None]:
X.shape

In [None]:
y , y.shape

In [None]:
# Fixed seed so that Restart & Run All reproduces the same sample order.
X , y = sklearn.utils.shuffle(X, y, random_state=0)

In [None]:
y, y.shape

----

◯リスト3改 SVMを学習

In [None]:
import sklearn.svm

In [None]:
classifier = sklearn.svm.LinearSVC(C = 0.0001)

In [None]:
classifier.fit(X,y)

In [None]:
y_predict = classifier.predict(X)

In [None]:
y_predict

In [None]:
# Accuracy = number of predictions equal to the true label / number of samples.
correct = int(np.sum(np.asarray(y) == np.asarray(y_predict)))
print('Accuracy: %f' % (float(correct)/len(y)))

ん？過学習？　っていうか、学習データでpredictしてる ...

そんなわけで、別にテストデータを用意

In [None]:
# Held-out test directories (annotations still come from xml_dir).
positive_test_dir, negative_test_dir = 'cats_test/', 'dogs_test/'
positive_test_samples = get_features(positive_test_dir, xml_dir)
negative_test_samples = get_features(negative_test_dir, xml_dir)
n_positives, n_negatives = len(positive_test_samples), len(negative_test_samples)
# NOTE: X and y are rebound here and now hold the test data, not the training data.
X = np.array(positive_test_samples + negative_test_samples)
y = np.array([1] * n_positives + [0] * n_negatives)


In [None]:
y #用意したテストデータの正答

In [None]:
# Try shuffling; fixed seed so the displayed order is reproducible.
X, y = sklearn.utils.shuffle( X, y, random_state=0 )

In [None]:
y

In [None]:
y_predict = classifier.predict(X)

In [None]:
y_predict #判定結果

In [None]:
# Accuracy = number of predictions equal to the true label / number of samples.
correct = int(np.sum(np.asarray(y) == np.asarray(y_predict)))
print('Accuracy: %f' % (float(correct)/len(y)))

まあ、こんなもんだよね

っていうか、ランダムしなくて良いのか？

----

◯リスト5改 検出処理

In [None]:
THRESHOLD = 1.0 # 3.0 is too strict
targetImg = 'cats_test/Abyssinian_195.jpg' # detection struggles unless the photo shows the face head-on
targetXml = 'xmls/Abyssinian_195.xml'

In [None]:
# Multi-scale sliding-window detection: build an image pyramid, score every
# WINDOW_SIZE x WINDOW_SIZE window with the SVM and keep those above THRESHOLD.
svm = classifier
target , face = loadImagesWithFaceResize(targetImg,targetXml,WINDOW_SIZE)
# Convert to grayscale once and shrink the 2-D image, so that rescale never
# has to deal with the channel axis.
target_scaled = color.rgb2gray(target)
scale_factor = 2.0** (-1.0/8.0)
cells_per_window = WINDOW_SIZE // CELL_SIZE
scores = []
detections = []
# Pyramid level. Level 0 is the unscaled image, so its scale must be
# scale_factor ** 0 == 1; starting at 1 mapped every box one level off.
s = 0
# Stop as soon as either side is too small to hold one detection window.
while min(target_scaled.shape[:2]) >= WINDOW_SIZE:
    histogram = get_histogram(target_scaled)
    print("target shape is %s" % (target_scaled.shape,))
    print("histogram shape is %s" % (histogram.shape,))
    scale = scale_factor ** s
    # Loop variables are deliberately not named x / y: y holds the labels.
    # The +1 includes the last window position along each axis.
    for win_y in range(histogram.shape[0] - cells_per_window + 1):
        for win_x in range(histogram.shape[1] - cells_per_window + 1):
            myFeature = histogram[win_y : win_y+cells_per_window,
                                  win_x : win_x+cells_per_window].reshape(-1)
            # decision_function expects a 2-D (n_samples, n_features) array.
            score = float(svm.decision_function(myFeature.reshape(1, -1))[0])
            scores.append(score)
            if score > THRESHOLD:
                # Detected! Map the window back to original-image coordinates.
                detections.append({ 'x': win_x * CELL_SIZE / scale,
                                    'y': win_y * CELL_SIZE / scale,
                                    'width' : WINDOW_SIZE / scale,
                                    'height': WINDOW_SIZE / scale,
                                    'score' : score,
                                    'histogram' : myFeature})
    target_scaled = transform.rescale(target_scaled, scale_factor)
    s += 1

In [None]:
max(scores) , target.shape , target_scaled.shape

In [None]:
detections

In [None]:
# Sort by score, highest first
detections = sorted(detections,key=lambda x: x['score'],reverse=True)

In [None]:
# Visualize the result
fig, (ax1,ax2) = plt.subplots(ncols=2, figsize=(8,3))

ax1.imshow(target)
ax1.set_axis_off()
ax1.set_title('target')

ax2.imshow(target)
ax2.set_axis_off()
ax2.set_title('detections')
# Draw the top 2 detections; slicing copes with fewer than 2 being found.
for det in detections[:2]:
    # Rectangle takes (xy, width, height), in that order.
    rect = plt.Rectangle((det["x"],det["y"]),det["width"],det["height"], edgecolor='r', facecolor='none') # match position
    ax2.add_patch(rect)



----

◯リスト6 Non-maximum Suppression

In [None]:
def overlap_score(a,b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Parameters:
        a, b: mappings with numeric ``'x'``, ``'y'`` (top-left corner),
            ``'width'`` and ``'height'`` entries.

    Returns:
        A float: intersection area divided by union area, or 0.0 when the
        boxes do not overlap or the union has no area.
    """
    left   = max(a['x'],b['x'])
    right  = min(a['x']+a['width'],b['x']+b['width'])
    top    = max(a['y'],b['y'])
    bottom = min(a['y']+a['height'],b['y']+b['height'])
    # Clamp each extent separately: when the boxes are disjoint on both axes
    # the two negative extents would otherwise multiply to a positive area.
    intersect = max(0, right - left) * max(0, bottom - top)
    union  = a['width'] * a['height'] + b['width'] * b['height'] - intersect
    if union <= 0:
        return 0.0
    # float() keeps this a true division for integer inputs on Python 2.
    return float(intersect) / union
# Greedy non-maximum suppression: walk the detections from the highest score
# down and drop every lower-scored one that overlaps a kept one by more than 0.3.
detections = sorted(detections, key=lambda det: det['score'], reverse=True)
deleted = set()
for i, kept in enumerate(detections):
    if i in deleted:
        # Already suppressed by a stronger detection; it suppresses nothing.
        continue
    for j in range(i + 1, len(detections)):
        if overlap_score(kept, detections[j]) > 0.3:
            deleted.add(j)
detections = [det for idx, det in enumerate(detections) if idx not in deleted]

In [None]:
detections

In [None]:
# Visualize the result
fig, (ax1,ax2) = plt.subplots(ncols=2, figsize=(8,3))

ax1.imshow(target)
ax1.set_axis_off()
ax1.set_title('target')

ax2.imshow(target)
ax2.set_axis_off()
ax2.set_title('detections')
# Draw the top detection; slicing copes with nothing having been detected.
for det in detections[:1]:
    # Rectangle takes (xy, width, height), in that order.
    rect = plt.Rectangle((det["x"],det["y"]),det["width"],det["height"], edgecolor='r', facecolor='none') # match position
    ax2.add_patch(rect)


### おわり

## 精度向上の試み

- サンプル数をたくさん与えてみる


In [None]:
# Larger training set: annotated cat faces as positives, the PNG images under
# negatives/ as negatives.
xml_dir, positive_dir, negative_dir = 'xmls/', 'cats_big/', 'negatives/'
# Alternative negatives: negative_dir = 'dogs_big/' read with
# get_features(negative_dir, xml_dir) instead of get_featuresNega.
positive_samples = get_features(positive_dir, xml_dir)
negative_samples = get_featuresNega(negative_dir)
n_positives, n_negatives = len(positive_samples), len(negative_samples)
# Feature matrix with positives first, and matching labels (1 = cat, 0 = not).
X = np.array(positive_samples + negative_samples)
y = np.array([1] * n_positives + [0] * n_negatives)

In [None]:
# -- Save / load the LBP features of the large sample set --
# NOTE(review): if re-enabled, open the file in binary mode ('wb' / 'rb'),
# and only load pickle files you created yourself.
#pickle.dump((X,y),open('bigSample_Xy.data','w')) #1.4G
#X,y = pickle.load(open('bigSample_Xy.data'))

In [None]:
# Fixed seed so that Restart & Run All reproduces the same sample order.
X , y = sklearn.utils.shuffle(X , y, random_state=0)

In [None]:
X.shape , y.shape

In [None]:
n_positives , n_negatives

In [None]:
classifier = sklearn.svm.LinearSVC(C = 0.0001)

In [None]:
classifier.fit(X,y)

In [None]:
# Held-out test directories (annotations still come from xml_dir).
positive_test_dir, negative_test_dir = 'cats_test/', 'dogs_test/'
positive_test_samples = get_features(positive_test_dir, xml_dir)
negative_test_samples = get_features(negative_test_dir, xml_dir)
n_test_positives = len(positive_test_samples)
n_test_negatives = len(negative_test_samples)
# NOTE: X and y are rebound here and now hold the test data, not the training data.
X = np.array(positive_test_samples + negative_test_samples)
y = np.array([1] * n_test_positives + [0] * n_test_negatives)

In [None]:
y_predict = classifier.predict(X)

In [None]:
y_predict

In [None]:
# Accuracy = number of predictions equal to the true label / number of samples.
correct = int(np.sum(np.asarray(y) == np.asarray(y_predict)))
print('Accuracy: %f' % (float(correct)/len(y)))

In [None]:
THRESHOLD = 1.0 # 3.0 is too strict
targetImg = 'cats_test/Abyssinian_195.jpg' # detection struggles unless the photo shows the face head-on
targetXml = 'xmls/Abyssinian_195.xml'

In [None]:
# Multi-scale sliding-window detection: build an image pyramid, score every
# WINDOW_SIZE x WINDOW_SIZE window with the SVM and keep those above THRESHOLD.
svm = classifier
target , face = loadImagesWithFaceResize(targetImg,targetXml,WINDOW_SIZE)
# Convert to grayscale once and shrink the 2-D image, so that rescale never
# has to deal with the channel axis.
target_scaled = color.rgb2gray(target)
scale_factor = 2.0** (-1.0/8.0)
cells_per_window = WINDOW_SIZE // CELL_SIZE
scores = []
detections = []
# Pyramid level. Level 0 is the unscaled image, so its scale must be
# scale_factor ** 0 == 1; starting at 1 mapped every box one level off.
s = 0
# Stop as soon as either side is too small to hold one detection window.
while min(target_scaled.shape[:2]) >= WINDOW_SIZE:
    histogram = get_histogram(target_scaled)
    print("target shape is %s" % (target_scaled.shape,))
    print("histogram shape is %s" % (histogram.shape,))
    scale = scale_factor ** s
    # Loop variables are deliberately not named x / y: y holds the labels.
    # The +1 includes the last window position along each axis.
    for win_y in range(histogram.shape[0] - cells_per_window + 1):
        for win_x in range(histogram.shape[1] - cells_per_window + 1):
            myFeature = histogram[win_y : win_y+cells_per_window,
                                  win_x : win_x+cells_per_window].reshape(-1)
            # decision_function expects a 2-D (n_samples, n_features) array.
            score = float(svm.decision_function(myFeature.reshape(1, -1))[0])
            scores.append(score)
            if score > THRESHOLD:
                # Detected! Map the window back to original-image coordinates.
                detections.append({ 'x': win_x * CELL_SIZE / scale,
                                    'y': win_y * CELL_SIZE / scale,
                                    'width' : WINDOW_SIZE / scale,
                                    'height': WINDOW_SIZE / scale,
                                    'score' : score,
                                    'histogram' : myFeature})
    target_scaled = transform.rescale(target_scaled, scale_factor)
    s += 1

In [None]:
# Sort by score, highest first
detections = sorted(detections,key=lambda x: x['score'],reverse=True)

In [None]:
len(detections)

In [None]:
# Visualize the result
fig, (ax1,ax2) = plt.subplots(ncols=2, figsize=(8,3))

ax1.imshow(target)
ax1.set_axis_off()
ax1.set_title('target')

ax2.imshow(target)
ax2.set_axis_off()
ax2.set_title('detections')
# Draw the top 2 detections; slicing copes with fewer than 2 being found.
for det in detections[:2]:
    # Rectangle takes (xy, width, height), in that order.
    rect = plt.Rectangle((det["x"],det["y"]),det["width"],det["height"], edgecolor='r', facecolor='none') # match position
    ax2.add_patch(rect)

