# Table of Contents
 <p><div class="lev1 toc-item"><a href="#GANs-Demo" data-toc-modified-id="GANs-Demo-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>GANs Demo</a></div><div class="lev2 toc-item"><a href="#原始数据的搜集" data-toc-modified-id="原始数据的搜集-11"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>原始数据的搜集</a></div><div class="lev2 toc-item"><a href="#头像截取" data-toc-modified-id="头像截取-12"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>头像截取</a></div><div class="lev2 toc-item"><a href="#训练" data-toc-modified-id="训练-13"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>训练</a></div>

# GANs Demo

本文的实现来自 [GAN学习指南：从原理入门到制作生成Demo - 知乎专栏](https://zhuanlan.zhihu.com/p/24767059)，非常有意思；缺点是原文没有解释源代码，我们在这里尝试解释一下，让本文更加性感些。  
爬虫的代码收下了，多谢作者【何之源】，请大家关注作者专栏。

## 原始数据的搜集

In [1]:
import requests
from bs4 import BeautifulSoup
import os
import traceback

In [37]:
def download(url, filename):
    """Download ``url`` to ``filename``, streaming the body to disk.

    Args:
        url: Address of the resource to fetch.
        filename: Destination path. If it already exists, nothing is
            downloaded.

    Returns:
        ``filename`` after a successful download; ``None`` when the file
        already existed or when the download failed (the traceback is
        printed and any partially written file is removed).

    Raises:
        KeyboardInterrupt: Re-raised, with its original traceback, after the
            partially written file has been removed.
    """
    # Skip files that were already fetched by an earlier run.
    if os.path.exists(filename):
        print('file exists!')
        return
    response = None
    try:
        # stream=True defers reading the body so it can be written chunk by
        # chunk. (On a Response, .content is bytes and .text is the decoded
        # unicode string; neither is used here.)
        response = requests.get(url, stream=True, timeout=60)
        # Raises requests.HTTPError for 4xx/5xx answers; no-op otherwise.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            # chunk_size is the number of bytes read into memory per step
            # (the library default would be 1).
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        return filename
    except KeyboardInterrupt:
        # Manual stop (Ctrl-C): drop the incomplete file, then re-raise the
        # same exception so its traceback is preserved.
        if os.path.exists(filename):
            os.remove(filename)
        raise
    except Exception:
        # Best effort: report the error, drop the incomplete file and let the
        # caller move on to the next image.
        traceback.print_exc()
        if os.path.exists(filename):
            os.remove(filename)
    finally:
        # A streamed response holds its connection until it is closed.
        if response is not None:
            response.close()

In [115]:
# Create the imgs directory the first time the notebook is run.
if not os.path.exists('imgs'):
    os.makedirs('imgs')

In [None]:
# The crawl can be interrupted for many reasons (network errors, timeouts);
# check the last page number printed below and set `start` to resume there.
start = 1
end = 10
for i in range(start, end+1):
    # Listing URL for page i (plain %-formatting of the page number).
    url = 'http://konachan.net/post?page=%d&tags=' % i
    # The timeout keeps a stalled connection from hanging the loop forever;
    # raise_for_status() stops on an HTTP error page instead of silently
    # parsing it and finding no images.
    page = requests.get(url, timeout=60)
    page.raise_for_status()
    # .text is the decoded (unicode) page source.
    html = page.text
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.find_all('img', "preview"):
        src = img['src']
        # The scheme is only prepended to protocol-relative sources
        # ("//host/path"); an already absolute URL is used unchanged.
        # NOTE(review): assumes the site serves one of those two forms.
        target_url = 'http:' + src if src.startswith('//') else src
        # The last path component of the URL is the image file name; store
        # the file under imgs/ with that name.
        filename = os.path.join('imgs', target_url.split('/')[-1])
        download(target_url, filename)
    print('%d / %d' % (i, end))

## 头像截取

In [17]:
import cv2
import sys
import os.path
from glob import glob

In [59]:
def detect(filename, cascade_file="lbpcascade_animeface.xml", output_dir="faces"):
    """Detect faces in one image and save each as a 96x96 JPEG crop.

    Args:
        filename: Path of the image to scan.
        cascade_file: Path of the cascade classifier XML file.
        output_dir: Directory the crops are written to. It must already
            exist; this function does not create it.

    Returns:
        List of the crop paths that were written. Empty when the image could
        not be read or no face was detected.

    Raises:
        RuntimeError: If ``cascade_file`` does not exist.
    """
    if not os.path.isfile(cascade_file):
        raise RuntimeError("%s: not found" % cascade_file)

    cascade = cv2.CascadeClassifier(cascade_file)
    # imread gives a height x width x channel array, or None when the file
    # is missing, truncated or not an image (e.g. an interrupted download).
    image = cv2.imread(filename)
    if image is None:
        # Skip the file so that one bad download does not abort a batch.
        print('%s: cannot be read as an image, skipped' % filename)
        return []
    # The detector works on a single-channel image; equalizing the histogram
    # first evens out the contrast.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = cascade.detectMultiScale(gray,
                                     # detector options
                                     scaleFactor=1.1,
                                     minNeighbors=5,
                                     minSize=(48, 48))
    # Crops are named "<source name up to its first dot>-<face index>.jpg".
    stem = os.path.basename(filename).split('.')[0]
    saved = []
    for i, (x, y, w, h) in enumerate(faces):
        # Rows are indexed by y and columns by x.
        face = image[y: y + h, x:x + w, :]
        # Normalize every crop to 96x96.
        face = cv2.resize(face, (96, 96))
        save_path = os.path.join(output_dir, '%s-%d.jpg' % (stem, i))
        # Only report paths that imwrite confirms were written.
        if cv2.imwrite(save_path, face):
            saved.append(save_path)
    return saved


if __name__ == '__main__':
    # The crops are written into faces/, so create it when it is missing.
    if not os.path.exists('faces'):
        os.makedirs('faces')
    # Collect the paths matching imgs/*.jpg and run the detector on each.
    file_list = glob('imgs/*.jpg')
    for filename in file_list:
        detect(filename)

In [78]:
# Step-by-step walkthrough of detect(): load the cascade classifier.
cascade = cv2.CascadeClassifier("lbpcascade_animeface.xml")

In [86]:
# Read one sample image from imgs/ to inspect the intermediate results.
image = cv2.imread('imgs/4ecae22fbd97a2458ec6849ac5a454cc.jpg')

In [87]:
# Display the dimensions of the loaded image array.
image.shape

(186, 300, 3)

In [88]:
# Convert the color image to grayscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

In [89]:
# Display the dimensions of the grayscale array.
gray.shape

(186, 300)

In [90]:
# Histogram equalization of the grayscale image.
gray = cv2.equalizeHist(gray)

In [91]:
# Run the cascade detector with the same options detect() uses.
faces = cascade.detectMultiScale(gray,
                                 # detector options
                                 scaleFactor=1.1,
                                 minNeighbors=5,
                                 minSize=(48, 48))

In [92]:
# Display the detections; the loop in the next cell unpacks each row as (x, y, w, h).
faces

array([[ 50,  62, 106, 106]], dtype=int32)

In [93]:
# Crop every detected face and show its shape before and after resizing.
for i, (x, y, w, h) in enumerate(faces):
    # Rows are indexed by y and columns by x.
    face = image[y: y + h, x:x + w, :]
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # matching the call form used elsewhere in this notebook.
    print(face.shape)
    # Resize to 96x96.
    face = cv2.resize(face, (96, 96))
    print(face.shape)

(106, 106, 3)
(96, 96, 3)


## 训练

In [4]:
# List the contents of the data directory.
ls data

[0m[01;34mfaces[0m/


In [2]:
# Change the working directory to DCGAN-tensorflow; the training command below is run from there.
cd DCGAN-tensorflow

/home/evil_rabbit/RL/GANs/DCGAN-tensorflow


In [5]:
import tensorflow as tf

In [10]:
# This invocation is known to run: 96x96 inputs, 48x48 outputs, 300 epochs on the "faces" dataset.
!python main.py --input_height 96 --input_width 96 --output_height 48 --output_width 48 --dataset faces --crop --train --epoch 300 --input_fname_pattern "*.jpg"

{'batch_size': 64,
 'beta1': 0.5,
 'checkpoint_dir': 'checkpoint',
 'crop': True,
 'dataset': 'faces',
 'epoch': 300,
 'input_fname_pattern': '*.jpg',
 'input_height': 96,
 'input_width': 96,
 'learning_rate': 0.0002,
 'output_height': 48,
 'output_width': 48,
 'sample_dir': 'samples',
 'train': True,
 'train_size': inf,
 'visualize': False}
2017-06-07 19:50:30.360530: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-06-07 19:50:30.360568: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-06-07 19:50:30.360593: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU c

`python main.py --input_height 96 --input_width 96 --output_height 48 --output_width 48 --dataset faces --crop --train --epoch 300 --input_fname_pattern "*.jpg"`