In [11]:
import numpy as np

import cv2 as cv2

import tensorflow as tf

tf.compat.v1.disable_eager_execution()

In [12]:
import os
import random

In [13]:
# 预先写几个方法
# 方法一就是获取文件夹下的数据
def get_imgs(rate = 0.2):
    """
    获取图片，并划分训练集和测试集
    Parameters:
        rate:测试集 2 和训练集 10 的比例，即测试集个数/训练集个数
    Returns:
        test_imgs:测试集
        test_labels:测试集标签
        train_imgs:训练集
        train_labels:训练集标签
    """
    # 读取图片
    imgs = os.listdir('./verify/')
    # 打乱图片顺序
    random.shuffle(imgs)

    # 数据集总共个数
    imgs_num = len(imgs)
    
    # 按照比例求出测试集个数
    test_num = int(imgs_num * rate / (1 + rate))
    
    # 测试集，测试数据的路径
    test_imgs = imgs[:test_num]
    # 根据文件名获取测试集标签
    test_labels = list(map(lambda x: x.split('.')[0], test_imgs))
    
    
    # 训练集
    train_imgs = imgs[test_num:]
    # 根据文件名获取训练集标签
    train_labels = list(map(lambda x: x.split('.')[0], train_imgs))

    return test_imgs, test_labels, train_imgs, train_labels

In [14]:
char_set_len = 63
def text2vec(text):
    """
    文本转向量
    Parameters:
        text:文本
    Returns:
        vector:向量
    """
    if len(text) > 4:
        raise ValueError('验证码最长4个字符')

    vector = np.zeros(4 * char_set_len)
    def char2pos(c):
        if c =='_':
            k = 62
            return k
        k = ord(c) - 48
        if k > 9:
            k = ord(c) - 55
            if k > 35:
                k = ord(c) - 61
                if k > 61:
                    raise ValueError('No Map')
        return k
    for i, c in enumerate(text):
        idx = i * char_set_len + char2pos(c)
        vector[idx] = 1
    return vector

In [15]:
# 定义了每次取出一批数据，tensorflow训练的时候，一批批喂给算法，for循环执行很多次
def get_next_batch(test_imgs,test_labels,train_imgs,train_labels,train_flag=True, batch_size=100):
    train_size = 4160
    test_size = 831
    train_ptr = 0
    test_ptr = 0
    height = 30
    width = 100
    max_captcha = 4
    # 0~ 9（10），a~z（26） ，A~Z（26） --------> 62 + 1 (_未知) -----> 63
    char_set_len = 63
    data_path = './verify/'
    """
    获得batch_size大小的数据集
    Parameters:
        batch_size:batch_size大小
        train_flag:是否从训练集获取数据
    Returns:
        batch_x:大小为batch_size的数据x
        batch_y:大小为batch_size的数据y
        image(用于测试)
    """
    # 从训练集获取数据
    if train_flag == True:
        if (batch_size + train_ptr) < train_size:
            trains = train_imgs[train_ptr:(train_ptr + batch_size)]
            labels = train_labels[train_ptr:(train_ptr + batch_size)]
            train_ptr += batch_size
        else:
            new_ptr = (train_ptr + batch_size) % train_size
            trains = train_imgs[train_ptr:] + train_imgs[:new_ptr]
            labels = train_labels[train_ptr:] + train_labels[:new_ptr]
            train_ptr = new_ptr

#       返回数据，给了形状
        batch_X = np.zeros([batch_size, height*width])
#       目标值，独热编码表示 4 * 63 ------> 概率问题
        batch_y = np.zeros([batch_size, max_captcha*char_set_len])

        for index, train in enumerate(trains):
            # 黑白图片
            img = np.mean(cv2.imread(data_path + train), axis = -1)
            # 将多维降维1维
            batch_X[index,:] = img.flatten() / 255
        for index, label in enumerate(labels):
            batch_y[index,:] = text2vec(label)

    # 从测试集获取数据
    else:
        if (batch_size + test_ptr) < test_size:
            tests = test_imgs[test_ptr:(test_ptr + batch_size)]
            labels = test_labels[test_ptr:(test_ptr + batch_size)]
            test_ptr += batch_size
        else:
            new_ptr = (test_ptr + batch_size) % test_size
            tests = test_imgs[test_ptr:] + test_imgs[:new_ptr]
            labels = test_labels[test_ptr:] + test_labels[:new_ptr]
            test_ptr = new_ptr

        batch_X = np.zeros([batch_size, height*width])
        batch_y = np.zeros([batch_size, max_captcha*char_set_len])

        for index, test in enumerate(tests):
#             图片灰度化处理，黑白处理
            img = np.mean(cv2.imread(data_path + test), axis = -1)
            # 将多维降维1维
            batch_X[index,:] = img.ravel() / 255
        for index, label in enumerate(labels):
            batch_y[index,:] = text2vec(label)
        return batch_X, batch_y
    return batch_X, batch_y

In [16]:
def vec2text(vec):
    char_set_len = 63
    """
    向量转文本
    Parameters:
        vec:向量
    Returns:
        文本
    """
    char_pos = vec.nonzero()[0]
    text = []
    for c in char_pos:
        char_idx = c % char_set_len
        if char_idx < 10:
            char_code = char_idx + ord('0')
        elif char_idx < 36:
            char_code = char_idx - 10 + ord('A')
        elif char_idx < 62:
            char_code = char_idx - 36 + ord('a')
        elif char_idx == 62:
            char_code = ord('_')
        else:
            raise ValueError('error')
        text.append(chr(char_code))
    return "".join(text)

In [17]:
X =tf.compat.v1.placeholder(dtype=tf.float64,shape = [None,3000])

kp = tf.compat.v1.placeholder(dtype=tf.float64,shape = None)

# 独热编码，长度4（验证码长度）* 63（0~9，A~Z，a~z _ 63）
y = tf.compat.v1.placeholder(dtype=tf.float64,shape = [None,4*63])
def crack_captcha_cnn():
    
#     第一层
    input_data = tf.reshape(X,shape = [-1,30,100,1])

    filter1 = tf.Variable(initial_value=tf.random.normal(shape = [3,3,1,32],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    
    b1 = tf.Variable(initial_value=tf.random.normal(shape = [32],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    
    conv1 = tf.nn.conv2d(input_data,filter1,strides=[1,1,1,1],padding='SAME') + b1
    
    relu1 = tf.nn.relu(conv1)
#     池化
    pool1 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
#     pool1 -------> shape = [-1,15,50,32]
#     第二层
    filter2 = tf.Variable(initial_value=tf.random.normal(shape = [3,3,32,64],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    
    b2 = tf.Variable(initial_value=tf.random.normal(shape = [64],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)

    conv2 = tf.nn.conv2d(pool1,filter2,strides=[1,1,1,1],padding='SAME') + b2
    
#     激活函数
    sigmoid2 = tf.nn.sigmoid(conv2)
    
#     池化
    pool2 = tf.nn.max_pool(sigmoid2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
#     pool2  --------> shape = [-1,8,25,64]
#     第三层
    filter3 = tf.Variable(initial_value=tf.random.normal(shape = [3,3,64,64],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    
    b3 = tf.Variable(initial_value=tf.random.normal(shape = [64],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    
    conv3 = tf.nn.conv2d(pool2,filter3,strides=[1,1,1,1],padding='SAME') + b3

    relu3 = tf.nn.relu(conv3)
    
    pool3 = tf.nn.max_pool(relu3,ksize= [1,2,2,1],strides=[1,2,2,1],padding='SAME')
#     pool3  --------> shape = [-1,4,13,64]
#     全连接层 1024 个神经元
    dense = tf.reshape(pool3,shape = [-1,4*13*64])
    
    conn_w = tf.Variable(tf.random.normal(shape = [4*13*64,1024],dtype = tf.float64,stddev = 0.01),dtype= tf.float64)
    
    conn_b  = tf.Variable(tf.random.normal(shape = [1024],dtype = tf.float64,stddev = 0.01),dtype= tf.float64)
    
    conn = tf.matmul(dense,conn_w) + conn_b
#     conn -------> shape = [-1,1024]
    conn_relu = tf.nn.relu(conn)
    
#     dropout
    dropout = tf.nn.dropout(conn_relu,rate=kp)
    
#     输出层 y进行对比
    out_w = tf.Variable(initial_value=tf.random.normal(shape = [1024,4*63],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    out_b = tf.Variable(initial_value=tf.random.normal(shape = [4*63],dtype = tf.float64,stddev = 0.01),dtype=tf.float64)
    
    out = tf.matmul(dropout,out_w) + out_b
    
#     out ----------> shape = [?,4*63]
    return out

In [18]:
# 训练
def train_crack_captcha_cnn():
    
    
    
    out = crack_captcha_cnn()
    saver = tf.compat.v1.train.Saver()
    
#     真实分布，y 
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels= y,logits=out))
    
    optimizer = tf.compat.v1.train.AdamOptimizer().minimize(cost)
    
    
#     计算准确率方法,概率
    prob_ = tf.nn.softmax(out)
    
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(prob_,axis = -1),tf.argmax(y,axis = -1)),tf.float64))
    
    
    with tf.compat.v1.Session() as sess:
        
        sess.run(tf.compat.v1.global_variables_initializer())
        
        test_imgs,test_labels,train_imgs,train_labels = get_imgs()
        
        for i in range(100000):
            
            if (i%500 == 0)&(i!=0):
                test_imgs,test_labels,train_imgs,train_labels = get_imgs()
            
            X_train,y_train = get_next_batch(test_imgs,test_labels,train_imgs,train_labels)
            
            
            optimizer_,cost_ = sess.run(fetches = [optimizer,cost],feed_dict = {X:X_train,y:y_train,kp:0.5})
            
            if i%100 == 0:
                X_test,y_test = get_next_batch(test_imgs,test_labels,train_imgs,train_labels,train_flag=False,batch_size=500)
                score = sess.run(fetches = accuracy,feed_dict={X:X_test,y:y_test,kp:1})
                print('算法执行次数：{}算法准确率：{}'.format(i,score))
                if score >=0.96:
                    saver.save(sess,'./captcha/model',i)
                    
            if (i%1000 == 0)&(i!=0):
                saver.save(sess,'./captcha/model',i)

In [19]:
train_crack_captcha_cnn()

NotFoundError: 2 root error(s) found.
  (0) Not found: No algorithm worked!
	 [[node Conv2D_3 (defined at <ipython-input-17-e56fe0ce0afb>:16) ]]
  (1) Not found: No algorithm worked!
	 [[node Conv2D_3 (defined at <ipython-input-17-e56fe0ce0afb>:16) ]]
	 [[Mean_2/_9]]
0 successful operations.
0 derived errors ignored.

Errors may have originated from an input operation.
Input Source operations connected to node Conv2D_3:
 Reshape_2 (defined at <ipython-input-17-e56fe0ce0afb>:10)

Input Source operations connected to node Conv2D_3:
 Reshape_2 (defined at <ipython-input-17-e56fe0ce0afb>:10)

Original stack trace for 'Conv2D_3':
  File "D:\Python\Python38\lib\runpy.py", line 193, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "D:\Python\Python38\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "d:\python\venv(data_analysis)\lib\site-packages\traitlets\config\application.py", line 845, in launch_instance
    app.start()
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
    self.io_loop.start()
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "D:\Python\Python38\lib\asyncio\windows_events.py", line 316, in run_forever
    super().run_forever()
  File "D:\Python\Python38\lib\asyncio\base_events.py", line 567, in run_forever
    self._run_once()
  File "D:\Python\Python38\lib\asyncio\base_events.py", line 1855, in _run_once
    handle._run()
  File "D:\Python\Python38\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\ioloop.py", line 688, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\ioloop.py", line 741, in _run_callback
    ret = callback()
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\gen.py", line 814, in inner
    self.ctx_run(self.run)
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\gen.py", line 775, in run
    yielded = self.gen.send(value)
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel\kernelbase.py", line 358, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel\kernelbase.py", line 261, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel\kernelbase.py", line 536, in execute_request
    self.do_execute(
  File "d:\python\venv(data_analysis)\lib\site-packages\tornado\gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel\ipkernel.py", line 302, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "d:\python\venv(data_analysis)\lib\site-packages\ipykernel\zmqshell.py", line 539, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "d:\python\venv(data_analysis)\lib\site-packages\IPython\core\interactiveshell.py", line 2894, in run_cell
    result = self._run_cell(
  File "d:\python\venv(data_analysis)\lib\site-packages\IPython\core\interactiveshell.py", line 2940, in _run_cell
    return runner(coro)
  File "d:\python\venv(data_analysis)\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "d:\python\venv(data_analysis)\lib\site-packages\IPython\core\interactiveshell.py", line 3165, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "d:\python\venv(data_analysis)\lib\site-packages\IPython\core\interactiveshell.py", line 3357, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "d:\python\venv(data_analysis)\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-6ea79fa8cbf0>", line 1, in <module>
    train_crack_captcha_cnn()
  File "<ipython-input-18-137e65b57150>", line 6, in train_crack_captcha_cnn
    out = crack_captcha_cnn()
  File "<ipython-input-17-e56fe0ce0afb>", line 16, in crack_captcha_cnn
    conv1 = tf.nn.conv2d(input_data,filter1,strides=[1,1,1,1],padding='SAME') + b1
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
    return target(*args, **kwargs)
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 2163, in conv2d_v2
    return conv2d(input,  # pylint: disable=redefined-builtin
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
    return target(*args, **kwargs)
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 2270, in conv2d
    return gen_nn_ops.conv2d(
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 968, in conv2d
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 748, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\framework\ops.py", line 3528, in _create_op_internal
    ret = Operation(
  File "d:\python\venv(data_analysis)\lib\site-packages\tensorflow\python\framework\ops.py", line 1990, in __init__
    self._traceback = tf_stack.extract_stack()
