## 基于yolov5的数独检测
### 0.环境安装

In [7]:
!git clone https://github.com/pk5ls20/sudoku.git
!mv sudoku/* .
%pip install -qr requirements.txt
!git clone https://github.com/ultralytics/yolov5.git

fatal: destination path 'sudoku' already exists and is not an empty directory.
mv: cannot stat 'sudoku/*': No such file or directory
fatal: destination path 'yolov5' already exists and is not an empty directory.


### 1.提取数独

In [8]:
import os
import torch
import datetime
import cv2
model = torch.hub.load('ultralytics/yolov5', 'custom', path='detect_sudoku.pt')
input_path = 'sudoku_pic'
output_path = 'sudoku_pic/extract'
timex = lambda :datetime.datetime.now()
for file_name in os.listdir(input_path):
    if file_name.endswith('.jpg') or file_name.endswith('.png'):
        print(f"正在处理{file_name}", end='')
        img = cv2.imread(os.path.join(input_path, file_name))
        # 使用YOLOv5检测
        results = model(img)
        # 得到置信度最高的数独检测结果
        sudoku_detection = None
        for result in results.pred[0]:
            if result[-1] == 0 and result[-2] > 0.8:
                sudoku_detection = result
                break
        # 提取数独
        if sudoku_detection is not None:
            xmin, ymin, xmax, ymax, confidence = sudoku_detection[:5]
            sudoku = img[int(ymin):int(ymax), int(xmin):int(xmax)]
            cv2.imwrite(os.path.join(output_path, f"extract_{file_name}"), sudoku)
            print(f"...提取成功！")
        else:
            print(f"...{file_name}未检测到数独！")
print(f"Done on {timex()}")

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)



[31m[1mrequirements:[0m /root/.cache/torch/hub/requirements.txt not found, check failed.


Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


正在处理1.png...提取成功！
Done on 2023-04-27 00:59:16.548206


### 2.提取数独中的数字

In [9]:
# Iterate over images in input folder
fp = list(os.listdir(output_path))
for file_name in fp:
    if file_name.split('_')[0] != 'extract':
        continue
    # Load image
    img = cv2.imread(os.path.join(output_path, file_name))
    # Get dimensions of image
    height, width, _ = img.shape
    # Calculate size of each small image
    size = int(height / 9)
    # Iterate over rows and columns of small images
    for row in range(9):
        for col in range(9):
            # Calculate coordinates of small image
            x1 = col * size
            y1 = row * size
            x2 = x1 + size
            y2 = y1 + size
            # Crop small image from main image
            small_img = img[y1:y2, x1:x2]
            # Save small image to output folder
            small_img_file_name = '{}_{}.png'.format(os.path.splitext(file_name)[0], row * 9 + col)
            cv2.imwrite(os.path.join(output_path, small_img_file_name), small_img)
print(f"Done on {timex()}")

Done on 2023-04-27 00:59:23.746297


### 3.单个数字图片预处理
在识别单个数字之前，需要对图片进行预处理。
受限于训练模型，进行二值化+去黑线的预处理可以大幅度提高识别准确率

In [10]:
from PIL import Image, ImageOps
import os

# 读取图片并转换为黑白图
folder_path = 'sudoku_pic/extract'
for filename in os.listdir(folder_path):
    if len(filename.split('_')) != 3:
        continue
    img = Image.open(f"{folder_path}/{filename}").convert('L')
    # 获取图片的宽度和高度
    width, height = img.size
    cl = []
    # 遍历每一行，如果整行像素点>=80%部分不是白色，则将该行像素点全部转换为白色
    for y in range(height):
        pixels = [img.getpixel((x, y)) for x in range(width)]
        white_pixels = sum(1 for pixel in pixels if pixel == 255)
        if white_pixels < width*0.2:
            for x in range(width):
                cl.append((x, y))
    # 遍历每一列，如果整列像素点>=80%部分不是白色，则将该列像素点全部转换为白色
    for x in range(width):
        pixels = [img.getpixel((x, y)) for y in range(height)]
        white_pixels = sum(1 for pixel in pixels if pixel == 255)
        if white_pixels < height*0.2:
            for y in range(height):
                cl.append((x, y))
    # 将所有的白色像素点转换为黑色
    for x, y in cl:
        img.putpixel((x, y), 255)
    # 反转图片颜色
    img = ImageOps.invert(img)
    img.save(f"{folder_path}/ok/ok_{filename}")
print(f"Done on {timex()}")

Done on 2023-04-27 00:59:27.328855


### 4.识别数字并转化为数独


In [11]:
from torchvision.transforms import ToTensor, Resize
from pathlib import Path
import sys
from models.experimental import attempt_load
from utils.general import non_max_suppression
from utils.torch_utils import select_device
import numpy as np
from PIL import Image

sys.path.insert(0, str(Path('yolov5')))

def load_model(model_path):
    device = select_device()
    model = attempt_load(model_path, device)
    model.eval()
    return model

def predict(model, image_path):
    device = select_device()
    img = Image.open(image_path).convert("RGB")
    img = Resize((128, 128))(img)
    img_tensor = ToTensor()(img).unsqueeze(0).to(device)
    pred = model(img_tensor)[0]
    # Apply non-max suppression
    results = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False)
    return results, img.size

def process_images(model_path, image_folder):
    model = load_model(model_path)
    sudoku = np.zeros((9, 9), dtype=np.int32)
    for img_name in sorted(os.listdir(image_folder)):
        if img_name.startswith("ok_extract_"):
            row = int(img_name.split("_")[3].split('.')[0]) // 9
            col = int(img_name.split("_")[3].split('.')[0]) % 9
            img_path = os.path.join(image_folder, img_name)
            results, img_size = predict(model, img_path)
            if len(results) > 0 and len(results[0]) > 0:
                most_likely_class = int(results[0][0][5].item())
                sudoku[row, col] = most_likely_class
        else:
            continue
    print(sudoku)
process_images("detect_number.pt", "sudoku_pic/extract/ok")
print(f"Done on {timex()}")

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 157 layers, 7037095 parameters, 0 gradients
YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)

YOLOv5 🚀 2023-4-27 Python-3.9.16 torch-2.0.

[[0 4 1 0 0 0 3 8 0]
 [5 0 0 0 4 0 0 0 7]
 [8 0 0 7 0 3 0 0 4]
 [0 0 7 0 2 0 8 0 0]
 [0 6 0 3 0 8 0 9 0]
 [0 0 3 0 9 0 6 0 0]
 [3 0 0 2 0 1 0 0 6]
 [6 0 0 0 3 0 0 0 8]
 [0 7 4 0 0 0 2 3 0]]
Done on 2023-04-27 00:59:30.622733
