In [3]:
from PIL import Image

import imagehash

def img(img_path):
    """
    图片哈希（类似：4f999cc90979704c）
    :param img_path: 图片路径
    :return: <class 'imagehash.ImageHash'>
    """
    img1 = Image.open(img_path)
    res = imagehash.dhash(img1)
    return res


def hamm_img(res1, res2):
    """
    汉明距离，汉明距离越小说明越相似，等 0 说明是同一张图片，大于10越上，说明完全不相似
    :param res1:
    :param res2:
    :return:
    """
    str1 = str(res1)  # <class 'imagehash.ImageHash'> 转成 str
    str2 = str(res2)
    num = 0  # 用来计算汉明距离
    for i in range(len(str1)):
        if str1[i] != str2[i]:
            num += 1
    return num


if __name__ == '__main__':
    img_path1 = 'D:/image/16.jpg'
    img_path2 = 'D:/image/b47.png'
    res = hamm_img(img(img_path1),img(img_path2))
    print('汉明距离是:', res)

汉明距离是: 16


In [1]:
import cv2
import numpy as np
from PIL import Image
import requests
from io import BytesIO
# import matplotlib
# matplotlib.use('TkAgg')
# import matplotlib.pyplot as plt

def pHash(img):
    # 感知哈希算法
    # 缩放32*32
    img = cv2.resize(img, (32, 32))   # , interpolation=cv2.INTER_CUBIC
 
    # 转换为灰度图
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 将灰度图转为浮点型，再进行dct变换
    dct = cv2.dct(np.float32(gray))
    # opencv实现的掩码操作
    dct_roi = dct[0:8, 0:8]
 
    hash = []
    avreage = np.mean(dct_roi)
    for i in range(dct_roi.shape[0]):
        for j in range(dct_roi.shape[1]):
            if dct_roi[i, j] > avreage:
                hash.append(1)
            else:
                hash.append(0)
    return hash

def cmpHash(hash1, hash2):
    # Hash值对比
    # 算法中1和0顺序组合起来的即是图片的指纹hash。顺序不固定，但是比较的时候必须是相同的顺序。
    # 对比两幅图的指纹，计算汉明距离，即两个64位的hash值有多少是不一样的，不同的位数越小，图片越相似
    # 汉明距离：一组二进制数据变成另一组数据所需要的步骤，可以衡量两图的差异，汉明距离越小，则相似度越高。汉明距离为0，即两张图片完全一样
    n = 0
    # hash长度不同则返回-1代表传参出错
    if len(hash1) != len(hash2):
        return -1
    # 遍历判断
    for i in range(len(hash1)):
        # 不相等则n计数+1，n最终为相似度
        if hash1[i] != hash2[i]:
            n = n + 1
    return n
 
def getImageByUrl(url):
    # 根据图片url 获取图片对象
    html = requests.get(url, verify=False)
    image = Image.open(BytesIO(html.content))
    return image


# def PILImageToCV():
#     # PIL Image转换成OpenCV格式
#     path = "D:/image/b47.png"
#     img = Image.open(path)
# #     plt.subplot(121)
# #     plt.imshow(img)
#     print(isinstance(img, np.ndarray))
#     img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
#     print(isinstance(img, np.ndarray))
# #     plt.subplot(122)
# #     plt.imshow(img)
# #     plt.show()
 
# def CVImageToPIL():
#     # OpenCV图片转换为PIL image
#     path = "D:/image/b47.png"
#     img = cv2.imread(path)
#     # cv2.imshow("OpenCV",img)
# #     plt.subplot(121)
# #     plt.imshow(img)
 
#     img2 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# #     plt.subplot(122)
# #     plt.imshow(img2)
# #     plt.show()


# def bytes_to_cvimage(filebytes):
#     # 图片字节流转换为cv image
#     image = Image.open(filebytes)
#     img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
#     return img

def runAllImageSimilaryFun(para1, para2):
    # 均值、差值、感知哈希算法三种算法值越小，则越相似,相同图片值为0
    # 三直方图算法和单通道的直方图 0-1之间，值越大，越相似。 相同图片为1
 
    # t1,t2   14;19;10;  0.70;0.75
    # t1,t3   39 33 18   0.58 0.49
    # s1,s2  7 23 11     0.83 0.86  挺相似的图片
    # c1,c2  11 29 17    0.30 0.31
 
    if para1.startswith("http"):
         # 根据链接下载图片，并转换为opencv格式
        img1 = getImageByUrl(para1)
        img1 = cv2.cvtColor(np.asarray(img1), cv2.COLOR_RGB2BGR)
 
        img2 = getImageByUrl(para2)
        img2 = cv2.cvtColor(np.asarray(img2), cv2.COLOR_RGB2BGR)
    else:
        # 通过imread方法直接读取物理路径
        img1 = cv2.imread(para1)
        img2 = cv2.imread(para2)
 
 
    hash1 = pHash(img1)
    hash2 = pHash(img2)
    n3 = cmpHash(hash1, hash2)
    print('感知哈希算法相似度pHash：', n3)

In [4]:
def runAllImageSimilaryFun(para1, para2):
    # 均值、差值、感知哈希算法三种算法值越小，则越相似,相同图片值为0
    # 三直方图算法和单通道的直方图 0-1之间，值越大，越相似。 相同图片为1
 
    # t1,t2   14;19;10;  0.70;0.75
    # t1,t3   39 33 18   0.58 0.49
    # s1,s2  7 23 11     0.83 0.86  挺相似的图片
    # c1,c2  11 29 17    0.30 0.31
 
    if para1.startswith("http"):
         # 根据链接下载图片，并转换为opencv格式
        img1 = getImageByUrl(para1)
        img1 = cv2.cvtColor(np.asarray(img1), cv2.COLOR_RGB2BGR)
 
        img2 = getImageByUrl(para2)
        img2 = cv2.cvtColor(np.asarray(img2), cv2.COLOR_RGB2BGR)
    else:
        # 通过imread方法直接读取物理路径
        img1 = cv2.imread(para1)
        img2 = cv2.imread(para2)
 
 
    hash1 = pHash(img1)
    hash2 = pHash(img2)
    n3 = cmpHash(hash1, hash2)
    print('感知哈希算法相似度pHash：', n3)


if __name__ == "__main__":
    p1="D:/image/hg. (1).jpg"
    p2="D:/image/hg. (41).jpg"
    runAllImageSimilaryFun(p1,p2)

感知哈希算法相似度pHash： 14


In [8]:
import os 
if __name__ == "__main__": 
def file_name(file_dir):  
    L=[]  
    for root, dirs, files in os.walk(file_dir): 
    for file in files: 
        if os.path.splitext(file)[1] == '.jpeg': 
        L.append(os.path.join(root, file)) 
    return L

img0 = getImageByUrl(para0)
img1 = getImageByUrl(para1)
img2 = getImageByUrl(para2)
img3 = getImageByUrl(para3)
img4 = getImageByUrl(para4)


In [48]:
import os 
if __name__ == "__main__": 
    def file_name(file_dir):  
        L=[]  
        for root, dirs, files in os.walk(file_dir): 
            for file in files: 
                if os.path.splitext(file)[1] == '.jpg' or 'png': 
                    L.append(os.path.join(root, file))
        return L
file_name("D:/image")

['D:/image\\16.jpg',
 'D:/image\\5b1a4df184176adba0438e165398be7.png',
 'D:/image\\7d5f69b950ab832c639f03baa659de7.png',
 'D:/image\\b47.png',
 'D:/image\\hg. (41).jpg',
 'D:/image\\不合格图片.zip',
 'D:/image\\不合格问题图片.zip',
 'D:/image\\合格图片.zip',
 'D:/image\\问题图片收集.rar',
 'D:/image\\dataset\\image.py',
 'D:/image\\dataset\\image_best_model.hdf5',
 'D:/image\\dataset\\image_ide.py',
 'D:/image\\dataset\\image_test1.zip',
 'D:/image\\dataset\\image_train.zip',
 'D:/image\\dataset\\hg1\\hg.23.jpg',
 'D:/image\\dataset\\hg1\\hg.24.jpg',
 'D:/image\\dataset\\hg1\\hg.25.jpg',
 'D:/image\\dataset\\hg1\\hg.26.jpg',
 'D:/image\\dataset\\hg1\\hg.27.jpg',
 'D:/image\\dataset\\hg1\\hg.28.jpg',
 'D:/image\\dataset\\hg1\\hg.29.jpg',
 'D:/image\\dataset\\hg1\\hg.30.jpg',
 'D:/image\\dataset\\hg1\\hg.31.jpg',
 'D:/image\\dataset\\hg1\\hg.32.jpg',
 'D:/image\\dataset\\hg1\\hg.33.jpg',
 'D:/image\\dataset\\hg1\\hg.34.jpg',
 'D:/image\\dataset\\hg1\\hg.35.jpg',
 'D:/image\\dataset\\hg1\\hg.36.jpg',
 'D:/imag

In [11]:
import os
from openpyxl import Workbook

path = "D:/image/"
file_name = os.listdir(path)
item_list =[]
 
for item in file_name:
    if item[-4:].lower() == '.jpg' or item[-4:].lower() == '.png' or item[-5:].lower() == '.jpeg':
        item = path + item
        item_list.append(item)
        
item_list =[]

In [10]:
list_set = []
for i in range(len(item_list)):
    for j in range(i+1,len(item_list)):
        list = [item_list[i],item_list[j]]
        list_set.append(list)    
list_set

[]

In [7]:
b = []
if __name__ == "__main__":
    list_set = []
    for i in range(len(item_list)):
        for j in range(i+1,len(item_list)):
            list = [item_list[i],item_list[j]]
            list_set.append(list) 
    for i in range(len(list_set)):
        p1= item_list[i][0]
        p2= item_list[i][1]
        a = runAllImageSimilaryFun(p1,p2)