## Бинаризация изображений

In [158]:
import numpy as np
import pandas as pd
#import random
from PIL import Image, ImageDraw
from skimage import io
import matplotlib.pyplot as plt

import cv2

import glob

from sklearn.metrics import f1_score, precision_score, recall_score

%matplotlib inline

In [159]:
def paths(original_path, GT_path):
    original_paths = sorted(glob.glob(original_path))
    GT_paths = sorted(glob.glob(GT_path))
    return original_paths, GT_paths

In [160]:
def compare(path_true, path_pred, width, height):
    image_t, draw_t, width_t, height_t, pixels_t, gray_matrix_t = load_image(path_true)
    image_p, draw_p, width_p, height_p, pixels_p, gray_matrix_p = load_image(path_pred)
    #print(width_t * height_t)
    #print(width_p * height_p)
    pixels_array_true = (np.reshape(gray_matrix_t, width_t * height_t) % 2).astype(int)
    pixels_array_pred = (np.reshape(gray_matrix_p, width_p * height_p)).astype(int)
    
    #print(pixels_array_true)
    #print(pixels_array_pred)
    
    precision = precision_score(pixels_array_true, pixels_array_pred)
    recall = recall_score(pixels_array_true, pixels_array_pred)
    f1_metrics = f1_score(pixels_array_true, pixels_array_pred)
    return precision, recall, f1_metrics
    

In [161]:
def load_image(image_path):
    img = Image.open(image_path) #Открываем изображение. 
    #print(image.shape)
    #image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = img.convert('RGB')
    draw = ImageDraw.Draw(image) #Создаем инструмент для рисования. 
    width = image.size[0] #Определяем ширину. 
    height = image.size[1] #Определяем высоту.
    pixels = image.load() #Выгружаем значения пикселей.
    gray_matrix = io.imread(image_path, as_grey=True) # матрица с уровнями серого всех пикселей
    #print(gray_matrix.shape)
    #print(width, height)
    return image, draw, width, height, pixels, gray_matrix

In [162]:
def gray_matrix_to_sorted_series(gray_matrix, width, height):
    pixels_array = np.reshape(gray_matrix, width*height)
    pixels_array_sorted = sorted(pixels_array)
    pixels_series_sorted = pd.Series(pixels_array_sorted)
    return pixels_series_sorted

In [163]:
def plot_pixels_hist(pixels_series_sorted, figsize = (15, 10), width =0.01):
    pixels_series_sorted.hist(bins = 75, figsize = figsize, width = width);

In [164]:
def otsu_algorithm_find_threshold(pixels_series_sorted):
    pixels_counts_series = pixels_series_sorted.value_counts()
    pixels_counts_series_sorted = pixels_counts_series.sort_index(ascending = True)
    gray_levels = np.array(pixels_counts_series_sorted.index)
    numbers_of_pixels = np.array(pixels_counts_series_sorted.values)
    
    N_0 = width*height
    n = gray_levels.size
    
    w = np.zeros(n-1)
    for k in range(1,n):
        w[k - 1] = np.sum(numbers_of_pixels[:k]) / N_0
        
    mu = np.zeros(n-1)
    for j in range(1, n):
            mu[j - 1] = np.sum((numbers_of_pixels[:j]/N_0)*gray_levels[:j])
            
    var = (((mu[n-1]*w) - mu)**2)/(w*(1 - w))
        
    k_star = var.argmax()
    
    threshold = gray_levels[k_star]
    
    return threshold

In [165]:
def otsu_modified_find_threshold(pixels_series_sorted, width, height):
    pixels_counts_series = pixels_series_sorted.value_counts()
    pixels_counts_series_sorted = pixels_counts_series.sort_index(ascending = True)
    gray_levels = np.array(pixels_counts_series_sorted.index)
    numbers_of_pixels = np.array(pixels_counts_series_sorted.values)
    
    N_0 = width*height
    n = gray_levels.size
    
    w = np.zeros(n-1)
    for k in range(1,n):
        w[k - 1] = np.sum(numbers_of_pixels[:k]) / N_0
        
    w1 = 1 - w
    
        
    mu = np.zeros(n-1)
    for j in range(1, n):
            mu[j - 1] = np.sum((numbers_of_pixels[:j]/N_0)*gray_levels[:j])

    mu0 = mu/w
    mu1 = (mu[n-2] - mu)/w1
        
    var0 = np.zeros(n-1)
    var1 = np.zeros(n-1)
    for i in range(1, n):
        var0[i - 1] = np.sum(((gray_levels[:i] -mu0[i-1]) ** 2) * (numbers_of_pixels[:i]/N_0) / w[i-1])
        var1[i - 1] = np.sum(((gray_levels[i:] -mu1[i-1]) ** 2) * (numbers_of_pixels[i:]/N_0) / w1[i-1])
        
    var_w = w*var0 + w1*var1
    
    Q = w*np.log(w) + w1*np.log(w1) - np.log(var_w)
        
    k_star = Q.argmax()
    
    threshold = gray_levels[k_star]
    
    return threshold

In [166]:
import numpy as np
a = 1 - np.array([0, 1, 2, 3])

In [167]:
def binarize_image(draw, pixels, gray_level_threshold, gray_matrix, width, height):
    for i in range(width):
        for j in range(height):
            red = pixels[i, j][0]
            green = pixels[i, j][1]
            blue = pixels[i, j][2]
            if (gray_matrix[j, i] > gray_level_threshold):
                red, green, blue = 255, 255, 255
            else:
                red, green, blue = 0, 0, 0
            draw.point((i, j), (red, green, blue))
    

## Эксперименты

In [168]:
def make_ans(image_path):
    image, draw, width, height, pixels, gray_matrix = load_image(image_path)
    pixels_series_sorted = gray_matrix_to_sorted_series(gray_matrix, width, height)
    gray_level_threshold = otsu_modified_find_threshold(pixels_series_sorted, width, height)
    binarize_image(draw, pixels, gray_level_threshold, gray_matrix, width, height)
    image.save("ans.jpg", "JPEG")


In [169]:
original_path = '/home/verochka/image_binarization/originals/*.png'
GT_path = '/home/verochka/image_binarization/GT/*.png'
original_paths, GT_paths = paths(original_path, GT_path)

In [170]:
experiment_results =pd.DataFrame(columns = ["Image", "precision", "recall", "f1"])

In [172]:
for i in range(len(original_paths)):
    make_ans(original_paths[i])
    precision, recall, f1_metrics = compare(GT_paths[i], 'ans.jpg', width, height)
    experiment_results.loc[i] = [original_paths[i], precision, recall, f1_metrics]
    

In [142]:
make_ans(original_paths[149])
precision, recall, f1_metrics = compare(GT_paths[149], 'ans.jpg', width, height)

[[1. 1. 1. ... 1. 1. 0.]
 [1. 1. 1. ... 1. 1. 0.]
 [1. 1. 1. ... 1. 1. 0.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]


In [173]:
experiment_results

Unnamed: 0,Image,precision,recall,f1
0,/home/verochka/image_binarization/originals/H0...,0.994160,0.940319,0.966490
1,/home/verochka/image_binarization/originals/H0...,0.999033,0.971176,0.984908
2,/home/verochka/image_binarization/originals/H0...,0.997130,0.891017,0.941092
3,/home/verochka/image_binarization/originals/H0...,0.998701,0.732732,0.845288
4,/home/verochka/image_binarization/originals/H0...,0.998432,0.793250,0.884092
5,/home/verochka/image_binarization/originals/H1...,0.992472,0.892227,0.939683
6,/home/verochka/image_binarization/originals/H1...,0.996343,0.960782,0.978240
7,/home/verochka/image_binarization/originals/H1...,0.988372,0.909356,0.947219
8,/home/verochka/image_binarization/originals/H1...,0.986486,0.926006,0.955290
9,/home/verochka/image_binarization/originals/H1...,0.998259,0.935134,0.965666
