In [91]:
import cv2
import numpy as np
import os
import pandas as pd

In [2]:
# Load image, grayscale, Otsu's threshold
image = cv2.imread('PingAn images/image9.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

In [3]:
# Dilate with horizontal kernel
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20,10))
dilate = cv2.dilate(thresh, kernel, iterations=2)

In [4]:
# Find contours and remove non-diagram contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if w/h > 2 and area > 10000:
        cv2.drawContours(dilate, [c], -1, (0,0,0), -1)

In [5]:
# Iterate through diagram contours and form single bounding box
boxes = []
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    boxes.append([x,y, x+w,y+h])

In [6]:
boxes = np.asarray(boxes)
x = np.min(boxes[:,0])
y = np.min(boxes[:,1])
w = np.max(boxes[:,2]) - x
h = np.max(boxes[:,3]) - y

In [7]:
# Extract ROI
cv2.rectangle(image, (x,y), (x + w,y + h), (36,255,12), 3)
ROI = original[y:y+h, x:x+w]

In [8]:
cv2.imwrite("GDFIV2_Out/GDFI_V2_img.png", image)
cv2.imwrite("GDFIV2_Out/GDFI_V2_thresh.png", thresh)
cv2.imwrite("GDFIV2_Out/GDFI_V2_dilate.png", dilate)
cv2.imwrite("GDFIV2_Out/GDFI_V2_ROI.png", ROI)

True

In [215]:
df = []
def process_image(img_path, count):
    image = cv2.imread(img_path)
    original = image.copy()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    #thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_TOZERO+ cv2.THRESH_OTSU)[1]
    thresh = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,2)
    # Dilate with horizontal kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,1))
    dilate = cv2.dilate(thresh, kernel, iterations=1)
    cv2.imwrite("ChartExtraction_Output/dilate/"+str(count)+"img.png", image)
    cv2.imwrite("ChartExtraction_Output/dilate/"+str(count)+"dilate.png", dilate)
    RGB_raw = cv2.mean(dilate)[::-1]
    red = int(RGB_raw[1])
    green =int(RGB_raw[2])
    blue =int(RGB_raw[3])
    bluegreen = blue + green
    df.append([red, green, blue, bluegreen])

    
def process_image1(img_path, count):
    image = cv2.imread(img_path)
    original = image.copy()
    colors = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    cv2.imwrite("ChartExtraction_Output/dilate/"+str(count)+"img.png", image)
    cv2.imwrite("ChartExtraction_Output/dilate/"+str(count)+"dilate.png", colors)
    RGB_raw = cv2.mean(colors)[::-1]
    RGB = (int(RGB_raw[1]), int(RGB_raw[2]), int(RGB_raw[3]))
    red = int(RGB_raw[1])
    green =int(RGB_raw[2])
    blue =int(RGB_raw[3])
    bluegreen = blue + green
    df.append([red, green, blue, bluegreen])

In [216]:

count = 0
ROI_correct = "ChartExtraction_Output/ROI_correct/" #28
ROI_wrong = "ChartExtraction_Output/ROI_wrong/" #89
directory = ROI_wrong
directory_2 = ROI_wrong

for filename in os.listdir(directory):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        count = count + 1
        img_path = os.path.join(directory, filename)
        print(f"NOW DOING no:{count} ====>{img_path} ")
        process_image(img_path, count)
        print(f"END processing ====>{img_path} ")
        print("***************************************************")
    else:
        continue   
#print(f"performance: {len(filtered)}")

NOW DOING no:1 ====>ChartExtraction_Output/ROI_wrong/page24_0_15.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page24_0_15.png 
***************************************************
NOW DOING no:2 ====>ChartExtraction_Output/ROI_wrong/page57_0_22.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page57_0_22.png 
***************************************************
NOW DOING no:3 ====>ChartExtraction_Output/ROI_wrong/page52_a_1.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page52_a_1.png 
***************************************************
NOW DOING no:4 ====>ChartExtraction_Output/ROI_wrong/page38_0_38.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page38_0_38.png 
***************************************************
NOW DOING no:5 ====>ChartExtraction_Output/ROI_wrong/page53_0_14.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page53_0_14.png 
***************************************************
NOW DOING no:6 ====>ChartExtraction_Output

END processing ====>ChartExtraction_Output/ROI_wrong/page58_b_2.png 
***************************************************
NOW DOING no:48 ====>ChartExtraction_Output/ROI_wrong/page91_0_19.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page91_0_19.png 
***************************************************
NOW DOING no:49 ====>ChartExtraction_Output/ROI_wrong/page75_0_18.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page75_0_18.png 
***************************************************
NOW DOING no:50 ====>ChartExtraction_Output/ROI_wrong/page33_0_11.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page33_0_11.png 
***************************************************
NOW DOING no:51 ====>ChartExtraction_Output/ROI_wrong/page15_b_21.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page15_b_21.png 
***************************************************
NOW DOING no:52 ====>ChartExtraction_Output/ROI_wrong/page9_b_3.png 
END processing ====>ChartExtraction_Ou

END processing ====>ChartExtraction_Output/ROI_wrong/page4_0_21.png 
***************************************************
NOW DOING no:97 ====>ChartExtraction_Output/ROI_wrong/page60_0_3.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page60_0_3.png 
***************************************************
NOW DOING no:98 ====>ChartExtraction_Output/ROI_wrong/page6_0_5.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page6_0_5.png 
***************************************************
NOW DOING no:99 ====>ChartExtraction_Output/ROI_wrong/page66_0_39.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page66_0_39.png 
***************************************************
NOW DOING no:100 ====>ChartExtraction_Output/ROI_wrong/page82_0_10.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page82_0_10.png 
***************************************************
NOW DOING no:101 ====>ChartExtraction_Output/ROI_wrong/page66_0_11.png 
END processing ====>ChartExtraction_Outp

END processing ====>ChartExtraction_Output/ROI_wrong/page33_b_29.png 
***************************************************
NOW DOING no:146 ====>ChartExtraction_Output/ROI_wrong/page22_0_60.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page22_0_60.png 
***************************************************
NOW DOING no:147 ====>ChartExtraction_Output/ROI_wrong/page11_0_13.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page11_0_13.png 
***************************************************
NOW DOING no:148 ====>ChartExtraction_Output/ROI_wrong/page40_b_2.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page40_b_2.png 
***************************************************
NOW DOING no:149 ====>ChartExtraction_Output/ROI_wrong/page13_0_40.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page13_0_40.png 
***************************************************
NOW DOING no:150 ====>ChartExtraction_Output/ROI_wrong/page103_0_21.png 
END processing ====>ChartExtrac

END processing ====>ChartExtraction_Output/ROI_wrong/page36_0_1.png 
***************************************************
NOW DOING no:190 ====>ChartExtraction_Output/ROI_wrong/page12_0_1 2.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page12_0_1 2.png 
***************************************************
NOW DOING no:191 ====>ChartExtraction_Output/ROI_wrong/page27_0_24.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page27_0_24.png 
***************************************************
NOW DOING no:192 ====>ChartExtraction_Output/ROI_wrong/page26_0_18.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page26_0_18.png 
***************************************************
NOW DOING no:193 ====>ChartExtraction_Output/ROI_wrong/page54_0_13.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page54_0_13.png 
***************************************************
NOW DOING no:194 ====>ChartExtraction_Output/ROI_wrong/page15_a_1.png 
END processing ====>ChartExtra

END processing ====>ChartExtraction_Output/ROI_wrong/page40_0_8 2.png 
***************************************************
NOW DOING no:240 ====>ChartExtraction_Output/ROI_wrong/page27_0_3.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page27_0_3.png 
***************************************************
NOW DOING no:241 ====>ChartExtraction_Output/ROI_wrong/page42_b_10.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page42_b_10.png 
***************************************************
NOW DOING no:242 ====>ChartExtraction_Output/ROI_wrong/page21_0_46.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page21_0_46.png 
***************************************************
NOW DOING no:243 ====>ChartExtraction_Output/ROI_wrong/page45_0_3.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page45_0_3.png 
***************************************************
NOW DOING no:244 ====>ChartExtraction_Output/ROI_wrong/page95_0_74.png 
END processing ====>ChartExtracti

END processing ====>ChartExtraction_Output/ROI_wrong/page5_0_4.png 
***************************************************
NOW DOING no:284 ====>ChartExtraction_Output/ROI_wrong/page15_a_14.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page15_a_14.png 
***************************************************
NOW DOING no:285 ====>ChartExtraction_Output/ROI_wrong/page23_a_51.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page23_a_51.png 
***************************************************
NOW DOING no:286 ====>ChartExtraction_Output/ROI_wrong/page40_a_2.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page40_a_2.png 
***************************************************
NOW DOING no:287 ====>ChartExtraction_Output/ROI_wrong/page7_b_11.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page7_b_11.png 
***************************************************
NOW DOING no:288 ====>ChartExtraction_Output/ROI_wrong/page10_a_8.png 
END processing ====>ChartExtraction_O

END processing ====>ChartExtraction_Output/ROI_wrong/page14_0_2.png 
***************************************************
NOW DOING no:328 ====>ChartExtraction_Output/ROI_wrong/page27_a_2.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page27_a_2.png 
***************************************************
NOW DOING no:329 ====>ChartExtraction_Output/ROI_wrong/page37_a_2.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page37_a_2.png 
***************************************************
NOW DOING no:330 ====>ChartExtraction_Output/ROI_wrong/page4_0_71.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page4_0_71.png 
***************************************************
NOW DOING no:331 ====>ChartExtraction_Output/ROI_wrong/page64_0_12.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page64_0_12.png 
***************************************************
NOW DOING no:332 ====>ChartExtraction_Output/ROI_wrong/page22_0_27.png 
END processing ====>ChartExtraction_O

END processing ====>ChartExtraction_Output/ROI_wrong/page12_0_14.png 
***************************************************
NOW DOING no:383 ====>ChartExtraction_Output/ROI_wrong/page11_0_53.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page11_0_53.png 
***************************************************
NOW DOING no:384 ====>ChartExtraction_Output/ROI_wrong/page32_a_8.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page32_a_8.png 
***************************************************
NOW DOING no:385 ====>ChartExtraction_Output/ROI_wrong/page22_0_20.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page22_0_20.png 
***************************************************
NOW DOING no:386 ====>ChartExtraction_Output/ROI_wrong/page7_0_31.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page7_0_31.png 
***************************************************
NOW DOING no:387 ====>ChartExtraction_Output/ROI_wrong/page7_b_6.png 
END processing ====>ChartExtraction_

END processing ====>ChartExtraction_Output/ROI_wrong/page32_a_11.png 
***************************************************
NOW DOING no:429 ====>ChartExtraction_Output/ROI_wrong/page32_a_13.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page32_a_13.png 
***************************************************
NOW DOING no:430 ====>ChartExtraction_Output/ROI_wrong/page31_b_9.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page31_b_9.png 
***************************************************
NOW DOING no:431 ====>ChartExtraction_Output/ROI_wrong/page4_a_7.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page4_a_7.png 
***************************************************
NOW DOING no:432 ====>ChartExtraction_Output/ROI_wrong/page82_0_7.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page82_0_7.png 
***************************************************
NOW DOING no:433 ====>ChartExtraction_Output/ROI_wrong/page7_b_5.png 
END processing ====>ChartExtraction_Outp

END processing ====>ChartExtraction_Output/ROI_wrong/page60_a_2.png 
***************************************************
NOW DOING no:472 ====>ChartExtraction_Output/ROI_wrong/page34_0_43.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page34_0_43.png 
***************************************************
NOW DOING no:473 ====>ChartExtraction_Output/ROI_wrong/page35_0_43.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page35_0_43.png 
***************************************************
NOW DOING no:474 ====>ChartExtraction_Output/ROI_wrong/page18_b_3.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page18_b_3.png 
***************************************************
NOW DOING no:475 ====>ChartExtraction_Output/ROI_wrong/page17_a_14.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page17_a_14.png 
***************************************************
NOW DOING no:476 ====>ChartExtraction_Output/ROI_wrong/page53_0_2.png 
END processing ====>ChartExtractio

END processing ====>ChartExtraction_Output/ROI_wrong/page40_0_7 2.png 
***************************************************
NOW DOING no:516 ====>ChartExtraction_Output/ROI_wrong/page7_a_10.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page7_a_10.png 
***************************************************
NOW DOING no:517 ====>ChartExtraction_Output/ROI_wrong/page14_a_50.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page14_a_50.png 
***************************************************
NOW DOING no:518 ====>ChartExtraction_Output/ROI_wrong/page50_a_36.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page50_a_36.png 
***************************************************
NOW DOING no:519 ====>ChartExtraction_Output/ROI_wrong/page52_0_4.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page52_0_4.png 
***************************************************
NOW DOING no:520 ====>ChartExtraction_Output/ROI_wrong/page55_b_1.png 
END processing ====>ChartExtractio

END processing ====>ChartExtraction_Output/ROI_wrong/page153_0_2.png 
***************************************************
NOW DOING no:568 ====>ChartExtraction_Output/ROI_wrong/page20_0_11.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page20_0_11.png 
***************************************************
NOW DOING no:569 ====>ChartExtraction_Output/ROI_wrong/page48_b_111.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page48_b_111.png 
***************************************************
NOW DOING no:570 ====>ChartExtraction_Output/ROI_wrong/page20_0_39.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page20_0_39.png 
***************************************************
NOW DOING no:571 ====>ChartExtraction_Output/ROI_wrong/page10_b_2.png 
END processing ====>ChartExtraction_Output/ROI_wrong/page10_b_2.png 
***************************************************
NOW DOING no:572 ====>ChartExtraction_Output/ROI_wrong/page21_0_5.png 
END processing ====>ChartExtrac

In [217]:
df_frame = pd.DataFrame (df, columns = ['red', 'green', 'blue','bluegreen'])
#min(df_frame.RGB), max(df_frame.RGB), max(df_frame.RGB) -min(df_frame.RGB)
df_frame

Unnamed: 0,red,green,blue,bluegreen
0,0,0,19,19
1,0,0,22,22
2,0,0,6,6
3,0,0,24,24
4,0,0,24,24
...,...,...,...,...
605,0,0,27,27
606,0,0,2,2
607,0,0,33,33
608,0,0,16,16


In [209]:
max(df_frame.blue)

25

In [222]:
len(df_frame[df_frame.blue< 25])

442

In [192]:
# correct 
# (7, 25, 18) . 11
# 23
# wrong
# (0, 92, 92) . 11
2 65 
5 108
7 124
30 544
29 498
28 482
27 468
26 456

SyntaxError: invalid syntax (<ipython-input-192-d7bf8c406fd1>, line 6)

In [223]:
610 - 468

162

In [226]:
1- 468/610

0.23278688524590163