In [1]:
import os
import pandas
import cv2
import numpy as np
import time

### Configuration

In [2]:
# color
# NOTE(review): these tuples are handed to OpenCV drawing calls; OpenCV images are
# BGR by default, so (0, 0, 255) should render as red — confirm.
color_of_edges = (0, 0, 0) # black
color_of_holes = (255, 255, 255) # white
color_of_nodes = (0, 0, 0) # black
color_of_nodes_in_contour = (0, 0, 255) # marker color for nodes found in a contour

# path
data_path = "../../Data/" # data root (relative path)
gt_folder_name = "ground-truth" # sub-folder holding the ground truth graph files
sparse_folder = "Sparse" # graph-type sub-folder
uniform_folder = "Uniform" # graph-type sub-folder

### Contour tracing functions for finding the boundary nodes

In [3]:
def fill_color_demo(image):
    '''
    Flood fill a copy of the image with black, seeding from each of the four corners
    Args:
        image: cv2 image (numpy array); it is not modified
    Returns: the flood-filled copy of image
    '''
    height, width = image.shape[:2]
    filled = image.copy()
    print(height, width)
    # cv2.floodFill needs a mask that is 2 pixels wider and taller than the image.
    # The same mask is shared by all four fills, exactly as the fills are chained.
    fill_mask = np.zeros([height + 2, width + 2], np.uint8)

    new_value = (0, 0, 0)
    lower_diff = (100, 100, 100)
    upper_diff = (50, 50, 50)
    corner_seeds = (
        (0, 0),                    # top-left
        (width - 1, 0),            # top-right
        (0, height - 1),           # bottom-left
        (width - 1, height - 1),   # bottom-right
    )
    for seed in corner_seeds:
        cv2.floodFill(filled, fill_mask, seed, new_value, lower_diff, upper_diff, cv2.FLOODFILL_FIXED_RANGE)

    return filled

In [4]:
def findHoleNodes(hole, gt_nodes_list, img, color = color_of_nodes_in_contour, d = -4.99, thickness = 3, label = False):
    '''
    Split the ground truth nodes into those lying in a hole and those that do not
    Args: 
        hole: a single contour, as accepted by cv2.pointPolygonTest
        gt_nodes_list: ground truth nodes list, one (x, y) point per node
        img: the image to be painted (only touched when label is true)
        color: the paint color of the nodes in contour
        d: minimum signed distance for a node to count as "in" the hole.
           cv2.pointPolygonTest returns a positive distance inside the contour
           and a negative one outside, so the negative default also accepts
           nodes slightly outside the boundary.
        thickness: thickness value of the nodes in contour
        label: True->paint the nodes in contour
    Returns: nodes_in_contour_list, nodes_not_in_contour_list
             (both hold indices into gt_nodes_list)
    '''
    nodes_in_contour_list = []      # indices of nodes in the hole
    nodes_not_in_contour_list = []  # indices of nodes not in the hole
    # Iterate over the list that was passed in; the previous code read an
    # unrelated global named all_nodes and ignored this argument.
    for i, node in enumerate(gt_nodes_list):
        signed_distance = cv2.pointPolygonTest(hole, node, True)
        if signed_distance >= d:
            nodes_in_contour_list.append(i)
            if label:
                cv2.circle(img, node, 14, color, thickness)  # mark the node on img
        else:
            nodes_not_in_contour_list.append(i)
    return nodes_in_contour_list, nodes_not_in_contour_list

In [5]:
def gt_nodes_ct_contours(fd_file_path, gt_nodes_list, area_min):
    '''
    Count the ground truth nodes in / not in the holes (inner contours) whose
    area is > area_min, using contour tracing on the given image.
    Side effects: prints a summary and writes the annotated image to "CT_output.jpg".
    Args: 
        fd_file_path: the jpg file generated by fd algorithm
        gt_nodes_list: ground truth nodes list, one (x, y) point per node
        area_min: holes whose contour area is not larger than this are ignored
    Returns: (number of nodes in at least one hole, number of nodes in no hole)
    Raises:
        FileNotFoundError: if the image cannot be read
    '''
    img = cv2.imread(fd_file_path)
    if img is None:  # cv2.imread reports failure by returning None
        raise FileNotFoundError(f"Cannot read image: {fd_file_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

    # Find holes: contours that have a parent (hierarchy[0][i][3] != -1) and
    # whose area is > area_min. The area must be measured with cv2.contourArea;
    # comparing the contour's point array itself with a number is not a valid test.
    holes = []
    if hierarchy is not None:  # no contours at all -> no hierarchy
        for contour, relation in zip(contours, hierarchy[0]):
            if relation[3] != -1 and cv2.contourArea(contour) > area_min:
                holes.append(contour)
                cv2.fillPoly(img, pts=[contour], color=(255,0,0))
    print(f"{len(holes)} holes detected.")

    # A node counts as "in the contours" when it lies in at least one hole.
    # Each hole is scanned once and the results are merged, instead of keeping
    # only the result of the last hole.
    indices_in_holes = set()
    for hole in holes:
        in_this_hole, _ = findHoleNodes(hole, gt_nodes_list, img, label=True)
        indices_in_holes.update(in_this_hole)
    nodes_in_contours_list = sorted(indices_in_holes)
    nodes_not_in_contours_list = [i for i in range(len(gt_nodes_list)) if i not in indices_in_holes]

    # Draw the hole outlines first so that they are part of the saved image
    cv2.drawContours(img, holes, -1, (0, 0, 255), 3)
    cv2.imwrite("CT_output.jpg",img)
    # Print the results
    print("Number of contours found: ", len(contours))
    print("Number of nodes lying in the contours: ", len(nodes_in_contours_list))
    print("Number of nodes not lying in the contours: ", len(nodes_not_in_contours_list))
    return len(nodes_in_contours_list), len(nodes_not_in_contours_list)

### Drawing functions

In [6]:
#Draw all the nodes
def drawAllNodes(img,all_nodes,radiu,color = color_of_nodes,thickness=1,lineType=0,shift=0):
    '''
    Draw one circle per node on img
    Args:
        img: the image to be painted (modified in place)
        all_nodes: sequence of circle centers
        radiu: circle radius
        color, thickness, lineType, shift: forwarded to cv2.circle
    Returns: img
    '''
    for center in all_nodes:
        cv2.circle(img, center, radiu, color, thickness, lineType, shift)
    return img

#Draw all the edges
def drawAllEdges(img,all_nodes,all_edges,color = color_of_edges,thickness=3,lineType=8,shift=0):
    '''
    Draw one line per edge on img
    Args:
        img: the image to be painted (modified in place)
        all_nodes: sequence of points, indexed by the node indices stored in the edges
        all_edges: sequence of edges; edge[0] and edge[1] are the two node indices
        color, thickness, lineType, shift: forwarded to cv2.line
    Returns: img
    '''
    for edge in all_edges:
        start_point = all_nodes[edge[0]]
        end_point = all_nodes[edge[1]]
        cv2.line(img, start_point, end_point, color, thickness, lineType, shift)
    return img

### Load ground truth and generate jpg with nodes in or not in contours functions

In [7]:
def load_ground_truth_data(ground_truth_fpath):
    '''
    Load a ground truth graph file and scale its node coordinates onto a square canvas.

    File layout, as read by this function (one whitespace separated record per line):
        <number of nodes>
        <node index> <x> <y>      (one line per node)
        <number of edges>
        <node a> <node b>         (one line per edge)

    Args:
        ground_truth_fpath: path of the ground truth file
    Returns:
        all_nodes: list of (x, y) integer canvas coordinates, one per node
        all_edges: list of [node a, node b] index pairs
        num_nodes: number of nodes declared in the file
        canvas_size: side length of the canvas, 4 * num_nodes
        area_min: minimum area threshold, 5000 / 4000 * canvas_size
    Raises:
        ValueError: if the file declares no nodes
    '''
    # The context manager guarantees that the file gets closed
    with open(ground_truth_fpath, 'r') as data_f:
        data_list = [line.strip() for line in data_f]

    num_nodes = int(data_list[0])
    if num_nodes <= 0:
        raise ValueError(f"No nodes declared in ground truth file: {ground_truth_fpath}")

    # Node records occupy the lines right after the node count
    node_x = []
    node_y = []
    for i in range(num_nodes):
        fields = data_list[1 + i].split()
        # fields[0] is the node index and is not needed
        node_x.append(float(fields[1]))
        node_y.append(float(fields[2]))

    # If any coordinate is negative, translate all nodes so that the smallest
    # x and y become 0. Each minimum is compared with 0 explicitly; writing
    # "min_x or min_y < 0" would test the truthiness of min_x instead.
    min_x = min(node_x)
    min_y = min(node_y)
    if min_x < 0 or min_y < 0:
        node_x = [x - min_x for x in node_x]
        node_y = [y - min_y for y in node_y]

    # The canvas size is derived from the number of nodes
    canvas_size = 4 * num_nodes
    # Scale so that the larger extent fits the canvas. An axis with zero extent
    # (all nodes on one line) is skipped to avoid dividing by zero.
    extents = [extent for extent in (max(node_x), max(node_y)) if extent > 0]
    node_ratio = min(canvas_size / extent for extent in extents) if extents else 1.0

    all_nodes = [(int(node_ratio * x), int(node_ratio * y)) for x, y in zip(node_x, node_y)]

    # Edge records follow the edge count, which comes right after the node records
    edge_count_line = 1 + num_nodes
    num_edges = int(data_list[edge_count_line])
    all_edges = []
    for i in range(num_edges):
        fields = data_list[edge_count_line + 1 + i].split()
        all_edges.append([int(fields[0]), int(fields[1])])  # [node a, node b]

    area_min_radio = 5000 / 4000
    area_min = area_min_radio * canvas_size

    return all_nodes, all_edges, num_nodes, canvas_size, area_min

In [8]:
def generateJPG_forGT(fname, gt_folder_name, gt_jpg_folder_name):
    '''
    Generate the jpg file for one ground truth graph
    The graph is drawn on a white canvas_size x canvas_size image and saved to
    fname with gt_folder_name replaced by gt_jpg_folder_name, plus ".jpg".
    The output directory is created when it does not exist yet.
    Args:
        fname: path of the ground truth file
        gt_folder_name: the part of fname to be replaced
        gt_jpg_folder_name: the replacement, i.e. the folder of the jpg files
    Returns: result_dict[jpg file path] = the ground truth nodes' canvas coordinates
    Raises:
        OSError: if the jpg file could not be written
    '''
    time_start = time.time()
    #Read data
    all_nodes, all_edges, _, canvas_size, _ = load_ground_truth_data(fname)

    results_img = np.full((canvas_size, canvas_size, 3), 255 ,np.uint8) #create a white img

    #Draw all the edge
    drawAllEdges(results_img,all_nodes,all_edges,color_of_edges,lineType= cv2.LINE_AA)

    #Draw all the node
    drawAllNodes(results_img,all_nodes,1,color_of_nodes,lineType= cv2.LINE_AA)

    # Compute the output path once; it is printed, written and used as result key
    jpg_path = fname.replace(gt_folder_name, gt_jpg_folder_name) + ".jpg"
    print(jpg_path)

    # cv2.imwrite does not create missing directories and signals failure only
    # through its return value, so prepare the folder and check the result.
    out_dir = os.path.dirname(jpg_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    if not cv2.imwrite(jpg_path, results_img):
        raise OSError(f"Failed to write image: {jpg_path}")

    time_end = time.time()
    print('It cost %f seconds' % (time_end - time_start))
    print ("DONE.")
    return {jpg_path: all_nodes}

In [10]:
# single test: generate the jpg for one sparse ground truth graph (file "n=500d=8")
# The path is built from the configured data_path instead of an absolute, machine-specific one.
fpath = os.path.join(data_path, gt_folder_name, sparse_folder, "n=500d=8")
result_dict = generateJPG_forGT(fpath, gt_folder_name, "jpg_gt_files")

../../Data/jpg_gt_files/Sparse/n=500d=8.jpg
It cost 0.221303 seconds
DONE.


### Batch process

In [19]:
def add_GT_files(data_path, gt_folder_name, type_folder_name, node_list, degree_list):
    '''
    Build the ground truth file paths for every (number of nodes, degree) combination
    Files are named "n=<nodes>d=<degree>"; nothing is read from disk here.
    Args: 
        data_path: Data path
        gt_folder_name: Ground truth folder name
        type_folder_name: Type folder name
        node_list: The number of nodes we need to process with
        degree_list: The degree we need to process with
    Returns: the list of file paths, ordered by node_list first, then degree_list
    '''
    folder_parts = (data_path, gt_folder_name, type_folder_name)
    return [
        os.path.join(*folder_parts, f"n={n}d={d}")
        for n in node_list
        for d in degree_list
    ]

In [20]:
def read_GT_files(flist):
    '''
    Read the ground truth files and generate their jpg files
    Args: 
        flist: list of ground truth file paths
    Returns: nodes_dict[gt jpg file name] = the ground truth nodes' coordinates,
             with one entry per file in flist
    '''
    nodes_dict = {}
    for fpath in flist:
        # Merge the entry of each file; assigning the result directly would
        # throw away everything but the last file.
        nodes_dict.update(generateJPG_forGT(fpath, gt_folder_name, "jpg_gt_files"))
    return nodes_dict
        

In [21]:
# batch test: generate the jpg files for every (number of nodes, degree)
# combination of the sparse ground truth graphs
node_list = [500,1000,2000,3000]  # numbers of nodes to process
degree_list = [6,8,10,12,15]  # degrees to process

nodes_dict = read_GT_files(add_GT_files(data_path,gt_folder_name,sparse_folder,node_list,degree_list))

../../Data/jpg_gt_files/Sparse/n=500d=6.jpg
It cost 0.116970 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=500d=8.jpg
It cost 0.103717 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=500d=10.jpg
It cost 0.127359 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=500d=12.jpg
It cost 0.131766 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=500d=15.jpg
It cost 0.170013 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=1000d=6.jpg
It cost 0.313859 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=1000d=8.jpg
It cost 0.348621 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=1000d=10.jpg
It cost 0.400036 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=1000d=12.jpg
It cost 0.473564 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=1000d=15.jpg
It cost 0.690878 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=2000d=6.jpg
It cost 1.687073 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=2000d=8.jpg
It cost 1.329808 seconds
DONE.
../../Data/jpg_gt_files/Sparse/n=2000d=10.jpg
It cost 1.406744 seconds
DONE.
../../Data

### Computation for GT_GN and GT_LN by using CT on jpg_file

In [None]:
def get_TP_TN_FP_FN(nodes_dict, fd_list):
    '''
    Compute the confusion matrix counts of the ground truth nodes (NOT IMPLEMENTED YET)

    GT_GN: nodes of the graph generated from the ground truth that lie in a hole
           larger than a given area
    GT_LN: nodes of the graph generated from the ground truth that do not lie in
           a hole larger than a given area
    TP (true positive)  = GT_GN in FD-CT/FD-CCL contours
    TN (true negative)  = GT_LN not in FD-CT/FD-CCL contours
    FP (false positive) = GT_LN in FD-CT/FD-CCL contours
    FN (false negative) = GT_GN not in FD-CT/FD-CCL contours
    Args: 
        nodes_dict: nodes_dict[jpg_gt_fname] = all nodes' coordinates
        fd_list: the items to evaluate (their exact form is still undecided)
    Returns: TP,TN,FP,FN
    Raises:
        NotImplementedError: always, until the computation is written
    '''
    # TODO: intended outline, taken from the unfinished draft of this cell:
    #   for fd in fd_list:
    #       for fpath, coor in nodes_dict.items():
    #           fd_file_path = <path of the fd jpg matching fpath>
    #           gt_nodes_ccl_contours(...)
    #           gt_nodes_ct_contours(...)
    # The draft did not parse (assignment without a value), so the cell now
    # fails explicitly at call time instead of breaking the notebook run.
    raise NotImplementedError("get_TP_TN_FP_FN is not implemented yet")


In [16]:
def gt_nodes_ccl_contours(fd_file_path, gt_nodes_list, canvas_size):
    '''
    Count the ground truth nodes in / not in the connected components whose
    area is > area_min, using connected component labeling on the given image.
    Side effects: prints a summary.
    Args: 
        fd_file_path: the jpg file generated by fd algorithm
        gt_nodes_list: ground truth nodes list, one (x, y) point per node
        canvas_size: the size of the jpg file generated by fd algorithm;
                     area_min is derived from it as 5000 / 4000 * canvas_size
    Returns: (number of nodes in or on at least one contour, number of the other nodes)
    Raises:
        FileNotFoundError: if the image cannot be read
    '''
    # Load the image
    img = cv2.imread(fd_file_path, cv2.IMREAD_GRAYSCALE)
    if img is None:  # cv2.imread reports failure by returning None
        raise FileNotFoundError(f"Cannot read image: {fd_file_path}")
    img = fill_color_demo(img)
    # Apply binary thresholding to create a binary image
    thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[1]

    # Apply connected component labeling to find the components
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

    area_min_radio = 5000 / 4000
    area_min = area_min_radio * canvas_size
    # Collect the outer contours of the components that are large enough
    # (label 0 is skipped, as before)
    contours = []
    for i in range(1, num_labels):
        if stats[i, cv2.CC_STAT_AREA] > area_min:
            component_mask = (labels == i).astype('uint8')
            component_contours, _ = cv2.findContours(component_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            # findContours yields a sequence of contours; extend keeps the list
            # flat so that every item is a single contour for pointPolygonTest
            contours.extend(component_contours)

    # Count every node at most once: it is "in" as soon as one contour contains it
    num_nodes_in_contours = 0
    for node in gt_nodes_list:
        if any(cv2.pointPolygonTest(contour, node, False) >= 0 for contour in contours):
            num_nodes_in_contours += 1
    num_nodes_not_in_contours = len(gt_nodes_list) - num_nodes_in_contours

    # Print the results
    print("Number of contours found: ", len(contours))
    print("Number of nodes lying in the contours: ", num_nodes_in_contours)
    return num_nodes_in_contours, num_nodes_not_in_contours


## GT_GN：利用GroundTruth生成的Graph中在大于某个面积的洞的点
## GT_LN：利用GroundTruth生成的Graph中不在大于某个面积的洞的点
### TP（真阳性）= GT_GN in FD-CT/FD-CCL contours
### TN（真阴性）= GT_LN not in FD-CT/FD-CCL contours
### FP（假阳性）= GT_LN in FD-CT/FD-CCL contours
### FN（假阴性）= GT_GN not in FD-CT/FD-CCL contours

In [63]:
# Scratch cell: connected component labeling on one image, counting the given
# nodes per component and writing a colored overview to "CCL_output.jpg".
# NOTE(review): `file_path` and `ground_truth_nodes` are not defined in any
# visible cell, so this cell relies on leftover kernel state — define them
# explicitly before running on a fresh kernel.
# NOTE(review): cv2 and numpy are already imported in the first cell.
import cv2
import numpy as np

# Load the image and convert it to grayscale
img = cv2.imread(file_path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Threshold the image to create a binary image
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Use connected component labeling to find the contours
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

# Loop through each label (excluding the background label 0)
num_nodes_in_contours = 0
for i in range(1, num_labels):
    # Create a mask for the current label
    mask = np.zeros(thresh.shape, dtype=np.uint8)
    mask[labels == i] = 255
    
    # Find the contour for the current label
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Check if any of the nodes in the given list are lying on the contour
    # Only the first contour of the component is tested. There is no break, so
    # the counter goes up once per (component, node) match rather than once per node.
    for node in ground_truth_nodes:
        distance = cv2.pointPolygonTest(contours[0], node, measureDist=True)
        if distance >= 0:
            num_nodes_in_contours += 1

# `output` is the same array object as `img`, so the coloring below also changes img
output = img
for i in range(1, num_labels):
    # Area of the component
    # NOTE(review): `area` is only used by the commented-out debug print below
    area = stats[i, cv2.CC_STAT_AREA] 
#     if area > 10000:
#         print(str(i) + " area is " + str(area))
    # Paint every pixel of the component with channel values (255, 0, 0)
    mask = labels == i
    output[:, :, 0][mask] = 255
    output[:, :, 1][mask] = 0
    output[:, :, 2][mask] = 0
cv2.imwrite("CCL_output.jpg",output)

# Print the results
# NOTE(review): num_labels also counts label 0, which the loops above skip
print("Number of contours found: ", num_labels)
print("Number of nodes lying in the contours: ", num_nodes_in_contours)

Number of contours found:  687
Number of nodes lying in the contours:  455
