In [276]:
import os
import pandas
import cv2
import numpy as np
import time

### Configuration

In [277]:
# color
# All colors are OpenCV BGR tuples (blue, green, red), as used by the cv2 drawing calls.
color_of_edges = (0, 0, 0) # black
color_of_holes = (255, 255, 255) # white
color_of_nodes = (0, 0, 0) # black
color_red = (0, 0, 255)
color_blue = (255, 0, 0)
color_green = (0, 255, 0)
color_yellow = (0, 255, 255) # in BGR order (255, 255, 0) would be drawn as cyan
color_of_inner_node = color_green
color_of_outer_node = color_red

# path
data_path = "../../Data/"
gt_folder_name = "ground-truth"
sparse_folder = "Sparse"
uniform_folder = "Uniform"
gt_jpg_folder_name = "jpg_gt_files"

### Contour tracing functions for finding the boundary nodes

In [278]:
def fill_color_demo(image):
    '''
    Flood fill a copy of the image from each of its four corners
    Args:
        image: cv2 image (numpy array); the input itself is left untouched
    Returns: the flood-filled copy
    '''
    h, w = image.shape[:2]
    print(h, w)
    filled = image.copy()
    # One shared mask, two pixels larger than the image in each dimension
    mask = np.zeros([h+2, w+2], np.uint8)

    # Seeds in (x, y) order: top-left, top-right, bottom-left, bottom-right
    corner_seeds = [(0, 0), (w-1, 0), (0, h-1), (w-1, h-1)]
    for seed in corner_seeds:
        cv2.floodFill(filled, mask, seed, (0, 0, 0), (100, 100, 100), (50, 50, 50), cv2.FLOODFILL_FIXED_RANGE)

    return filled

In [279]:
def findHoleNodes(hole, gt_nodes_list, img, all_inner_list, d = -4.99):
    '''
    Collect the ground truth nodes that belong to one hole
    Args: 
        hole: contour of the hole, passed to cv2.pointPolygonTest
        gt_nodes_list: ground truth nodes list
        img: not used by this function
        all_inner_list: nodes already assigned to a hole; they are skipped
        d: lowest cv2.pointPolygonTest distance for a node to count as part of the hole
    Returns: list of the nodes newly matched to this hole
    '''
    def belongs_to_hole(node):
        # Distance check first, then skip nodes that an earlier hole already claimed
        signed_distance = cv2.pointPolygonTest(hole, node, measureDist=True)
        return signed_distance >= d and node not in all_inner_list

    return [node for node in gt_nodes_list if belongs_to_hole(node)]

In [280]:
def gt_nodes_ct_contours(fpath, gt_nodes_list, area_min, color_inner = color_of_inner_node, 
                  color_outer = color_of_outer_node, label=True):
    '''
    Split the nodes into those matched to a contour larger than area_min and the rest
    Args: 
        fpath: path of the jpg file to analyse
        gt_nodes_list: ground truth nodes list, as (x, y) pixel coordinates
        area_min: a contour must have an area above int(area_min) to be used
        color_inner: circle color for the nodes matched to a contour
        color_outer: circle color for the remaining nodes
        label: True->paint the nodes on the returned image
    Returns: nodes_inner_list, nodes_outer_list, img (the loaded image, painted if label)
    Raises: FileNotFoundError if the image cannot be read
    '''
    img = cv2.imread(fpath)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError("Cannot read image: " + str(fpath))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    # [-2:] keeps (contours, hierarchy) whether findContours returns 2 or 3 values
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # Keep the contours without a parent (hierarchy entry [3] == -1) whose area is > area_min
    holes = []
    if hierarchy is not None:  # hierarchy is None when no contour was found
        for contour, relation in zip(contours, hierarchy[0]):
            if relation[3] == -1 and cv2.contourArea(contour) > int(area_min):
                holes.append(contour)

    # get list of ground truth nodes in or not in the contours 
    nodes_inner_list = []
    for hole in holes:
        # nodes_inner_list is passed in so a node is only assigned to the first hole it matches
        nodes_inner_list.extend(findHoleNodes(hole, gt_nodes_list, img, nodes_inner_list))
    nodes_outer_list = [node for node in gt_nodes_list if node not in nodes_inner_list]

    if label:
        for outer_node in nodes_outer_list:
            cv2.circle(img, outer_node, radius = 10, color = color_outer, thickness = 4)  #Add the node result
        for inner_node in nodes_inner_list:
            cv2.circle(img, inner_node, radius = 10, color = color_inner, thickness = 4)  #Add the node result
    # Print the results
    print("CT Results: Num_Contours=%d , Num_Holes=%d, Num_InnerNodes=%d, Num_OuterNodes=%d" % (len(contours),len(holes),len(nodes_inner_list),len(nodes_outer_list)))
    return nodes_inner_list, nodes_outer_list, img

### Drawing functions

In [281]:
#Draw all the nodes
def drawAllNodes(img,all_nodes,radiu,color = color_of_nodes,thickness=1,lineType=0,shift=0):
    '''
    Draw one circle per node on img
    Args:
        img: image to draw on (modified in place)
        all_nodes: list of node centers
        radiu: circle radius
        color, thickness, lineType, shift: forwarded to cv2.circle
    Returns: img
    '''
    for center in all_nodes:
        cv2.circle(img, center, radiu, color, thickness, lineType, shift)  #Add the node result

    return img

#Draw all the edges
def drawAllEdges(img,all_nodes,all_edges,color = color_of_edges,thickness=3,lineType=8,shift=0):
    '''
    Draw one line per edge on img
    Args:
        img: image to draw on (modified in place)
        all_nodes: list of node positions, indexed by the edge entries
        all_edges: list of edges; items [0] and [1] of an edge are node indices
        color, thickness, lineType, shift: forwarded to cv2.line
    Returns: img
    '''
    for edge in all_edges:
        start_point = all_nodes[edge[0]]
        end_point = all_nodes[edge[1]]
        cv2.line(img, start_point, end_point, color, thickness, lineType, shift)

    return img

### Functions that load the ground truth and render it as a JPG for contour classification

In [282]:
def load_ground_truth_data(ground_truth_fpath):
    '''
    Load a ground truth graph file and scale its node coordinates onto a square canvas
    File layout read here: one line with the number of nodes, one line per node
    ("<index> <x> <y>"), one line with the number of edges, one line per edge
    ("<node index> <node index>").
    Args: 
        ground_truth_fpath: path of the ground truth file
    Returns: 
        all_nodes: list of (x, y) integer canvas coordinates, in file order
        all_edges: list of [node index, node index]
        num_nodes: the number of nodes
        canvas_size: side length of the canvas (4 * num_nodes)
        area_min: the minimum contour area (5000 / 4000 * canvas_size)
    '''
    # The with-block guarantees the file is closed
    with open(ground_truth_fpath, 'r') as data_f:
        data_list = [line.strip() for line in data_f]

    num_nodes = int(data_list[0])

    #Get all the node coordinates; field 0 of a node line is the node index
    node_x = []
    node_y = []
    for row in range(1, num_nodes + 1):
        fields = data_list[row].split()
        node_x.append(float(fields[1]))
        node_y.append(float(fields[2]))

    #If any coordinate is negative, translate both axes so their minimum becomes 0
    min_x = min(node_x)
    min_y = min(node_y)
    if min_x < 0 or min_y < 0:
        node_x = [x - min_x for x in node_x]
        node_y = [y - min_y for y in node_y]

    #Calculate the canvas size from the number of nodes
    canvas_size = (4 * num_nodes)
    #Ratio that makes the larger extent fit the canvas; this equals
    #min(canvas_size / max_x, canvas_size / max_y) but does not divide by a zero extent
    largest_extent = max(max(node_x), max(node_y))
    node_ratio = canvas_size / largest_extent if largest_extent > 0 else 1.0

    all_nodes = [(int(node_ratio * x), int(node_ratio * y)) for x, y in zip(node_x, node_y)] #(X, Y)

    #Get all the edges; they follow the node lines
    edge_count_row = num_nodes + 1
    num_edges = int(data_list[edge_count_row])
    all_edges = []
    for row in range(edge_count_row + 1, edge_count_row + 1 + num_edges):
        fields = data_list[row].split()
        all_edges.append([int(fields[0]), int(fields[1])]) #(point 1, point 2)

    area_min_ratio = 5000 / 4000
    area_min = area_min_ratio * canvas_size

    return all_nodes, all_edges, num_nodes, canvas_size, area_min

In [283]:
def generateJPG_forGT(fname, gt_folder_name, gt_jpg_folder_name):
    '''
    Generate jpg files for ground truth nodes
    Args:
        fname: ground truth file name
        gt_folder_name: ground truth folder name
        gt_jpg_folder_name: the folder name of ground truth jpg file
    Returns: 
        jpg_gt_name: ground truth jpg path, 
        all_nodes: all nodes of ground truth  
        area_min: the minimum area to sort contours
    Raises:
        IOError: if the jpg file could not be written
    '''
    time_start = time.time()
    #Read data 
    all_nodes, all_edges, num_nodes, canvas_size, area_min = load_ground_truth_data(fname) 
    
    results_img = np.full((canvas_size, canvas_size, 3), 255 ,np.uint8) #create a white img

    #Draw all the edge
    drawAllEdges(results_img,all_nodes,all_edges,color_of_edges,lineType= cv2.LINE_AA)

    #Draw all the node
    drawAllNodes(results_img,all_nodes,1,color_of_nodes,lineType= cv2.LINE_AA)
    
    #The jpg path mirrors the ground truth path, with the folder name swapped
    jpg_gt_name = fname.replace(gt_folder_name, gt_jpg_folder_name) + ".jpg"
    out_dir = os.path.dirname(jpg_gt_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    #cv2.imwrite returns False on failure; fail here instead of later when the jpg is read back
    if not cv2.imwrite(jpg_gt_name, results_img):
        raise IOError("Failed to write image: " + jpg_gt_name)
    time_end = time.time()
    print('It cost %f seconds' % (time_end - time_start))
    print ("DONE.")
    return jpg_gt_name, all_nodes, area_min

### Batch process

In [284]:
def add_GT_files(data_path, gt_folder_name, type_folder_name, node_list, degree_list):
    '''
    Build the ground truth file path for every (number of nodes, degree) combination
    Args: 
        data_path: Data path
        gt_folder_name: Ground truth folder name
        type_folder_name: Type folder name
        node_list: The number of nodes we need to process with
        degree_list: The degree we need to process with
    Returns: the list of file paths, ordered by node count first and degree second
    '''
    # File names follow the pattern "n=<nodes>d=<degree>"
    return [
        os.path.join(data_path, gt_folder_name, type_folder_name, "n=%sd=%s" % (str(n), str(d)))
        for n in node_list
        for d in degree_list
    ]

In [285]:
def read_gt_files(data_path, gt_folder_name, gt_jpg_folder_name, type_folder_name, node_list, degree_list):
    '''
    1.Build the ground truth file path of every (number of nodes, degree) combination
    2.Read each ground truth file and generate its jpg file
    Args: 
        data_path: Data path
        gt_folder_name: Ground truth folder name
        gt_jpg_folder_name: The folder name of ground truth jpg file
        type_folder_name: Type folder name
        node_list: The number of nodes we need to process with
        degree_list: The degree we need to process with
    Returns: 
        gt_nodes_dict: key=jpg_gt_path, value=[all_nodes, area_min]
    '''
    # Reuse add_GT_files so the file naming scheme is defined in one place only
    gt_list = add_GT_files(data_path, gt_folder_name, type_folder_name, node_list, degree_list)

    # Read the ground truth files; every jpg is regenerated, existing files are overwritten
    gt_nodes_dict = {}
    for gt_path in gt_list:
        jpg_gt_name, all_nodes, area_min = generateJPG_forGT(gt_path, gt_folder_name, gt_jpg_folder_name)
        gt_nodes_dict[jpg_gt_name] = [all_nodes,area_min]
    return gt_nodes_dict
        

### Compute GT_GN and GT_LN by running contour tracing (CT) on the JPG files

In [286]:
def get_all_gt_inner_outer(gt_nodes_dict):
    '''
    Run contour tracing on every ground truth jpg and split its nodes into inner and outer nodes
    The painted result image is written next to the input, in the "jpg_gt_files_ct" folder
    Args:
        gt_nodes_dict: key=gt jpg path, value=[all_nodes, area_min]
    Returns: dictionary key=gt jpg path, value=[gt inner, gt outer, area_min]
    '''
    inner_outer_dict = {}
    for fpath in gt_nodes_dict:
        entry = gt_nodes_dict[fpath]
        all_nodes, area_min = entry[0], entry[1]
        inner, outer, painted_img = gt_nodes_ct_contours(fpath, all_nodes, area_min)
        ct_path = fpath.replace("jpg_gt_files","jpg_gt_files_ct")
        cv2.imwrite(ct_path, painted_img)
        inner_outer_dict[fpath] = [inner, outer, area_min]
    return inner_outer_dict

In [287]:
def gt_path2fd_jpg_path(inner_outer_dict,fd_list):
    '''
    Convert the file name of ground truth jpg file to fd algorithm file
    Args:
        inner_outer_dict: key=gt jpg path (split on "/"), value is copied to the result unchanged
        fd_list: fd algorithm folder names to search
    Returns: dictionary key=fd path, value=[gt inner, gt outer, area_min]
    '''
    result_dict = {}
    for fpath, nodes in inner_outer_dict.items():
        for fd in fd_list:
            # The path is taken apart by position, so it must use "/" separators and
            # have at least six components.
            fnlist = fpath.split("/")
            # NOTE(review): fnlist[0] is used twice and fnlist[2] is skipped; with a path like
            # "../../Data/jpg_gt_files/Sparse/<name>.jpg" this yields "../../../jpg_files/Sparse/<fd>".
            # Confirm this matches the real folder layout.
            file_dir = os.path.join(fnlist[0],fnlist[0],fnlist[1],"jpg_files", fnlist[4], fd)
            # Walk the fd folder recursively and keep every file whose base name contains
            # the ground truth name.
            for root, dirs, files in os.walk(file_dir): 
                for file in files: 
                    # NOTE(review): substring match - a name such as "n=500d=6" would also
                    # match a file named "n=1500d=6..."; verify against the fd file naming.
                    if fnlist[5].replace(".jpg","") in os.path.splitext(file)[0]: 
                        result_dict[os.path.join(root,file)] = nodes
    return result_dict

In [292]:
def _safe_rate(hits, total):
    # hits / total, or nan when total is 0 so an empty class does not abort the batch
    return hits / total if total else float("nan")


def get_all_TP_TN_FP_FN(inner_outer_dict, fd_list):
    '''
    Compare the ground truth node classes with the contours found in every fd jpg file
    GT_GN: nodes of the ground truth graph that lie in a hole larger than the minimum area
    GT_LN: nodes of the ground truth graph that do not lie in such a hole
    TP (true positive)  = GT_GN in FD-CT/FD-CCL contours
    TN (true negative)  = GT_LN not in FD-CT/FD-CCL contours
    FP (false positive) = GT_LN in FD-CT/FD-CCL contours
    FN (false negative) = GT_GN not in FD-CT/FD-CCL contours
    Args: 
        inner_outer_dict: key=gt jpg path, value=[gt inner (GT_GN), gt outer (GT_LN), area_min]
        fd_list: fd algorithm folder names to search for the matching jpg files
    Returns: 
        nodes_dict: key=fd_path, value=[TP nodes, FN nodes, FP nodes, TN nodes]
        TPR_dict: key=fd_path, value=[TP/(TP+FN)]
        TNR_dict: key=fd_path, value=[TN/(TN+FP)]
        PPV_dict: key=fd_path, value=[TP/(TP+FP)]
        NPV_dict: key=fd_path, value=[TN/(TN+FN)]
        A rate is nan when its denominator is 0.
    '''
    nodes_dict = {}
    TPR_dict = {} # True Positive Rate
    TNR_dict = {} # True Negative Rate
    PPV_dict = {} # Positive Predictive Value
    NPV_dict = {} # Negative Predictive Value
    # Create fd jpg path based on the ground truth path
    fd_files_dict = gt_path2fd_jpg_path(inner_outer_dict, fd_list)
    for fd_path, inner_outer in fd_files_dict.items():
        gt_gn, gt_ln, area_min = inner_outer[0], inner_outer[1], inner_outer[2]
        print("")
        print("================== Start processing: " + fd_path + " area_min: " + str(area_min) + "=======================")
        print("================== TP FN =================================================")
        # GT_GN nodes: inside an FD contour -> TP, outside -> FN
        gn_nodes_inner_fd, gn_nodes_outer_fd, painted_gn_img = gt_nodes_ct_contours(fd_path, gt_gn, area_min)
        print("================== FP TN =================================================")
        # GT_LN nodes: inside an FD contour -> FP, outside -> TN
        ln_nodes_inner_fd, ln_nodes_outer_fd, painted_ln_img = gt_nodes_ct_contours(fd_path, gt_ln, area_min)
        print("================== Finish processing: "+ fd_path +" ======================")
        # The unpack order follows the definitions in the docstring
        TP,FN,FP,TN = len(gn_nodes_inner_fd),len(gn_nodes_outer_fd),len(ln_nodes_inner_fd),len(ln_nodes_outer_fd)
        nodes_dict[fd_path] = [gn_nodes_inner_fd, gn_nodes_outer_fd, ln_nodes_inner_fd, ln_nodes_outer_fd]
        TPR_dict[fd_path] = [_safe_rate(TP, TP+FN)]
        TNR_dict[fd_path] = [_safe_rate(TN, TN+FP)]
        PPV_dict[fd_path] = [_safe_rate(TP, TP+FP)]
        NPV_dict[fd_path] = [_safe_rate(TN, TN+FN)]
    return nodes_dict, TPR_dict, TNR_dict, PPV_dict, NPV_dict


In [293]:
# batch test: graph sizes, degrees and fd algorithms to evaluate
node_list = [500, 1000, 2000, 3000]
degree_list = [6]
fd_list = ["FR", "FA2", "JIGGLE", "KK"]

# step 1: render every ground truth graph and collect its nodes (sparse graphs only)
gt_nodes_dict = read_gt_files(
    data_path=data_path,
    gt_folder_name=gt_folder_name,
    gt_jpg_folder_name=gt_jpg_folder_name,
    type_folder_name=sparse_folder,
    node_list=node_list,
    degree_list=degree_list,
)

# step 2: detect inner and outer nodes of each GT graph
gt_inner_outer_dict = get_all_gt_inner_outer(gt_nodes_dict)

# step 3: get TP TN FP FN and the derived rates for every fd jpg file
(
    nodes_dict,
    TPR_dict,
    TNR_dict,
    PPV_dict,
    NPV_dict,
) = get_all_TP_TN_FP_FN(gt_inner_outer_dict, fd_list)



It cost 0.173782 seconds
DONE.
It cost 0.345459 seconds
DONE.
It cost 1.129563 seconds
DONE.
It cost 2.297659 seconds
DONE.
CT Results: Num_Contours=1305 , Num_Holes=131, Num_InnerNodes=353, Num_OuterNodes=147
CT Results: Num_Contours=2981 , Num_Holes=267, Num_InnerNodes=700, Num_OuterNodes=300
CT Results: Num_Contours=6717 , Num_Holes=496, Num_InnerNodes=1392, Num_OuterNodes=608
CT Results: Num_Contours=10674 , Num_Holes=816, Num_InnerNodes=2145, Num_OuterNodes=855

CT Results: Num_Contours=1016 , Num_Holes=103, Num_InnerNodes=307, Num_OuterNodes=46
CT Results: Num_Contours=1016 , Num_Holes=103, Num_InnerNodes=141, Num_OuterNodes=6

CT Results: Num_Contours=686 , Num_Holes=40, Num_InnerNodes=326, Num_OuterNodes=27
CT Results: Num_Contours=686 , Num_Holes=40, Num_InnerNodes=130, Num_OuterNodes=17

CT Results: Num_Contours=972 , Num_Holes=42, Num_InnerNodes=327, Num_OuterNodes=26
CT Results: Num_Contours=972 , Num_Holes=42, Num_InnerNodes=133, Num_OuterNodes=14

CT Results: Num_Contours

In [296]:
# Number of fd jpg files that were matched and evaluated (nodes_dict is keyed by fd path)
len(nodes_dict)

16

In [None]:
# def gt_nodes_ccl_contours(fd_file_path, gt_nodes_list, canvas_size):
#     '''
#     Calculate the number of the nodes in or not in the contours > area_min
#     Args: 
#         jpg_file_path: the jpg file generated by fd algorithm
#         gt_nodes_list: ground truth nodes list
#         canvas_size: the size of the jpg file generated by fd algorithm
#     Returns: nodes_dict[file name] = the ground truth nodes' coordinates
#     '''
#     # Load the image
#     img = cv2.imread(fd_file_path, cv2.IMREAD_GRAYSCALE)
#     img = fill_color_demo(img)
#     # Apply binary thresholding to create a binary image
#     thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[1]

#     # Apply connected component labeling to find the contours
#     num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

#     area_min_radio = 5000 / 4000
#     area_min = area_min_radio * canvas_size
#     # Find the contours that correspond to the foreground
#     contours = []
#     for i in range(1, num_labels):
#         x, y, w, h, area = stats[i]
#         if area > area_min:
#             contours.append(cv2.findContours((labels == i).astype('uint8'), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0])

#     # Count the number of nodes lying on the contours
#     num_nodes_on_contours = 0
#     for node in gt_nodes_list:
#         for contour in contours:
#             dist = cv2.pointPolygonTest(contour, node, False)
#             if dist >= 0:
#                 num_nodes_on_contours += 1
#                 break


#     # Print the results
#     print("CCL Results: Num_Contours=%d , Num_Holes=%d, Num_InnerNodes=%d, Num_OuterNodes=%d" % (len(contours),len(holes),len(nodes_inner_list),len(nodes_outer_list)))


In [None]:
# import cv2
# import numpy as np

# # Load the image and convert it to grayscale
# img = cv2.imread(file_path)
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# # Threshold the image to create a binary image
# ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# # Use connected component labeling to find the contours
# num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

# # Loop through each label (excluding the background label 0)
# num_nodes_in_contours = 0
# for i in range(1, num_labels):
#     # Create a mask for the current label
#     mask = np.zeros(thresh.shape, dtype=np.uint8)
#     mask[labels == i] = 255
    
#     # Find the contour for the current label
#     contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
#     # Check if any of the nodes in the given list are lying on the contour
#     for node in ground_truth_nodes:
#         distance = cv2.pointPolygonTest(contours[0], node, measureDist=True)
#         if distance >= 0:
#             num_nodes_in_contours += 1

# output = img
# for i in range(1, num_labels):
#     # Area of the component
#     area = stats[i, cv2.CC_STAT_AREA] 
# #     if area > 10000:
# #         print(str(i) + " area is " + str(area))
#     mask = labels == i
#     output[:, :, 0][mask] = 255
#     output[:, :, 1][mask] = 0
#     output[:, :, 2][mask] = 0
# cv2.imwrite("CCL_output.jpg",output)

# # Print the results
# print("Number of contours found: ", num_labels)
# print("Number of nodes lying in the contours: ", num_nodes_in_contours)