In [None]:
import numpy as np
import h5py
import seaborn
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import multiprocessing as mp
import glob
import sys
from functools import partial
from sklearn.cluster import DBSCAN
import matplotlib as mpl
import collections
from collections import defaultdict

In [None]:
def Edge_Filter(image,window_size=5,threshold=99):
    '''Keep only the pixels whose neighbourhood contains strong fields of both polarities.

    NaNs in `image` are first replaced via ``np.nan_to_num``. Two magnitude
    thresholds are then computed: the `threshold`-th percentile of the positive
    pixels (value > 1e-6) and the `threshold`-th percentile of the absolute
    value of the negative pixels (value < -1e-6). A pixel is kept (its value is
    copied to the output) when the square window centred on it has a maximum
    above the positive threshold AND a minimum below minus the negative
    threshold; every other output pixel is 0.

    Params:
    image: 2-D array-like. The input is not modified.
    window_size: side length of the square neighbourhood. The window actually
                 used has side ``2*int((window_size-1)/2)+1``, so an even value
                 behaves like the next smaller odd value. Pixels outside the
                 image are treated as 0 (zero padding).
    threshold: percentile (0-100) used for both polarity thresholds.

    Returns:
    A float array with the same height and width as `image`. If the image has
    no positive pixel or no negative pixel, no window can span both polarities
    and an all-zero array is returned.
    '''
    image = np.nan_to_num(image)

    height = image.shape[0]
    width = image.shape[1]
    mask_image = np.zeros((height,width))

    # Magnitudes of the clearly positive / clearly negative pixels.
    pos_weight = np.abs(image[image>(1e-6)])
    neg_weight = np.abs(image[image<-(1e-6)])

    # A percentile of an empty array is undefined (NaN or an exception,
    # depending on the numpy version), and without both polarities there is
    # nothing to detect anyway.
    if pos_weight.size==0 or neg_weight.size==0:
        return mask_image

    pos_threshold = np.percentile(pos_weight, q=threshold)
    neg_threshold = np.percentile(neg_weight, q=threshold)

    pad_size = int((window_size-1)/2)
    image_padded = np.pad(image,((pad_size,pad_size),(pad_size,pad_size)),'constant')

    # Running max/min over the window: combine the (side*side) shifted copies of
    # the padded image instead of visiting every pixel in a Python loop.
    side = 2*pad_size+1
    window_max = image_padded[:height,:width].copy()
    window_min = window_max.copy()
    for row_shift in range(side):
        for col_shift in range(side):
            shifted = image_padded[row_shift:row_shift+height,col_shift:col_shift+width]
            np.maximum(window_max,shifted,out=window_max)
            np.minimum(window_min,shifted,out=window_min)

    # Keep the pixels whose window holds both a strong positive and a strong negative value.
    keep = (window_max>pos_threshold) & (window_min<-(neg_threshold))
    mask_image[keep] = image[keep]

    return mask_image


In [None]:
def _plot_pil_panels(image, points, **scatter_kwargs):
    '''Draw `image` as a heatmap next to a scatter plot of `points` (rows of [row, col]); return the figure.'''
    fig, (ax_image, ax_points) = plt.subplots(nrows=1, ncols=2)
    seaborn.heatmap(image, center=0, cbar=False, xticklabels=False, yticklabels=False, ax=ax_image)
    ax_points.scatter(points[:, 1], points[:, 0], marker='o', **scatter_kwargs)
    ax_points.set_xlim(0, image.shape[1])
    ax_points.set_ylim(0, image.shape[0])
    ax_points.invert_yaxis()  # row 0 at the top, like the heatmap
    # Booleans are required here: the string 'off' is truthy on current matplotlib.
    ax_points.tick_params(labelbottom=False, labelleft=False)
    return fig


def Edge_Detection(image,threshold=99.5,image_threshold=97,radius=15,show=True,select=3,imagename='Nil'):
    '''Locate candidate PIL pixels with an edge filter followed by DBSCAN clustering.

    Steps:
    1. `image` is sparsified with ``Edge_Filter(image, threshold=image_threshold)``.
    2. ``scharr`` is applied to the filtered image; pixels whose edge magnitude is
       non-zero and at or above the `threshold`-th percentile are the candidates.
    3. The candidate coordinates are clustered with ``DBSCAN(eps=radius, min_samples=5)``;
       points labelled -1 (noise) are discarded.
    4. The `select` clusters with the largest summed edge magnitude are kept
       (all clusters if there are no more than `select`).

    Params:
    image: 2-D Bz image.
    threshold: percentile (0-100) of the edge magnitude used to pick candidates.
    image_threshold: percentile forwarded to ``Edge_Filter``.
    radius: DBSCAN ``eps`` in pixels.
    show: ``True`` plots all candidates coloured by cluster label and returns None;
          ``'select'`` plots only the selected pixels, saves the figure to
          ``imagename+'_s_f'+str(select)+'.pdf'`` and returns None;
          anything else returns the selected pixels without plotting.
    select: number of clusters to keep.
    imagename: file-name prefix used when ``show=='select'``.

    Returns:
    When `show` is neither ``True`` nor ``'select'``: an (n, 2) integer array of
    [row, col] coordinates (n may be 0). Otherwise None.
    '''
    image = Edge_Filter(image,threshold=image_threshold)
    # NOTE(review): `scharr` is not imported in the visible imports cell; it is
    # presumably skimage.filters.scharr (per the original docstring) -- confirm
    # and add the import to the imports cell.
    edge = scharr(image)
    edge_cutoff = np.percentile(edge,threshold)

    # Candidates: strong edges only. The `edge>0` guard matters because the
    # filtered image is sparse: when the percentile itself is 0, `>=` alone would
    # select every pixel of the image.
    candidate = (edge>=edge_cutoff) & (edge>0)
    pixel_coor = np.argwhere(candidate)   # (n, 2) rows of [row, col], row-major order
    pixel_weight = edge[candidate]        # edge magnitude, same order as pixel_coor

    if len(pixel_coor)>0:
        clust = DBSCAN(eps=radius,min_samples=5)
        thecluster = clust.fit_predict(X=pixel_coor)
    else:
        thecluster = np.zeros(0,dtype=int)

    # Sum the edge magnitude per cluster, ignoring DBSCAN noise (label -1).
    # This must happen whether or not any noise label is present.
    clustered = thecluster>=0
    n_clusters = int(thecluster[clustered].max())+1 if clustered.any() else 0
    cluster_sum = np.bincount(thecluster[clustered],weights=pixel_weight[clustered],minlength=n_clusters)

    if n_clusters>select:
        top = np.argsort(cluster_sum)[-(select):]
    else:
        top = range(n_clusters)

    selected = [pixel_coor[thecluster==c] for c in top]
    if selected:
        PIL_pixel = np.concatenate(selected)
    else:
        PIL_pixel = np.zeros((0,2),dtype=pixel_coor.dtype)

    if show==True:
        _plot_pil_panels(image, pixel_coor, c=thecluster, cmap=plt.cm.jet, s=0.5)
        plt.show()
    elif show=='select':
        fig = _plot_pil_panels(image, PIL_pixel, color='red', s=0.05)
        fig.savefig(imagename+'_s_f'+str(select)+'.pdf')
        plt.show()
    else:
        return PIL_pixel

In [None]:
class PIL:
    '''This class creates a PIL detector which can be called for fitting PIL on any Bz image or an entire hdf5 file.
        
       Summary on the class "PIL":
       
       For each image of the vertical component of the magnetic field, namely the image of "Bz", we want to find the 
       contour of Bz==0 that separates the strong positive magnetic polars and the associated negative polars, which 
       is called the Polarity Inversion Line(PIL). Physicists are interested in the local magnetic field features 
       around the PIL.
       
       In a pixelized image, the pixels that consist of the PIL should be those pixels that have a large Bz gradient in 
       its neighborhood. And finding these points is analogous to the edge detection task in the image processing field.
       So by applying an edge detection filter upon each Bz image, we could find the candidate pixels of the PIL. And by 
       further applying a clustering algorithm on the candidate pixels, we are able to locate several PIL segments in the
       image. Finally, one could retain, as many as one wanted, the PILs with large average gradients for future purposes.
       
       The class PIL is a parameterized PIL detector that combines the data preparation, edge detection, clustering and 
       parallelized training, and visualization procedures. It can be applied to a piece of image in numpy ndarray form, 
       and even an entire hdf5 file with rather fast implementation.
    
    '''
    
    def __init__(self,edge_retain=0.005,polar_threshold=0.97,radius=15):
        '''Initialize the PIL detector
        
           Params:
           edge_retain: The fraction of candidate PIL pixels of the input image.
                        What fraction of the pixels of the whole input image can be considered as a candidate for PIL. 
                        The larger the amount is, the more pixels there will be for the PIL. The default amount is 0.005
                        which corresponds to 0.5% of all pixels in the input image. These pixels are the pixels with the 
                        largest local gradient. By specifying the edge_retain to be too large, we may include many irrelevant
                        pixels when drawing the PIL. By setting the parameter to be too small, we may lose some pixels on
                        the true PIL. The best way to tune the parameter is to use the .visualize() method to check visually. 
                        Generally speaking, each image won't have more than 1% of its pixels as PIL.
                        
           polar_threshold: The strong polar threshold.
                            The quantile of the magnitude of the vertical component above which a pixel is considered as 
                            a strong polar. Basically, each image is decomposed into a positive image and a complementing
                            negative image. In a positive image, all negative image pixels are coerced to 0, and vice versa.
                            In the positive image, for instance, all pixels' values are ranked from the smallest to the 
                            largest, and all the pixels that are ranking at the quantile no greater than polar_threshold are
                            further coerced to 0. The same procedure applies to the negative image. And the positive and 
                            negative image are put back together as a sparser image, upon which the edge detector will be
                            applied. Setting the parameter too small can lead to many weaker polars being considered when
                            drawing the PIL.
                            
           radius: The maximum pixel distance between two pixels that make the pixels being considered as on the same PIL. 
                   The larger the quantity, the less PIL segments will be drawn in the image, but maybe two originally 
                   separated PILs will be considered mistakenly as a single PIL.
                        
        '''
        
        self.edge_retain = edge_retain
        self.polar_threshold = polar_threshold
        self.radius = radius
    
    def Polar_Filter(self,image,window_size=5):
        '''Data preparation step:
        
        '''