In [13]:
import cv2
import numpy as np
from pandas import Series, DataFrame
import json

In [1]:
###########################
### Spectral Analysis
###########################

try:
    HISTOGRAM_BITS_PER_COLOR
except:
    HISTOGRAM_BITS_PER_COLOR = 3
    
NUM_BINS = 1 << HISTOGRAM_BITS_PER_COLOR
COLOR_SPECTRUM = 2 ** (3 * HISTOGRAM_BITS_PER_COLOR) + 1

ADAPTIVE_HISTOGRAM_THRESHOLD = True
HISTOGRAM_SUGGESTED_THRESHOLD = 0
HISTOGRAM_COUNT_THRESHOLD = HISTOGRAM_SUGGESTED_THRESHOLD

if HISTOGRAM_BITS_PER_COLOR == 1:
    HISTOGRAM_CEILING_THRESHOLD = 1500
    HISTOGRAM_FLOOR_THRESHOLD = 75
elif HISTOGRAM_BITS_PER_COLOR == 2:
    HISTOGRAM_CEILING_THRESHOLD = 1500
    HISTOGRAM_FLOOR_THRESHOLD = 50
else:
    HISTOGRAM_CEILING_THRESHOLD = 1500
    HISTOGRAM_FLOOR_THRESHOLD = 15

SHOW_DUPES = False
DEDUPE_REGIONS = True
MIN_DEDUPE_REGIONS = 5
DDUPE_ACCURACY_BY = 1

REGION_MERGE_PIXEL_DISTANCE = 20
MIN_REGION_SCORE = 0

# Private Constants
AVG_IMAGE_PIXEL = 0

class BinStats:
    spacing = 0
    min = 0
    max = 0
    
class Bin:
    r = BinStats()
    g = BinStats()
    b = BinStats()
    
def InitBin(image):
    global REGION_MERGE_PIXEL_DISTANCE
    height, width, depth = image.shape
    
    print 'InitBin:'
    print "image height=%i,width=%i,depth=%i" % (height,width,depth)
    
    if height<1280 and width<1900:
        REGION_MERGE_PIXEL_DISTANCE = 10
        
    bin = Bin()    
    r = image[:,:,0]
    bin.r.min = np.min(r)
    bin.r.max = np.max(r)
    bin.r.spacing = 1 + (bin.r.max - bin.r.min)/NUM_BINS
    print "min_r=%i, max_r=%i, bin_spacing.r=%i" % (bin.r.min, bin.r.max, bin.r.spacing)
    
    g = image[:,:,1]
    bin.g.min = np.min(g)
    bin.g.max = np.max(g)
    bin.g.spacing = 1 + (bin.g.max - bin.g.min)/NUM_BINS
    print "min_g=%i, max_g=%i, bin_spacing.g=%i" % (bin.g.min, bin.g.max, bin.g.spacing)
    
    b = image[:,:,2]
    bin.b.min = np.min(b)
    bin.b.max = np.max(b)
    bin.b.spacing = 1 + (bin.b.max - bin.b.min)/NUM_BINS
    print "min_b=%i, max_b=%i, bin_spacing.b=%i" % (bin.b.min, bin.b.max, bin.b.spacing)
    
    print "\nREGION_MERGE_PIXEL_DISTANCE = %i" % REGION_MERGE_PIXEL_DISTANCE
    
    return bin
    
def InitLUT(bin):
    r_lut = np.zeros(256)
    g_lut = np.zeros(256)
    b_lut = np.zeros(256)
    
    for i in range(256):
        r_lut[i] = (i - bin.r.min) / bin.r.spacing
        if r_lut[i] >= NUM_BINS:
            r_lut[i] = NUM_BINS - 1
            
        g_lut[i] = (i - bin.g.min) / bin.g.spacing
        if g_lut[i] >= NUM_BINS:
            g_lut[i] = NUM_BINS - 1
            
        b_lut[i] = (i - bin.b.min) / bin.b.spacing
        if b_lut[i] >= NUM_BINS:
            b_lut[i] = NUM_BINS - 1
    
#    print r_lut
#    print g_lut
#    print b_lut
    
    return r_lut,g_lut,b_lut

def quantize(image, lut):
    global AVG_IMAGE_PIXEL
    height,width,depth = image.shape
    
    r_lut,g_lut,b_lut = lut
    
    qimage = np.zeros((height,width,depth),dtype='uint8')
    qimage[:,:,0] = r_lut[image[:,:,0]]
    qimage[:,:,1] = g_lut[image[:,:,1]]
    qimage[:,:,2] = b_lut[image[:,:,2]]
        
#    for h in range(height):
#        for w in range(width):
#            qimage_r[h][w] = r_lut[r[h][w]]
#            qimage_g[h][w] = g_lut[g[h][w]]
#            qimage_b[h][w] = b_lut[b[h][w]]

    nz_r = qimage[:,:,0][qimage[:,:,0]>0]
    nz_g = qimage[:,:,1][qimage[:,:,1]>0]
    nz_b = qimage[:,:,2][qimage[:,:,2]>0]
    AVG_IMAGE_PIXEL = BuildPixel((int(nz_r.mean()+0.5),
                                  int(nz_g.mean()+0.5),
                                  int(nz_b.mean()+0.5)))
    
    print 'Average Image Pixel: %i' % AVG_IMAGE_PIXEL
#     print 'len(nz_r): %i, nz_r.mean: %i' % (len(nz_r),nz_r.mean()+0.5)
#     print 'len(nz_g): %i, nz_g.mean: %i' % (len(nz_g),nz_g.mean()+0.5)
#     print 'len(nz_b): %i, nz_b.mean: %i' % (len(nz_b),nz_b.mean()+0.5)
    
    return qimage

def is_zero_pixel(pixel):
    r,g,b = pixel
    
    return r == 0 and g == 0 and b == 0

def unquantize(qimage, bin):
    height,width,depth = qimage.shape
    
    dimage = np.zeros((height,width,depth),dtype='uint8')
    dimage[:,:,0] = qimage[:,:,0]*bin.r.spacing + bin.r.min
    dimage[:,:,1] = qimage[:,:,1]*bin.g.spacing + bin.g.min
    dimage[:,:,2] = qimage[:,:,2]*bin.b.spacing + bin.b.min

# Old way is super slow, in the order of several seconds
#    for h in range(height):
#        for w in range(width):
#            if is_zero_pixel(qimage[h][w]):
#                dimage[h][w] = qimage[h][w]
#            else:
#                dimage_r[h][w] = qimage_r[h][w] * bin.r.spacing + bin.r.min
#                dimage_g[h][w] = qimage_g[h][w] * bin.g.spacing + bin.g.min
#                dimage_b[h][w] = qimage_b[h][w] * bin.b.spacing + bin.b.min
    
    return dimage

def BuildPixel(rgb):
    r,g,b = rgb
    
    return (r << 2*HISTOGRAM_BITS_PER_COLOR) + (g << HISTOGRAM_BITS_PER_COLOR) + b

SyntaxError: invalid syntax (<ipython-input-1-27b218b773e5>, line 126)

In [15]:
########################
### Spatial Analysis
########################

MAX_REGIONS = 4000
REGION_UNKNOWN = -2
REGION_NONE = -1

MAX_REGION_AREA = 0.25
MIN_REGION_AREA = 0.04
MAX_REGION_SIZE = 0.5
MIN_REGION_SIZE = 0.2

try:
    MIN_REGION_SCORE
except NameError:
    MIN_REGION_SCORE = -1

class ARegion:
    def __init__(self):
        self.minx = 0
        self.maxx = 0
        self.miny = 0
        self.maxy = 0
        self.score = 0
        self.size = 0
        self.avg_pixel = 0 # avg foreground pixel values (below the threshold)
        self.active = True # True==in-the-running and False==filtered-out-due-to-constraint
        self.rarity = 0 # region rarity as compared to the average image pixel
        self.fg_rarity = 0 # foreground rarity as compared to the average image pixel
        self.priority = 1 # 1==very important, 2==might be important, 3==possibly noise
    
class Regions:
    def __init__(self, qimage):
        self.height,self.width,self.depth = qimage.shape

        self.num_regions = 0
        self.catalog = []
        
        self.data = np.zeros((self.height, self.width),dtype='int')
#        for h in range(self.height):
#            for w in range(self.width):
#                self.data[h,w] = REGION_UNKNOWN

        self.data = self.data.reshape(self.height*self.width)
        self.data = self.data + REGION_UNKNOWN
        self.data = self.data.reshape(self.height, self.width)
        
        hist = self._BuildHistogram(qimage)
        mask = self._HistogramThreshold(qimage, hist)
        print 'Histogram Count Threshold: %i' % HISTOGRAM_COUNT_THRESHOLD

        self._AssignRegions(mask)
        self._ScoreRegions(qimage, hist)

    def _BuildHistogram(self, qimage):
        hist = np.zeros(COLOR_SPECTRUM)

        r = qimage[:,:,0]
        g = qimage[:,:,1]
        b = qimage[:,:,2]
        pixel= BuildPixel((r,g,b))

        flat = pixel.reshape(pixel.shape[0]*pixel.shape[1])
        hist = Series(flat).value_counts()
        hist[hist > 10000] = 10000

        # Fill in the missing indexes with zero values
        for i in range(COLOR_SPECTRUM):
            try:
                hist[i]
            except:
                hist[i] = 0

        hist = hist.sort_index(axis=0)

    #    for h in range(height):
    #        for w in range(width):
    #            rgb = qimage[h,w]
    #            pixel = BuildPixel(rgb)
    #            hist[pixel] += 1
    #            hist[pixel] = min(hist[pixel], 10000)

        return hist.values

    def _HistogramThreshold(self, qimage, hist):
        global HISTOGRAM_COUNT_THRESHOLD

        max_threshold = HISTOGRAM_SUGGESTED_THRESHOLD
        if ADAPTIVE_HISTOGRAM_THRESHOLD:
            max_threshold = max(min(min(hist[np.nonzero(hist)]) / 100, HISTOGRAM_CEILING_THRESHOLD/100) * 100, HISTOGRAM_FLOOR_THRESHOLD)

        threshold = min(hist, key=lambda x:int(abs(x-max_threshold))) + 1
        HISTOGRAM_COUNT_THRESHOLD = threshold

        mask = np.zeros((self.height,self.width,self.depth),dtype='uint8')
    #    mask_r = mask[:,:,0]
    #    mask_g = mask[:,:,1]
    #    mask_b = mask[:,:,2]

    #    for h in range(height):
    #        for w in range(width):
    #            pixel = BuildPixel(qimage[h,w])
    #            if hist[pixel] > HISTOGRAM_COUNT_THRESHOLD:
    #                mask_r[h,w] = mask_g[h,w] = mask_b[h,w] = 0
    #            else:
    #                mask[h,w] = qimage[h,w]

        # Use the super-cool-and-fast pandas DataFrame to subtract out the background pixels
        r = qimage[:,:,0]
        g = qimage[:,:,1]
        b = qimage[:,:,2]
        pixel= BuildPixel((r,g,b))
        flat_pixel = pixel.reshape(pixel.shape[0]*pixel.shape[1])

        isBackground = hist[flat_pixel] > HISTOGRAM_COUNT_THRESHOLD

        flat_r = qimage[:,:,0].reshape(self.height*self.width)
        flat_g = qimage[:,:,1].reshape(self.height*self.width)
        flat_b = qimage[:,:,2].reshape(self.height*self.width)

        df = DataFrame({'pixel':flat_pixel,'r':flat_r,'g':flat_g,'b':flat_b,'is_background':isBackground})

        tt = df['pixel'].groupby(df['is_background'])
        if True in tt.indices:
            background_ix = tt.indices[True]
            df.ix[background_ix] = 0

        mask[:,:,0] = df['r'].values.reshape(self.height,self.width)
        mask[:,:,1] = df['g'].values.reshape(self.height,self.width)
        mask[:,:,2] = df['b'].values.reshape(self.height,self.width)

        return mask

    def _FindRegion(self, w, h):
        region_merge = REGION_MERGE_PIXEL_DISTANCE #regions.width / 100

        for yofs in range(-1*region_merge, 1):
            for xofs in range(-1*region_merge, region_merge+1):
                if yofs+h < 0:
                    continue
                if xofs+w < 0:
                    continue
                if xofs+w >= self.width:
                    continue

                if self.data[yofs+h][xofs+w] >= 0:
                    return self.data[yofs+h][xofs+w]

        for xofs in range(-1*region_merge, 0):
            if xofs+w  < 0:
                continue
            if self.data[h][xofs+w] >= 0:
                return self.data[h][xofs+w]

        return REGION_NONE

    def _AssignRegions(self, mask):
        r = mask[:,:,0]
        g = mask[:,:,1]
        b = mask[:,:,2]
        pixel = BuildPixel((r,g,b))
        nz_pixels = np.nonzero(pixel)

        # Initialize region matrix for all empty pixels with REGION_NONE
        pixel = pixel.reshape(self.height * self.width)
        is_empty_pixel = pixel == 0
        flat_data = self.data.reshape(self.height * self.width)
        df = DataFrame({'data':flat_data, 'is_empty':is_empty_pixel})
        tt = df['data'].groupby(df['is_empty'])
        if True in tt.indices:
            background_ix = tt.indices[True]
            df.ix[background_ix] = REGION_NONE
        self.data = df['data'].values.reshape(self.height,self.width)

        # for all the foreground pixels (non-zero) assign a region
        for i in range(len(nz_pixels[0])):
            h = nz_pixels[0][i]
            w = nz_pixels[1][i]

            r = self._FindRegion(w, h)
            if r == REGION_NONE:
                r = self.num_regions
                self.num_regions += 1

                aregion = ARegion()
                aregion.minx = w
                aregion.maxx = w
                aregion.miny = h
                aregion.maxy = h
                aregion.size = 1

                self.catalog.append(aregion)
            else:
                self.catalog[r].minx = min(self.catalog[r].minx, w)
                self.catalog[r].miny = min(self.catalog[r].miny, h)
                self.catalog[r].maxx = max(self.catalog[r].maxx, w)
                self.catalog[r].maxy = max(self.catalog[r].maxy, h)
                self.catalog[r].size = (1 + self.catalog[r].maxx - self.catalog[r].minx) * (1 + self.catalog[r].maxy - self.catalog[r].miny)

            self.data[h,w] = r

    def RemoveRegionAt(self, index):
        self.catalog[index].minx = 0
        self.catalog[index].miny = 0
        self.catalog[index].maxx = 0
        self.catalog[index].maxy = 0

    def IsRegionTooLargeAt(self, index):
        xlen = self.catalog[index].maxx - self.catalog[index].minx
        ylen = self.catalog[index].maxy - self.catalog[index].miny

        return (xlen*ylen > MAX_REGION_AREA) or (xlen > MAX_REGION_SIZE) or (ylen > MAX_REGION_SIZE)

    def PruneLargeRegions(self):
        for i in range(self.num_regions):
            if IsRegionTooLargeAt(i):
                RemoveRegionAt(i)

    def IsRegionTooSmallAt(self, index):
        xlen = self.catalog[index].maxx - self.catalog[index].minx
        ylen = self.catalog[index].maxy - self.catalog[index].miny

        return (xlen*ylen < MAX_REGION_AREA) or (xlen < MAX_REGION_SIZE) or (ylen < MAX_REGION_SIZE)

    def PruneSmallRegions(self):
        for i in range(self.num_regions):
            if IsRegionTooSmallAt(i):
                RemoveRegionAt(i)

    def PruneRegionsByThreshold(self):
        global MIN_REGION_SCORE
        EVALUATE_PREVIOUS_INDEX = 5
        NEAREST_FACTOR = 100

        total_regions = len(self.catalog)
        if total_regions > 0:
            array = np.sort(np.array([self.catalog[i].score for i in range(self.num_regions)]))
            nz = np.nonzero(array)
            floor_score = average_score = ceiling_score = 0
    #        if len(nz[0]) > 0:
    #            floor_score = array[nz[0][0]]
    #            average_score = np.mean(array)
    #            median_score = np.median(nz[0])
    #            ceiling_score = array[nz[0][-1]]

    #        print "  Min Region Score: %i" % floor_score
    #        print "  Average Region Score: %i" % average_score
    #        print "  Median Region Score: %i" % median_score
    #        print "  Max Region Score: %i" % ceiling_score

            print "Prune Common Regions (below threshold):"
            min_score = MIN_REGION_SCORE
            if min_score == -1:
                scores = np.sort([region.score for region in self.catalog])
                score = scores[0]
                if len(scores) > EVALUATE_PREVIOUS_INDEX:
                    score = scores[-EVALUATE_PREVIOUS_INDEX]
                min_score = (score/NEAREST_FACTOR) * NEAREST_FACTOR - 1 #(np.median(nz) + np.max(nz))/2

                print "  Evaluate Previous Index: %i" % EVALUATE_PREVIOUS_INDEX
                print "  Nearest Factor: %i" % NEAREST_FACTOR
            print "  Min Score: %i" % min_score

            active_regions = []
            for aregion in self.catalog:
                if aregion.score > min_score:
                    active_regions.append(aregion)
            self.catalog = active_regions
            self.num_regions = len(active_regions)
            print "  Regions Above Threshold (Active): %i" % self.num_regions

    def PruneRegionsByMean(self):
    #     scores = []
    #     avg_pixels = []
    #     active = []

    #     for aregion in regions.catalog:
    #         scores.append(aregion.score)
    #         avg_pixels.append(aregion.avg_pixel)
    #         active.append(aregion.active)

    #     df = DataFrame({'Region Score':scores,'Region Pixel Avg':avg_pixels, 'Region Active':active})
    #     mean = df['Region Pixel Avg'].mean()

        mean = np.mean([aregion.avg_pixel for aregion in self.catalog])
        mean = 0 if np.isnan(mean) else mean

        active_regions = []
        index = 0
        for aregion in self.catalog:
            if aregion.active:
                if aregion.avg_pixel >= mean:
                    aregion.active = False
                else:
                    active_regions.append(index)
            index += 1

        print "Prune Regions Exceeding Mean:"
        print "  Mean: %i" % mean
        print "  Regions Below Mean (Active): %i" % len(active_regions)
        print "  [id]: " + str(sorted(active_regions))

    def PruneRegionsByMedian(self):
        median = np.median([aregion.avg_pixel for aregion in self.catalog])
        median = 0 if np.isnan(median) else median

        active_regions = []
        index = 0
        for aregion in self.catalog:
            if aregion.active:
                if aregion.avg_pixel >= median:
                    aregion.active = False
                else:
                    active_regions.append(index)
            index += 1

        print "Prune Regions Exceeding Median:"
        print "  Median: %i" % median
        print "  Regions Below Median (Active): %i" % len(active_regions)
        print "  [id]: " + str(sorted(active_regions))

    def PruneRegionsBySpectra(self):
        neighborhood = 50.0 #REGION_MERGE_PIXEL_DISTANCE
        active_regions = []
        index = 0
        for aregion in self.catalog:
            if aregion.active:
                if aregion.rarity < neighborhood or aregion.fg_rarity < neighborhood:
                    aregion.active = False

            if aregion.rarity > 100.0 or aregion.fg_rarity > 100.0:
                aregion.active = True

            if aregion.active:
                active_regions.append(index)

            index += 1

        print "Prune Regions in Neighborhood:"
        print "  Neighborhood: %i" % neighborhood
        print "  Regions Above Neighborhood (Active): %i" % len(active_regions)
        print "  [id]: " + str(sorted(active_regions))

    def PruneRegionsByDupe(self):
        like_regions = np.zeros(1000)

        print "Prune Duplicated Regions:"
        print "  Accuracy: %i" % DDUPE_ACCURACY_BY

        active_regions = []
        index = 0
        if DEDUPE_REGIONS == True:
            for aregion in self.catalog:
                if aregion.active:
                    val = int(aregion.rarity) #((aregion.score/DDUPE_ACCURACY_BY)*DDUPE_ACCURACY_BY) / aregion.avg_pixel
                    like_regions[val] += 1
                    if like_regions[val] > 1:
                        aregion.active = False
                    else:
                        active_regions.append(index)
                index += 1

        if self.num_regions > 0 and self.num_regions <= MIN_DEDUPE_REGIONS:
            active_regions = []
            index = 0
            for aregion in self.catalog:
                aregion.active = True
                active_regions.append(index)
                index += 1
            print "  Reinstated All Regions Below Count: %i" % MIN_DEDUPE_REGIONS
        elif len([aregion for aregion in self.catalog if aregion.active == True]) == 0:
            active_regions = []
            index = 0
            for aregion in self.catalog:
                if aregion.rarity >= 50 or aregion.fg_rarity >= 50:
                    aregion.active = True
                    active_regions.append(index)
                index += 1
            print "  Reinstated Borderline Regions with Rarity Above: 50"

        print "  Regions Deduped (Active): %i" % len(active_regions)
        print "  [id]: " + str(sorted(active_regions))

    def PruneRegionsByProximity(self):
        previous_active_region = None
        radius = REGION_MERGE_PIXEL_DISTANCE * 10

        active_regions = []
        index = 0
        for aregion in self.catalog:
            if aregion.active:
                if len(active_regions) > 0:
                    for i in active_regions:
                        region = self.catalog[i]
                        pregion_centerx = region.maxx #(previous_region.maxx - previous_region.minx)/2
                        pregion_centery = region.maxy #(previous_region.maxy - previous_region.miny)/2
                        aregion_centerx = aregion.maxx #(aregion.maxx - aregion.minx)/2
                        aregion_centery = aregion.maxy #(aregion.maxy - aregion.miny)/2
                        if (aregion_centerx >= max(pregion_centerx-radius,0) and aregion_centerx <= (pregion_centerx+radius)) and \
                           (aregion_centery >= max(pregion_centery-radius,0) and aregion_centery <= (pregion_centery+radius)):
                            aregion.active = False
                            break

                    if aregion.active:
                        active_regions.append(index)
                else:
                    active_regions.append(index)
            index += 1

        print "Pruned Clustered Regions:"
        print "  Radius of Separation: %i" % radius
        print "  Regions Isolated (Active): %i" % len(active_regions)
        print "  [id]: " + str(sorted(active_regions))

    def _ScoreOneRegion(self, aregion, qimage, hist):
        score = 0
        count = 0
        r_count = g_count = b_count = 0
        rgb_count = 0
        total_pixels = 0
        fg_pixels = 0

        # each region should be at least REGION_MERGE_PIXEL_DISTANCE pixels wide and high
        maxy = aregion.maxy
        miny = aregion.miny
        ysize = maxy - miny
    #     if ysize < REGION_MERGE_PIXEL_DISTANCE:
    #         delta = REGION_MERGE_PIXEL_DISTANCE - ysize
    #         maxy = min(maxy+delta/2+1, height)
    #         miny = max(miny-delta/2+1, 0)

        maxx = aregion.maxx
        minx = aregion.minx
        xsize = maxx - minx
    #     if xsize < REGION_MERGE_PIXEL_DISTANCE:
    #         delta = REGION_MERGE_PIXEL_DISTANCE - xsize
    #         maxx = min(maxx+delta/2+1, width)
    #         minx = max(minx-delta/2+1, 0)

        for h in range(miny, maxy):
            for w in range(minx, maxx):
                r,g,b = qimage[h,w]
                pixel = BuildPixel((r,g,b))
                total_pixels += 1

                fg_pixels += abs(float(AVG_IMAGE_PIXEL-pixel)/AVG_IMAGE_PIXEL)

                if hist[pixel] > HISTOGRAM_COUNT_THRESHOLD:
                    continue

                r_count += r
                g_count += g
                b_count += b
                rgb_count += 1

                distance = HISTOGRAM_COUNT_THRESHOLD - hist[pixel]
                count += 1
                score += distance

        if count == 0:
            return 0
        else:
            r_count /= rgb_count
            g_count /= rgb_count
            b_count /= rgb_count
            aregion.avg_pixel = BuildPixel((r_count,g_count,b_count))
            aregion.rarity = round((float(fg_pixels)/total_pixels), 4)*100
            aregion.fg_rarity= round(float(abs(AVG_IMAGE_PIXEL-aregion.avg_pixel))/AVG_IMAGE_PIXEL, 4) * 100
    #        aregion.rarity = round(float(avg_rpixel)/avg_qpixel, 4)
    #        aregion.rarity = round(float(fg_pixels)/(total_pixels), 4)*100
    #        aregion.rarity = round(float(aregion.avg_pixel)/avg_qpixel, 4)*100

            return min(1000, int((1000*score)/(count*HISTOGRAM_COUNT_THRESHOLD)))

    def _ScoreRegions(self, qimage, hist):
        for aregion in self.catalog:
            aregion.score = self._ScoreOneRegion(aregion, qimage, hist)
            
    def Save(self, output_dir, image_file_name):
        tups = []
        for aregion in self.catalog:
            tups.append((aregion.minx,aregion.maxx,aregion.miny,aregion.maxy))

        data = {}
        data[image_file_name] = tups

        output = output_dir + 'regions.json'
        with open(output, 'a+') as outfile:
             json.dump(data, outfile, sort_keys = True, indent = 4, ensure_ascii=False)


In [1]:
def RegionPredictor(model, regions):
    scores = []
    avg_pixels = []
    active = []
    mod = []
    div = []
    region_rarity = []
    fg_rarity = []
    size = []
    for aregion in regions.catalog:
        scores.append(aregion.score)
        avg_pixels.append(aregion.avg_pixel)
        active.append(aregion.active)
        mod.append(np.mod(aregion.score,aregion.avg_pixel))
        div.append(aregion.score/aregion.avg_pixel)
        region_rarity.append(aregion.rarity)
        fg_rarity.append(aregion.fg_rarity)
        size.append(aregion.size)
    df = DataFrame({'Region Score':scores, 
                    'FG Pixel Avg':avg_pixels, 
                    'Region Active':active, 
                    'Region Rarity':region_rarity, 
                    'Size':size,
                    'FG Rarity':fg_rarity})
    df = df.reindex(columns=['Region Score', 'Region Rarity', 'FG Pixel Avg', 'FG Rarity', 'Size', 'Region Active'])
    
    if model is not None and regions.num_regions > 0:
        test_df = df.drop(['Region Active'],axis=1)
        df['Prediction'] = model.predict(test_df)
        df = df.reindex(columns=['Region Score', 'Region Rarity', 'FG Pixel Avg', 'FG Rarity', 'Size', 'Region Active', 'Prediction'])
        print df
        
        for i in range(regions.num_regions):
            aregion = regions.catalog[i]
            # very important
            if aregion.active and df['Prediction'][i]:
                aregion.priority = 1
            # important
            elif ~aregion.active and df['Prediction'][i]:
                aregion.priority = 2
            # somewhat important
            elif aregion.active and ~df['Prediction'][i]:
                aregion.priority = 3
            # not as important
            else:
                aregion.priority = 4
            
#         if len(df.index) > 0:
#             by_score = df['FG Pixel Avg'].groupby([df['Prediction'],df['Region Score']])
#             print by_score.value_counts()
#             print '\nmean: '
#             print df.mean()
#             print '\nstd: '
#             print df.std()
#             print '\n'
#             print df['Region Score'].groupby(df['Prediction']).size()

    df.to_csv(proc_files_dir + 'df.csv', mode='a', sep=',')
    
    return df

def MarkRegions(image, regions):
    width = regions.width
    height = regions.height
    red = (255,0,0)
    yellow = (255,255,0)
    green = (124,252,0)
    blue = (0,0,255)
    
#    for aregion in regions.catalog:
    for i in range(regions.num_regions):
        aregion = regions.catalog[i]
        cv2.rectangle(image, (max(aregion.minx-2, 0), max(aregion.miny-2, 0)), (min(aregion.maxx+2, width-1), min(aregion.maxy+2, height-1)), red, thickness=2)
        if aregion.priority == 1:
            cv2.putText(image, str(i), (max(aregion.minx-2, 0), max(aregion.miny-2, 0)), cv2.FONT_HERSHEY_SIMPLEX, 2,red,4)
        elif aregion.priority == 2:
            cv2.putText(image, str(i), (max(aregion.minx-2, 0), max(aregion.miny-2, 0)), cv2.FONT_HERSHEY_SIMPLEX, 2,yellow,4)
        elif aregion.priority == 3:
            cv2.putText(image, str(i), (max(aregion.minx-2, 0), max(aregion.miny-2, 0)), cv2.FONT_HERSHEY_SIMPLEX, 2,green,4)
        elif SHOW_DUPES:
            cv2.putText(image, str(i), (max(aregion.minx-2, 0), max(aregion.miny-2, 0)), cv2.FONT_HERSHEY_SIMPLEX, 2,blue,4)

def MarkAnnotations(image, notes, alr_width, alr_height):
    pink = (255,105,180)
    if notes is None:
        return
    
    for anote in notes:
        print anote
        (x,y,r) = anote
        pos = (int(x*image.shape[1])/alr_width,int(y*image.shape[0])/alr_height)        
        cv2.circle(image, pos, int(r*image.shape[1]/alr_width), pink, thickness=3)
    
def PlotHistogram_RGB(qimage):
    qr = qimage[:,:,0]
    qg = qimage[:,:,1]
    qb = qimage[:,:,2]

    height,width,depth = qimage.shape
    r_hist = Series(qr.reshape(height*width)).value_counts()
    g_hist = Series(qg.reshape(height*width)).value_counts()
    b_hist = Series(qb.reshape(height*width)).value_counts()

    hist_index = np.arange(2**HISTOGRAM_BITS_PER_COLOR)
    my_dpi = 160
    bar_width = 0.2
    r_index = hist_index
    g_index = hist_index + bar_width
    b_index = hist_index + (bar_width * 2)
    plt.figure(figsize=(1900/my_dpi, 1280/my_dpi), dpi=my_dpi)
    plt.bar(r_index, r_hist, bar_width, color='r')
    plt.bar(g_index, g_hist, bar_width, color='g')
    plt.bar(b_index, b_hist, bar_width, color='b')

    plt.xlabel('Bins')
    plt.ylabel('Count')
    plt.xticks(hist_index + bar_width, hist_index)
    
def PrintSettings():
    settings = \
    'HISTOGRAM_BITS_PER_COLOR = {0}\n'\
    'NUM_BINS = {1}\n'\
    'COLOR_SPECTRUM = {2}\n'\
    'ADAPTIVE_HISTOGRAM_THRESHOLD = {3}\n'\
    'HISTOGRAM_CEILING_THRESHOLD = {4}\n'\
    'HISTOGRAM_FLOOR_THRESHOLD = {5}\n'\
    'SHOW_DUPES = {6}\n'\
    'DEDUPE_REGIONS = {7}\n'\
    'MIN_DEDUPE_REGIONS = {8}\n'\
    'MIN_REGION_SCORE = {9}\n'\
    .format(HISTOGRAM_BITS_PER_COLOR,
             NUM_BINS,
             COLOR_SPECTRUM,
             ADAPTIVE_HISTOGRAM_THRESHOLD,
             HISTOGRAM_CEILING_THRESHOLD,
             HISTOGRAM_FLOOR_THRESHOLD,
             SHOW_DUPES,
             DEDUPE_REGIONS,
             MIN_DEDUPE_REGIONS,
             MIN_REGION_SCORE)
    
    if ADAPTIVE_HISTOGRAM_THRESHOLD:
        print 'Adaptive Threshold Enabled \n'
    else:
        print 'Adaptive Threshold Disabled with count set at %i \n' % HISTOGRAM_SUGGESTED_THRESHOLD
        
    print settings

In [None]:
import json

class AnnotationsLR():
    def __init__(self, width=0, height=0, annotations={}):
        self.width = width
        self.height = height
        self.annotations = annotations

    def jsonSerializer(self, f):
        with open(f, 'w') as outfile:
            json.dump(self, outfile, cls=JsonSerializer)

    def jsonDeserializer(self, f):
        with open(f, 'r') as infile:
            data = infile.read()
            self.__dict__ = json.loads(data)

class JsonSerializer(json.JSONEncoder):
    def default(self, obj):
        if not isinstance(obj, AnnotationsLR):
            return super(JsonSerializer, self).default(obj)

        return obj.__dict__

