In [1]:
from shapely.geometry import Polygon
from PIL import ImageDraw as draw
import json
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import random
import pickle
random.seed(41)
import tqdm
import os
import sys

sys.path.append('/trunk/shared/cuneiform/CuneiformDating/image_classification/segmentation/code/segmentAnythingUtils')
from segmentAnythingUtils import *

# sys.path.append('/trunk/shared/cuneiform/CuneiformDating/image_classification/segmentation/code/ensembleSegmentationUtils')
# from ensembleSegmentationUtils import *
# sys.path.append('/trunk/shared/cuneiform/CuneiformDating/image_classification/segmentation/code/segmentAnythingUtils')
# from segmentAnythingUtils import *

In [2]:
# Load the full list of ids (JSON file read once; the ids are used as strings below).
with open('/trunk/shared/cuneiform/full_data/all_ids.json', 'r') as ids_file:
    all_ids = json.load(ids_file)

# Draw 50 ids at random (reproducible through the random.seed call in the
# import cell) and build the matching zero-padded image file names.
sampled_imgs = random.sample(all_ids, 50)
image_names = [f"P{img.zfill(6)}.jpg" for img in sampled_imgs]

In [None]:
#check iou and segmentation from all three methods for these sample images
def showIoUResults(pid_list):
    """Plot each image next to its three segmentation cutouts and print the IoUs.

    For every id in ``pid_list`` a 1x4 figure is drawn: the full image, the
    connected-components cutout, the peak-based cutout and the saved
    segmentAnything cutout. When a method has no result file for the id, a
    flat grey placeholder is shown in its panel instead. After the figure,
    the image file name and the three pairwise IoU values read from
    ``iou_segmentation.json`` are printed.

    Ids whose full image cannot be read are reported and skipped rather than
    failing inside ``cv2.cvtColor``.

    Args:
        pid_list: iterable of ids. Each id is used as a key into the IoU JSON
            and zero-padded to six digits to build file names.

    Raises:
        KeyError: if an id is not present in the IoU JSON.

    NOTE(review): ``cv2`` and the helpers (``read_cc_based``,
    ``resizeCCExample``, ``get_front_polygon``, ``getCoutoutFromCoords``,
    ``read_peak_based``, ``convert_peak_to_coords``) are not imported
    explicitly in this notebook; presumably they come from
    ``from segmentAnythingUtils import *`` -- confirm.
    """
    with open('/trunk/shared/cuneiform/CuneiformDating/image_classification/segmentation/code/temp_results/iou_segmentation.json', 'r') as iou_file:
        iou_info = json.load(iou_file)

    # Grey placeholder for missing results; loop-invariant, so built once.
    null_img = np.tile(145, [500,500,3])

    for pid in pid_list:
        image_name = "P" + str(pid).zfill(6) + ".jpg"
        stem = "P" + str(pid).zfill(6)

        image_path = "/trunk/shared/cuneiform/full_data/images/" + image_name
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print(image_name, "could not be read, skipping")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Connected-components result.
        cc_path = "/trunk2/datasets/cuneiform/segmentation/seg_viz_July05/" + stem + ".json"
        if os.path.exists(cc_path):
            cc_example = read_cc_based(cc_path)
            cc_example = resizeCCExample(cc_example)
            cc_front = get_front_polygon(cc_example)
            cc_cutout = getCoutoutFromCoords(cc_front, image)
        else:
            cc_cutout = null_img

        # Peak-based result.
        peak_path = "/trunk2/datasets/cuneiform/segmentation/seg_peak_base_July06/" + stem + ".json"
        if os.path.exists(peak_path):
            peak_example = read_peak_based(peak_path)
            peak_front = get_front_polygon(convert_peak_to_coords(peak_example))
            peak_cutout = getCoutoutFromCoords(peak_front, image)
        else:
            peak_cutout = null_img

        # segmentAnything cutout is stored as an image; fall back to the
        # placeholder when the file is absent or cannot be decoded.
        sa_cutout_path = "/trunk/shared/cuneiform/full_data/segmented_images/" + image_name
        sa_cutout = cv2.imread(sa_cutout_path) if os.path.exists(sa_cutout_path) else None
        if sa_cutout is None:
            sa_cutout = null_img
        else:
            sa_cutout = cv2.cvtColor(sa_cutout, cv2.COLOR_BGR2RGB)

        iou_sa_cc = iou_info[pid]["iou_sa_cc"]
        iou_sa_peak = iou_info[pid]["iou_sa_peak"]
        iou_peak_cc = iou_info[pid]["iou_peak_cc"]

        fig, axarr = plt.subplots(1,4, figsize=(15, 15))
        axarr[0].imshow(image)
        axarr[1].imshow(cc_cutout)
        axarr[1].set_xlabel("connected components")
        axarr[2].imshow(peak_cutout)
        axarr[2].set_xlabel("peak based")
        axarr[3].imshow(sa_cutout)
        axarr[3].set_xlabel("segmentAnything")
        plt.show()
        # Release the figure so long id lists do not accumulate open figures.
        plt.close(fig)

        print(image_name)
        print("IoU_sa_cc", iou_sa_cc)
        print("iou_sa_peak", iou_sa_peak)
        print("iou_peak_cc", iou_peak_cc)

showIoUResults(sampled_imgs)

In [None]:
#try kmeans clustering to decide threshold

from sklearn.cluster import KMeans
from collections import defaultdict

with open('/trunk/shared/cuneiform/CuneiformDating/image_classification/segmentation/code/temp_results/iou_segmentation.json', 'r') as f:
    iou_info = json.load(f)

# One row per id in all_ids, columns in the order (iou_sa_cc, iou_sa_peak, iou_peak_cc).
iou_keys = ("iou_sa_cc", "iou_sa_peak", "iou_peak_cc")
iou_array = np.array([[iou_info[pid][key] for key in iou_keys] for pid in all_ids])

# Six clusters with a fixed random_state so the labelling is repeatable.
kmeans = KMeans(n_clusters=6, random_state=0, n_init="auto").fit(iou_array)
labels = kmeans.labels_

# Cluster label -> plotting colour (one entry per cluster).
c_dict = dict(enumerate(['red', 'green', 'blue', 'yellow', 'pink', 'purple']))
colors = [c_dict[l] for l in labels]

# 3D scatter of the three IoU values, coloured by cluster.
fig = plt.figure(figsize=(16,16))
ax = plt.axes(projection='3d')
ax.set_xlabel('iou_sa_cc')
ax.set_ylabel('iou_sa_peak')
ax.set_zlabel('iou_peak_cc')
ax.scatter3D(*iou_array.T, c=colors)

# Group ids by their cluster label; labels are in the same order as all_ids.
clusters = defaultdict(list)
for label, pid in zip(labels, all_ids):
    clusters[label].append(pid)

In [None]:
#Visualize examples from each of the clusters

for label,cluster in clusters.items():
    print('*'*100)
    print(c_dict[label], "cluster has", len(cluster), "images")
    print('*'*100)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the preview at the cluster size.
    showIoUResults(random.sample(cluster, min(10, len(cluster))))

In [None]:
#apply rules to get good segments
# Each id in all_ids ends up in exactly one of: non_rgb_images, bad_images
# (back_faces is a subset of it) or good_images.
total = len(all_ids)
bad_count = 0
back_face = 0
non_rgb = 0
# An image is rejected only when BOTH IoUs fall below their threshold.
threshold_cc = 0.78
threshold_peak =0.72
non_rgb_images = []
bad_images = []
good_images = []
back_faces = []


# Context manager so the annotation file handle is closed after loading.
with open("/trunk2/datasets/cuneiform/image_anno.json", 'r') as anno_file:
    image_anno = json.load(anno_file)

with open('/trunk/shared/cuneiform/CuneiformDating/image_classification/segmentation/code/temp_results/iou_segmentation.json', 'r') as f:
    iou_info = json.load(f)

for pid in tqdm.tqdm(all_ids):
    iou_sa_cc = iou_info[pid]["iou_sa_cc"]
    iou_sa_peak = iou_info[pid]["iou_sa_peak"]

    # Missing or falsy "RGB" annotation: excluded and counted as non-RGB.
    if not image_anno[pid].get("RGB"):
        non_rgb+=1
        non_rgb_images.append(pid)
        continue
    # Both IoUs equal to -1 are also booked under the non-RGB count.
    # NOTE(review): -1 looks like a "not computed" sentinel -- confirm.
    if(iou_sa_cc==-1 and iou_sa_peak==-1):
        non_rgb+=1
        non_rgb_images.append(pid)
        continue

    # Zero overlap with both other methods is counted as a back face,
    # which is also a bad segmentation.
    if(iou_sa_cc==0 and iou_sa_peak==0):
        back_face+=1
        bad_count+=1
        bad_images.append(pid)
        back_faces.append(pid)
        continue

    if iou_sa_cc<threshold_cc and iou_sa_peak<threshold_peak:
        bad_count+=1
        bad_images.append(pid)
    else:
        good_images.append(pid)

print("Total images", total)
print("Non rgb images", non_rgb)
print("Bad Segmentation", bad_count, "out of which", back_face, "are back faces")
print("Final usable images", len(good_images))

In [None]:
#visualize good segments
# Cap the preview size: random.sample raises ValueError when asked for more
# items than good_images contains.
for pid in random.sample(good_images, min(50, len(good_images))):
    image_name = 'P'+str(pid).zfill(6)+".jpg"
    fig, axarr = plt.subplots(1,2)
    # Open both files in a context manager so the handles are closed once
    # the figure has been rendered.
    with Image.open('/trunk/shared/cuneiform/full_data/images/'+image_name) as full_image, \
         Image.open('/trunk/shared/cuneiform/full_data/segmented_images/'+image_name) as segmented_image:
        axarr[0].imshow(full_image)
        axarr[1].imshow(segmented_image)
        plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
    # The original printed `img`, which is not defined at this scope
    # (NameError on a fresh kernel); print the file name being shown instead.
    print(image_name)