# Verify that the ground-truth marking is consistent with the CSV file

In [None]:
import os
import gzip
import pickle
import logging
import numpy as np
from skimage import io
from datetime import datetime

# Directory that receives the timestamped debug log file.
log_location = "../logs/"
# Training archive path; not read by any code visible in this notebook.
PATH_TO_TRAIN = "../data/input/data_stage1_train.zip"
# CSV holding, per row, an image id and its run-length encoded mask pixels.
PATH_TO_CSV_FILE = "../data/input/stage1_train_labels.csv"
# Directory with the per-image "<id>.png" and gzip-compressed "<id>.pickle" files.
PATH_TO_INTERMEDIATE = "../data/intermediate"

# Subtracted from both image dimensions when sizing the CSV mask buffer.
kernel_size = 16

In [None]:
# Send DEBUG-and-above records of the root logger to a timestamped file
# under `log_location`.
logger = logging.getLogger()
# basicConfig opens the log file right away and raises if its directory is
# missing, so make sure the directory exists first.
os.makedirs(log_location, exist_ok=True)
log_file_name = 'csv.' + datetime.now().strftime("%Y%m%d%H%M%S.%f") + '.log'
logging.basicConfig(format="%(asctime)-15s %(message)s",
                    level=logging.DEBUG,
                    filename=os.path.join(log_location, log_file_name))

In [None]:
def log(msg):
    """Write *msg* to the root logger at DEBUG severity."""
    logging.log(logging.DEBUG, msg)
 
def print_log(msg):
    """Send *msg* to the debug log first, then echo it on standard output."""
    for emit in (log, print):
        emit(msg)

In [None]:
# Decode the CSV into one flat 0/1 mask per image.
#
# images_to_process maps an image id to a dict with:
#   'shape'  - (rows, cols) of the mask, i.e. the PNG size minus kernel_size
#   'pixels' - flat int8 array of rows * cols entries, 1 where the CSV marks a pixel
#   'values' - list of (start, length) runs collected from every CSV row of the image
csv_data = []
images_to_process = {}
with open(PATH_TO_CSV_FILE,'r') as f:
    csv_data = f.readlines()

# The first line is skipped as a header; every other line is read as
# "<image id>,<start length start length ...>".
for current_line in csv_data[1:]:
    line_fields = current_line.split(",")
    current_image = line_fields[0]
    if current_image not in images_to_process:
        current_image_file = os.path.join(PATH_TO_INTERMEDIATE,current_image + ".png")
        # Images without an intermediate PNG are ignored entirely.
        if not os.path.isfile(current_image_file):
            continue
        # Only the height and width are needed, so the image is read without a
        # grayscale conversion. The original `as_grey=True` keyword is rejected
        # by newer scikit-image releases (it was renamed to `as_gray`).
        original_img = io.imread(current_image_file)
        # NOTE(review): presumably the intermediate PNG is larger than the
        # labelled image by kernel_size in each dimension - confirm.
        image_shape = (original_img.shape[0] - kernel_size, original_img.shape[1] - kernel_size)
        del original_img
        images_to_process[current_image] = {
            'shape': image_shape,
            'pixels': np.zeros(image_shape[0] * image_shape[1], dtype='int8'),
            'values': [],
        }
    # Splitting on any whitespace tolerates repeated spaces and the trailing
    # newline; tokens alternate between run start and run length.
    run_tokens = line_fields[1].split()
    images_to_process[current_image]['values'] += [
        (int(pixel_start), int(pixel_length))
        for pixel_start, pixel_length in zip(run_tokens[0::2], run_tokens[1::2])
    ]

for current_image in images_to_process:
    image_entry = images_to_process[current_image]
    image_entry['values'].sort(key=lambda x : x[0])
    for pixel_start, pixel_length in image_entry['values']:
        # Run starts are treated as 1-based, hence the shift by one on both ends.
        image_entry['pixels'][pixel_start - 1:pixel_start + pixel_length - 1] = 1
    print(image_entry['values'])
     

In [None]:
# Compare every non-augmented pickled slice against the mask decoded from the
# CSV and report each position where the two labels disagree.
for current_image in images_to_process:
    print_log("Processing:{}".format(current_image))
    error_count = 0
    # NOTE: pickle.load can execute arbitrary code; only open pickles that this
    # project produced itself.
    with gzip.open(os.path.join(PATH_TO_INTERMEDIATE,current_image + '.pickle'),'rb') as f:
        image_pickled = pickle.load(f)
    csv_pixels = images_to_process[current_image]['pixels']
    # Offset used only in the diagnostic print; derived from kernel_size
    # instead of the previously hard-coded 8.
    half_kernel = kernel_size // 2
    last_pos = -1
    print(image_pickled['original_shape'], len(image_pickled['slices']))
    for current_slice in image_pickled['slices']:
        # NOTE(review): the flat index is x + y * original_shape[0]; confirm this
        # matches the pixel ordering used by the CSV run-length encoding.
        current_pos = current_slice['current_x'] + current_slice['current_y'] * image_pickled['original_shape'][0]
        # Skip a slice that lands on the same position as the last checked one.
        if current_pos == last_pos:
            continue
        # Augmented slices are not compared against the CSV.
        if current_slice['augmented'] == 1:
            continue
        if csv_pixels[current_pos] != current_slice['is_nuclei']:
            print('augmented:', current_slice['augmented'])
            print('current_y', current_slice['current_y'] , 'current_x', current_slice['current_x'], 'current_y(hk)', current_slice['current_y'] + half_kernel , 'current_x(hk)', current_slice['current_x'] + half_kernel )
            print('csv:' , csv_pixels[current_pos] , 'mask:', current_slice['is_nuclei'])
            print("Error in {} - {} ".format(current_image, current_pos))
            error_count +=1
        last_pos = current_pos
    print('error_count:', error_count)
    

In [None]:
"""

def is_valid_file(file_name):
    if current_image_file.startswith("train") or current_image_file.startswith("test") or current_image_file.startswith("validation") or (not current_image_file.endswith("pickle")):
        return False
    else:
        return True

image_pickled = None
for current_image_file in os.listdir(PATH_TO_INTERMEDIATE):
    if not is_valid_file(current_image_file):
        continue
    current_image_name = current_image_file.split(".")[0]
    log("Processing:{}".format(current_image_name))
    with gzip.open(os.path.join(PATH_TO_INTERMEDIATE,current_image_file),'rb') as f:
        image_pickled = pickle.load(f)
    images_to_process[current_image_name] = image_pickled 
    
"""    
     