# Predict Noisy Nodes in Unknown Input

In [None]:
# Notebook configuration: every path below is built relative to PATH_PREFIX.
PATH_PREFIX = '../'
# Directory (under PATH_PREFIX) and file name of the checkpoint that the
# "Restore Model" cell loads.
MODEL_SAVE_DIR = 'Model(Noise)/'
MODEL_FILE = 'model.ckpt'
# Directory (under PATH_PREFIX) holding the JSON data files.
DATA_DIR = 'InclusiveEvents_Iteration1_Parsed/'
# Template for one data file; formatted with a file number that is
# zero-padded to four digits.
DATA_FILE_PATH = PATH_PREFIX + DATA_DIR + 'dataset_{:04d}.json'
# Directory that receives the CSV summary and the saved plots.
OUTPUT_DIR = PATH_PREFIX + 'Prediction1/'
OUT_FILE_PATH = OUTPUT_DIR + 'result.csv'

import sys
# Put PATH_PREFIX on the module search path; presumably this is what lets the
# project modules imported in the next cell (graphGenerator, plotLayer,
# modelNoise) resolve - confirm against the repository layout.
sys.path.append(PATH_PREFIX)

# Number of the first data file to test and how many consecutive files to test.
FILE_NUMBER_TO_START_TESTING_FROM = 81
FILES_TO_TEST = 20

# Upper bound on the number of worker threads kept in the pool at once.
NUMBER_OF_THREADS = 20

In [None]:
#@title Imports

from __future__ import absolute_import, division, print_function

from graphGenerator import to_graph_dict_without_edges
from plotLayer import PlotSingleImage, plotLayersSinglePlot, PlotModelPrediction

import threading
import os.path
import ujson
import time

from graph_nets import utils_np, utils_tf
import modelNoise as model

import numpy as np
import tensorflow as tf

In [None]:
#@title Helper functions


def generate_dict_graphs(raw_data, offset, batch_size):
    """Build the input graph dicts for one batch of raw datapoints.

    Args:
        raw_data: indexable collection of raw datapoints.
        offset: index of the first datapoint of the batch.
        batch_size: number of consecutive datapoints to convert.

    Returns:
        A list with one graph dict per datapoint, in index order. The second
        value returned by ``to_graph_dict_without_edges`` is discarded.
    """
    graph_dicts = []
    samples = (raw_data[index] for index in range(offset, offset + batch_size))
    for sample in samples:
        graph_dict, _unused = to_graph_dict_without_edges(sample)
        graph_dicts.append(graph_dict)
    return graph_dicts


def create_placeholders(raw_data, offset, batch_size):
    """Create graph placeholders shaped after one batch of datapoints.

    Args:
        raw_data: indexable collection of raw datapoints.
        offset: index of the first datapoint of the batch.
        batch_size: number of consecutive datapoints used as the template.

    Returns:
        The placeholder produced by ``utils_tf.placeholders_from_data_dicts``
        for the batch's graph dicts.
    """
    return utils_tf.placeholders_from_data_dicts(
        generate_dict_graphs(raw_data, offset, batch_size))


def create_feed_dict(raw_data, offset, batch_size, input_ph):
    """Create the feed dict that binds one batch of data to ``input_ph``.

    Args:
        raw_data: indexable collection of raw datapoints.
        offset: index of the first datapoint of the batch.
        batch_size: number of consecutive datapoints in the batch.
        input_ph: placeholder that the batch is fed into.

    Returns:
        A single-entry dict mapping ``input_ph`` to the batch converted with
        ``utils_np.data_dicts_to_graphs_tuple``.
    """
    batch_dicts = generate_dict_graphs(raw_data, offset, batch_size)
    return {input_ph: utils_np.data_dicts_to_graphs_tuple(batch_dicts)}


def get_noise_pos(data):
    """Split the hit positions of a datapoint into true hits and noise.

    Args:
        data: mapping with keys ``'hL'`` and ``'gthL'``, each an iterable of
            layers, where a layer is an iterable of ``(x, y)`` pairs.

    Returns:
        A tuple ``(true_hits, noise_hits)`` of sets of ``(x, y)`` tuples:
        every point listed under ``'gthL'``, and every point listed under
        ``'hL'`` that is not listed under ``'gthL'``.

    Raises:
        Exception: if a ``'gthL'`` point does not appear under ``'hL'``.
    """
    all_hits = set()
    for layer in data['hL']:
        all_hits.update((x, y) for x, y in layer)

    true_hits = set()
    for layer in data['gthL']:
        for x, y in layer:
            point = (x, y)
            if point in all_hits:
                true_hits.add(point)
                continue
            raise Exception(
                'Ground Truth Hit Layer has extra point that doesn\'t exist in Hit Layer'
            )

    return true_hits, all_hits - true_hits

In [None]:
#@title Restore Model

# Rebuild the TensorFlow graph from scratch and restore the trained weights.
tf.reset_default_graph()

# The first test file is loaded here only so that the placeholders can be
# derived from real data; the context manager closes the handle right away.
with open(DATA_FILE_PATH.format(FILE_NUMBER_TO_START_TESTING_FROM), 'r') as data_file:
    test_raw_data = ujson.loads(data_file.read())

input_ph = create_placeholders(test_raw_data, 0, 100)

num_processing_steps = 20

# NOTE: this rebinds the name `model` from the imported modelNoise module to
# the network instance, so re-running this cell without re-running the imports
# cell will most likely fail on the attribute lookup.
model = model.EncodeProcessDecode(edge_output_size=None, node_output_size=2)

test_outputs = model(input_ph, num_processing_steps)

# Done after the model is built so that the placeholder itself can also be
# fetched with sess.run (see the graph_nets utils_tf documentation).
input_ph = utils_tf.make_runnable_in_session(input_ph)

saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess, PATH_PREFIX + MODEL_SAVE_DIR + MODEL_FILE)
print("Model restored")

In [None]:
#@title Test Unknown Input

# State shared by all worker threads.
skipped_files = []  # numbers of the files whose evaluation raised
thread_pool = []  # worker threads that have been started and not yet joined
lock = threading.Lock()  # guards the session, console output and result files

# Make sure the output directory exists before anything is written into it;
# opening the CSV below would otherwise fail on a fresh checkout.
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Write the CSV header only once so that repeated runs append to the same file.
if not os.path.isfile(OUT_FILE_PATH):
    with open(OUT_FILE_PATH, 'w') as out_file:
        out_file.write(
            'File number, Datapoint, # Noisy hits, # Noisy hits predicted by the model, # Noisy hits failed to predict, # True hits incorrectly predicted as noisy\n'
        )


def testing_thread(fnum):
    """Evaluate the restored model on one data file and report mismatches.

    The file numbered ``FILE_NUMBER_TO_START_TESTING_FROM + fnum`` is loaded
    and its first 100 datapoints are run through the model. For every
    datapoint where the predicted noise positions differ from the labelled
    ones, one row is appended to ``OUT_FILE_PATH`` and three plots (reference,
    input, prediction) are saved under ``OUTPUT_DIR``.

    Args:
        fnum: zero-based offset of the file relative to
            ``FILE_NUMBER_TO_START_TESTING_FROM``.

    Returns:
        None. If building the feed dict or running the session raises, the
        file number is appended to ``skipped_files`` and the function returns
        early.
    """
    file_number = FILE_NUMBER_TO_START_TESTING_FROM + fnum
    batch_size = 100  # datapoints evaluated per file
    # Side length of a hit-layer image. Node coordinates are recovered by
    # scaling the first two node features by this value (assumes the graph
    # generator normalised them by the same factor - TODO confirm).
    dimension = 1024

    with open(DATA_FILE_PATH.format(file_number), 'r') as data_file:
        test_raw_data = ujson.loads(data_file.read())

    # One lock serialises the session run, the progress output and the
    # skipped_files bookkeeping. `with` guarantees the lock is released on
    # every exit path, including the early return below.
    with lock:
        print('testing file #{:04d} ...'.format(file_number), end=' ')
        start_time = time.time()
        try:
            feed_dict = create_feed_dict(test_raw_data, 0, batch_size, input_ph)
            test_values = sess.run({
                'inputs': input_ph,
                'outputs': test_outputs
            },
                                   feed_dict=feed_dict)
        except Exception as e:
            # Deliberate best effort: a file that cannot be evaluated is
            # recorded and skipped instead of aborting the whole run.
            print('skipped because of exception: {}'.format(e))
            skipped_files.append(file_number)
            return
        else:
            elapsed_time = time.time() - start_time
            print('finished in {:.2f}s'.format(elapsed_time))

    input_dicts = utils_np.graphs_tuple_to_data_dicts(test_values['inputs'])
    # Only the last element of the model outputs is scored.
    output_dicts = utils_np.graphs_tuple_to_data_dicts(test_values['outputs'][-1])

    for i in range(batch_size):
        raw_data = test_raw_data[i]
        datapoint = i + 1  # datapoints are reported 1-based

        # A node is predicted as noise when the arg-max over its output
        # vector is non-zero.
        is_noise = np.argmax(output_dicts[i]['nodes'], axis=-1).astype(bool)
        predicted_noise_pos = set()
        for j, node in enumerate(input_dicts[i]['nodes']):
            if is_noise[j]:
                x, y = tuple((node[:2] * dimension).astype(int))
                predicted_noise_pos.add((x, y))

        _input_non_noise_pos, input_noise_pos = get_noise_pos(raw_data)
        missed = input_noise_pos.difference(predicted_noise_pos)
        false_prediction = predicted_noise_pos.difference(input_noise_pos)

        if not missed and not false_prediction:
            continue

        # Writing the CSV row and plotting happen under the lock; `with`
        # keeps a failure in here from leaving the lock held and blocking
        # every other worker.
        with lock:
            with open(OUT_FILE_PATH, 'a') as out_file:
                out_file.write(
                    '{:04d}, {:03d}, {:03d}, {:03d}, {:03d}, {:03d}\n'.format(
                        file_number, datapoint,
                        len(input_noise_pos), len(predicted_noise_pos),
                        len(missed), len(false_prediction)))

            PlotSingleImage(
                raw_data,
                title='Labelled image {:04d}.{:03d} for reference'.format(
                    file_number, datapoint),
                save_loc=OUTPUT_DIR + '{:04d}.{:03d}_reference'.format(
                    file_number, datapoint))

            raw_data_hl = []
            for hL in raw_data['hL']:
                layer_image = np.zeros((dimension, dimension))
                # An empty layer would produce the index `()`, which assigns
                # to the whole array; leave such layers blank instead.
                if len(hL) > 0:
                    layer_image[tuple(np.array(list(hL)).T.tolist())] = 1
                raw_data_hl.append(layer_image)
            plotLayersSinglePlot(
                np.array(raw_data_hl),
                title='Input image {:04d}.{:03d} to the model'.format(
                    file_number, datapoint),
                save_loc=OUTPUT_DIR + '{:04d}.{:03d}_input'.format(
                    file_number, datapoint))

            PlotModelPrediction(
                raw_data['hL'],
                predicted_noise_pos,
                missed,
                false_prediction,
                title='Prediction by the model for image {:04d}.{:03d}'.format(
                    file_number, datapoint),
                save_loc=OUTPUT_DIR + '{:04d}.{:03d}_output'.format(
                    file_number, datapoint))


# Start one worker per file while keeping at most NUMBER_OF_THREADS of them
# in the pool: once the pool is full, wait for the oldest worker first.
for fnum in range(FILES_TO_TEST):
    if len(thread_pool) == NUMBER_OF_THREADS:
        thread = thread_pool.pop(0)
        thread.join()
    new_thread = threading.Thread(target=testing_thread, args=(fnum, ))
    thread_pool.append(new_thread)
    new_thread.start()

# Wait for the workers that are still in the pool.
for thread in thread_pool:
    thread.join()

# Record which files were skipped; the context manager closes the file even
# if the write fails.
with open(OUTPUT_DIR + 'inconsistent.txt', 'w') as f:
    f.write('Inconsistent files skipped while testing: {}'.format(skipped_files))
print('Output generated in: {}'.format(OUT_FILE_PATH))

sess.close()