In [None]:
# Imports
import json
import os
import numpy as np
from math import floor
from random import random


In [None]:
!ls

In [None]:
# Task class to handle tasks
class Task:
    '''
        Thin wrapper around one ARC task: its name and its raw data.

        The raw data is indexed with the 'train' and 'test' keys; each entry
        of those collections is indexed with 'input' and 'output'.
    '''
    def __init__(self, task_name, task_data):
        self.name = task_name
        self.data = task_data

    def examples(self):
        '''Lazily iterate over the (input, output) pairs of the 'train' entries.'''
        return ((pair['input'], pair['output']) for pair in self.data['train'])

    def test(self):
        '''Lazily iterate over the inputs of the 'test' entries.'''
        return (pair['input'] for pair in self.data['test'])

    def get_solutions(self):
        '''Lazily iterate over the outputs of the 'test' entries.'''
        return (pair['output'] for pair in self.data['test'])


In [None]:
#ARC dataset class to switch easily between train, eval and test

def get_dataset(root='/kaggle/input', verbose=True):
    '''
        Load every JSON task found under `root`, grouped by split.

        A directory contributes to a split according to the first three
        letters of its own name: 'tra' -> "train", 'tes' -> "test",
        'eva' -> "evaluation". Any other directory is ignored, and so is any
        file that does not carry a '.json' extension (the previous version
        tried to JSON-decode every file it met and crashed on the first
        non-JSON one).

        Params:
            root: directory to walk recursively
            verbose: print the path of each file that gets loaded

        Return:
            dict with the keys "train", "test" and "evaluation", each mapping
            a task name (file name without extension) to its decoded content
    '''
    split_by_prefix = {"tra": "train", "tes": "test", "eva": "evaluation"}
    dataset = {
        "train": {},
        "test": {},
        "evaluation": {}
    }
    for dirname, _, filenames in os.walk(root):
        split = split_by_prefix.get(os.path.basename(dirname)[:3])
        if split is None:
            continue
        # Sorted so that the task order does not depend on the file system.
        for filename in sorted(filenames):
            key, extension = os.path.splitext(filename)
            if extension.lower() != '.json':
                continue
            path = os.path.join(dirname, filename)
            if verbose:
                print(path)
            with open(path) as json_file:
                dataset[split][key] = json.load(json_file)
    return dataset

class ARC:
    '''
        Handle the ARC Dataset

        The dataset is loaded once at construction time through get_dataset()
        and kept in `self.dataset`, a dict of splits mapping task names to
        raw task data.
    '''
    def __init__(self):
        self.dataset = get_dataset()

    def tasks(self, task_type = 'all'):
        '''
            Iterate over tasks of task_type type

            Params:
                task_type: 'train', 'test', 'evaluation' (or its short form
                           'eval'), or 'all' for every split. Any other value
                           matches no split and yields nothing.

            Return:
                Iterator of Task objects
        '''
        # The split is stored under 'evaluation'; 'eval' used to be documented
        # but silently matched nothing, so it is accepted as an alias.
        wanted = {'eval': 'evaluation'}.get(task_type, task_type)
        for t_type, tasks in self.dataset.items():
            if wanted != 'all' and wanted != t_type:
                continue
            for t_name, task in tasks.items():
                yield Task(t_name, task)

In [None]:
# Features class

# Ordered list of feature names; the position of a name in this list is the
# position of the feature in the vectors built from it.
features = [
    'width',
    'height',
    'const',
    'anti_diag_symetry',
    'diag_symetry',
    'hotizontal_symetry',
    'vertical_symetry',
    'number_colors',
    'number_colors_wob',
    'n_shapes',
    'n_shapes_wob'
]

# How many "k-th color count" features are appended at the end of the list.
NB_BEST_COLOR_COUNT = 3

# Two features for each of the 10 color indices, interleaved per color.
for color in range(10):
    features.extend([
        'color_{}count'.format(color),
        'nb_shape_color_{}'.format(color),
    ])

# Ranked color counts: '1th_color_count', '2th_color_count', ...
features.extend(
    '{}th_color_count'.format(rank)
    for rank in range(1, NB_BEST_COLOR_COUNT + 1)
)

class Features:
    '''
        Feature set of a grid.

        Two construction modes:
          * Features(image): `image` is converted to a numpy array and every
            feature is computed from it (see extract_features).
          * Features(vector, reversed=True): `vector` already holds feature
            values, in the order of the module-level `features` list; each
            value is rounded to an int and stored under its name. No image is
            attached in that mode.
    '''
    def __init__(self, image, reversed=False):
        if reversed:
            self.dic = {}
            for k, v in zip(features, image):
                self.dic[k] = int(round(v))
            self.image = []
            self.vec = image
        else:
            self.dic = {}
            self.image = np.array(image)
            self.extract_features()
            self.vec = None

    def extract_features(self):
        '''
            Fill `self.dic` with every feature of `self.image` and store the
            shape labelling in `self.shapes` as (label matrix, labels per color).
        '''
        self.dic['const'] = 1

        # --- color statistics -------------------------------------------
        colors = {}
        c_colors = 0
        c_colors_wob = 0
        for i in range(10):
            colors[i] = (self.image == i).sum()
            self.dic['color_'+str(i)+'count'] = colors[i]
            if colors[i] > 0:
                c_colors += 1
                if i > 0:
                    # Colors other than color 0. This used to add 0, which
                    # left 'number_colors_wob' stuck at 0.
                    c_colors_wob += 1
        self.dic['number_colors'] = c_colors
        self.dic['number_colors_wob'] = c_colors_wob
        top_colors = sorted(colors.items(), key=lambda x: -x[1])[:NB_BEST_COLOR_COUNT]
        for i, n in enumerate(top_colors):
            self.dic[str(i+1)+'th_color_count'] = n[1]
        self.dic['height'], self.dic['width'] = self.image.shape

        # --- symmetries -------------------------------------------------
        height, width = self.dic['height'], self.dic['width']
        half_h, half_w = height // 2, width // 2
        # Slicing from `size - half` (rather than `-half`) keeps both halves
        # empty when the grid has a single row/column.
        upper_half = self.image[:half_h, :]
        down_half = self.image[height - half_h:, :]
        left_half = self.image[:, :half_w]
        right_half = self.image[:, width - half_w:]

        # np.array_equal returns False on a shape mismatch, whereas `==`
        # either raises or silently broadcasts on non-square grids.
        self.dic['hotizontal_symetry'] = int(np.array_equal(down_half, np.flip(upper_half, 0)))
        self.dic['vertical_symetry'] = int(np.array_equal(left_half, np.flip(right_half, 1)))
        self.dic['diag_symetry'] = int(np.array_equal(self.image, self.image.T))
        rot = np.rot90(self.image, 1)
        self.dic['anti_diag_symetry'] = int(np.array_equal(rot, rot.T))

        # --- shapes -----------------------------------------------------
        # Single-pass labelling: each pixel looks at its already visited
        # neighbours (left, upper-left, up, upper-right) and joins the shape
        # of any neighbour that has the same color; -1 means "not labelled".
        img = self.image
        if img.shape[0] * img.shape[1] == 0:
            shape_matrix = np.array([[-1]])
        else:
            shape_matrix = -np.ones_like(img, dtype=np.int32)
        # colors[c] lists the labels of the shapes made of color c.
        colors = [[] for _ in range(10)]
        for i in range(img.shape[0]):
            for j in range(img.shape[1]):
                pix = int(img[i, j])
                neighboors = []
                if j > 0:
                    neighboors.append(shape_matrix[i, j - 1])
                if i > 0:
                    if j > 0:
                        neighboors.append(shape_matrix[i - 1, j - 1])
                    neighboors.append(shape_matrix[i - 1, j])
                    if j < img.shape[1] - 1:
                        neighboors.append(shape_matrix[i - 1, j + 1])
                # Keep only the distinct labels that belong to this pixel's color.
                neighboors = list(set(filter(lambda x: x > -1  and x in colors[pix], neighboors)))
                if len(neighboors) == 0:
                    # No same-colored neighbour: open a new shape.
                    shape_n = shape_matrix.max() + 1
                    shape_matrix[i, j] = shape_n
                    colors[pix].append(shape_n)
                else:
                    # Join the first neighbouring shape and merge the others into it.
                    iterator = iter(neighboors)
                    ref = next(iterator)
                    shape_matrix[i, j] = ref
                    for n in iterator:
                        shape_matrix[shape_matrix==n] = ref
                        colors[pix].remove(n)
        # Merges leave gaps in the labels: renumber them 0..k-1. The -1
        # sentinel (only present for an empty grid) is not a shape and must
        # not be renumbered, otherwise an empty grid would count one shape.
        uq = sorted(label for label in np.unique(shape_matrix) if label > -1)
        for i, n in enumerate(uq):
            if i == n:
                continue
            shape_matrix[shape_matrix == n] = i
            for c in colors:
                if n in c:
                    c[c.index(n)] = i
        self.shapes = (shape_matrix, colors)
        self.dic['n_shapes'] = shape_matrix.max()+1

        for i, c in enumerate(colors):
            self.dic['nb_shape_color_'+str(i)] = len(c)

        self.dic['n_shapes_wob'] = self.dic['n_shapes'] - self.dic['nb_shape_color_0']

    def to_dic(self):
        '''Return the features as a dict indexed by feature name.'''
        return self.dic

    def to_vec(self):
        '''
            Return the features as a vector ordered like the module-level
            `features` list. The vector is built on first use and cached.
        '''
        if self.vec is None:
            vec = []
            for k in features:
                vec.append(self.dic[k])
            self.vec = np.array(vec)
        return self.vec

    def __str__(self):
        return str(self.dic)

    @staticmethod
    def size():
        '''Number of features in a feature vector.'''
        return len(features)

In [None]:
# Find pattern
class Pattern_finder:
    '''
        Learn multiplicative relations between input and output feature vectors.

        For every training pair, add() builds a matrix with one row per output
        feature and one column per input feature. `self.mask` keeps only the
        entries that are identical for every pair added so far (the others are
        zeroed). guess_features() then applies the mask to a new input vector.

        Params:
            shape: number of features (stored, not used by the learning itself)
            opt_mask: optional extra mask multiplied into the first learned
                      mask when their shapes agree. Either an array-like
                      object exposing `.shape`, or a string naming a JSON file
                      in './strategies/feature_matching/masks/'.
    '''
    def __init__(self, shape, opt_mask=None):
        self.shape = shape
        self.mask = None
        if opt_mask is None or not isinstance(opt_mask, str):
            self.opt_mask = opt_mask
        else:
            with open('./strategies/feature_matching/masks/'+opt_mask+'.json') as f:
                self.opt_mask = np.array(json.load(f))

    def add(self, f_x, f_y):
        '''
            Register one training pair of feature vectors.

            Row j of the pair's matrix is:
              * f_y[j] / f_x when f_y[j] != 0, with infinities (division by a
                zero input feature) replaced by 0;
              * 1 where f_x == 0 and 0 elsewhere when f_y[j] == 0.
        '''
        f_x = np.asarray(f_x)
        n_mask = []
        for i in f_y:
            if i == 0:
                n_mask.append((f_x==0).astype(np.float64))
            else:
                # Zero input features are expected; the resulting inf is
                # cleared just below, so the warning would only be noise.
                with np.errstate(divide='ignore'):
                    x = (i/f_x)
                x[x == np.inf] = 0
                n_mask.append(x.tolist())
        n_mask = np.array(n_mask)
        if self.mask is None:
            self.mask = n_mask
            if self.opt_mask is not None:
                # A mask whose shape does not match is silently ignored.
                if self.opt_mask.shape == n_mask.shape:
                    self.mask *= self.opt_mask
        else:
            # Keep an entry only if this pair agrees with all previous ones.
            self.mask = self.mask * (self.mask == n_mask).astype(np.uint8)

    def guess_features(self, f_x, display_corelation_matrix=False):
        '''
            Guess the output features for an input feature set.

            Params:
                f_x: object exposing to_vec()
                display_corelation_matrix: print the mask, the votes, the
                                           input vector and the guess

            Return:
                list with one int per output feature. Each non-zero product
                mask[j][k] * f_x[k] votes for its rounded value and the most
                voted value wins (0 when there is no vote). When nothing has
                been learned yet, the rounded input vector is returned.
        '''
        f_x = f_x.to_vec()
        if self.mask is None:
            # Nothing learned: guess "output features == input features".
            # This used to return a (shape, shape) matrix, which is not a
            # feature vector and could not be consumed like the other branch.
            return [int(round(v)) for v in f_x]

        vec = []
        votes_h = []
        for weights in self.mask:
            g = (weights * f_x)
            # Majority decision among the non-zero candidates.
            votes = {}
            for v in g:
                if v != 0:
                    # Read and write the SAME key: the count used to be read
                    # with the unrounded value, so non-integer candidates
                    # never accumulated votes.
                    key = int(round(v))
                    votes[key] = votes.get(key, 0) + 1
            if not votes:
                vec.append(0)
                votes_h.append(0)
            else:
                vec.append(max(votes.items(), key=lambda x: x[1])[0])
                votes_h.append(votes)
        if display_corelation_matrix:
            print("Matrice de corélation:")
            print(np.array(self.mask.tolist()))
            print("Votes:")
            print(votes_h)
            print('input', f_x)
            print('Features Guessed:')
            print(vec)
        return vec

In [None]:
# Find solution
def match_score(X, f):
    '''
        Score a candidate grid against a target feature set.

        Params:
            X: candidate grid, turned into features with Features(X)
            f: object exposing to_vec()

        Return:
            number of positions where both feature vectors are equal
    '''
    candidate_vec = np.array(Features(X).to_vec())
    target_vec = np.array(f.to_vec())
    same = candidate_vec == target_vec
    return same.sum()

class Solution:
    '''
        Pick the candidate grids that best match a target feature set.

        Params:
            features: object exposing to_dic() (feature name -> value) that
                      is also accepted by match_score()
    '''
    def __init__(self, features):
        self.features = features

    def select_propositions(self, propositions, max_trials=3):
        '''
            Return at most `max_trials` candidate grids as nested lists.

            A uniform grid of color c is added to the candidates when the
            target features say that color c fills the whole grid (its count
            is the top color count and equals width * height).

            When there are more than `max_trials` candidates they are ranked
            with match_score() and the best ones are returned, best first;
            otherwise every candidate is returned in its original order.

            Params:
                propositions: iterable of numpy arrays; it is not modified
                max_trials: maximum number of grids returned
        '''
        f = self.features.to_dic()
        # Work on a copy so the caller's list is left untouched.
        candidates = list(propositions)
        area = f['width'] * f['height']
        for i in range(10):
            ccount = f['color_{}count'.format(i)]
            if ccount > 0 and ccount == f['1th_color_count'] and ccount == area:
                candidates.append((i * np.ones((f['height'], f['width']), dtype=np.uint8)))

        if len(candidates) > max_trials:
            scored = [(match_score(p, self.features), p) for p in candidates]
            # This used to index a single element ([-3]) instead of taking a
            # slice, which raised as soon as there were more than 3 candidates.
            scored.sort(key=lambda x: x[0], reverse=True)
            candidates = [p for _, p in scored[:max_trials]]
        return [p.tolist() for p in candidates]


In [None]:
# Propositions
def get_pos(container, subject):
    '''
        Locate every occurrence of `subject` inside `container`.

        Params:
            container: 2D numpy array searched in
            subject: 2D numpy array searched for

        Return:
            list of (row, column) top-left corners, in row-major order; empty
            when `subject` is larger than `container` along either axis
    '''
    sub_rows, sub_cols = subject.shape[0], subject.shape[1]
    last_row = container.shape[0] - sub_rows
    last_col = container.shape[1] - sub_cols
    if last_row < 0 or last_col < 0:
        return []
    return [
        (row, col)
        for row in range(last_row + 1)
        for col in range(last_col + 1)
        if np.array_equal(container[row:row + sub_rows, col:col + sub_cols], subject)
    ]

def raw_delta(X, y):
    '''
        Per-cell offset that turns the input grid into the output grid.

        Params:
            X: (input grid as numpy array, features) pair
            y: (output grid as numpy array, features) pair

        Return:
            `y_img - X_img`, so that `X_img + delta == y_img`, or a 1x1 zero
            matrix when both grids do not have the same shape.
    '''
    X_img, _ = X
    y_img, _ = y
    if X_img.shape != y_img.shape:
        return np.zeros((1,1))
    # The delta is ADDED to the input grid when it is applied, so it has to
    # be output minus input (it used to be computed the other way round).
    return y_img - X_img

def occurences_input(X, y):
    '''
        Find where the input grid appears inside the output grid.

        Eight variants of the input grid are searched with get_pos(), in this
        order: the grid, its transpose, then for 1, 2 and 3 quarter turns
        (np.rot90) the rotated grid followed by the rotated transpose.

        Params:
            X: (input grid as numpy array, features) pair
            y: (output grid as numpy array, features) pair

        Return:
            list of 8 position lists, one per variant
    '''
    X_img, _ = X
    y_img, _ = y

    variants = [X_img, X_img.T]
    for quarter_turns in (1, 2, 3):
        variants.append(np.rot90(X_img, quarter_turns))
        variants.append(np.rot90(X_img.T, quarter_turns))

    return [get_pos(y_img, variant) for variant in variants]

def compare_matrix(a, n):
    '''
        Keep the cells of `a` that are equal in `n` and zero the others.

        Return:
            float matrix shaped like `a`, or a 1x1 zero matrix when `a` and
            `n` do not have the same shape
    '''
    if a.shape == n.shape:
        agreement = (a == n).astype(np.float64)
        return agreement * a
    return np.zeros((1, 1))

def compare_pos_array(a, n):
    '''
        Pairwise intersection of two lists of position lists.

        For each pair of lists taken at the same index, keep the elements of
        the list from `a` that are also in the list from `n` (order of `a`
        preserved). The result is as long as the shorter of `a` and `n`.
    '''
    return [
        [position for position in kept if position in seen]
        for kept, seen in zip(a, n)
    ]

class Proposition:
    '''
        Build candidate output grids from indicators learned on training pairs.

        Two indicators are maintained in `self.indicateurs`, each stored under
        the name of the function that builds it:
          * 'raw_delta', merged across examples with compare_matrix;
          * 'occurences_input', merged across examples with compare_pos_array.
    '''
    def __init__(self):
        self.indicateurs = {}
        # Builders and mergers are paired by position.
        self.indicateur_builder = [
            raw_delta,
            occurences_input
        ]
        self.merger = [
            compare_matrix,
            compare_pos_array
        ]
        self.first = True

    def add_train_example(self, X, y):
        '''
            Update every indicator with one training pair.

            Params:
                X: (input grid, input features) pair
                y: (output grid, output features) pair

            The first example initialises each indicator; later examples are
            combined with the stored value through the matching merger.
        '''
        X_pair = (np.array(X[0]), X[1])
        y_pair = (np.array(y[0]), y[1])
        for builder, merger in zip(self.indicateur_builder, self.merger):
            k = builder.__name__
            indication = builder(X_pair, y_pair)
            if self.first:
                self.indicateurs[k] = indication
            else:
                self.indicateurs[k] = merger(self.indicateurs[k], indication)
        self.first = False

    def propose(self, X, f_x):
        '''
            Build candidate grids for a test input.

            Params:
                X: (input grid, input features) pair; only the grid is used
                f_x: guessed output features, exposing to_dic() with the
                     'height' and 'width' entries

            Return:
                list of numpy arrays:
                  * |input + raw_delta| % 10, only when the delta is not all
                    zeros and has the shape of the input grid;
                  * always, a grid of ones of the guessed size on which the
                    (transformed) input is pasted at every learned position.
        '''
        proposition = []
        X_img = np.array(X[0])
        # .get(): no indicator exists when no training example was added.
        delta = self.indicateurs.get('raw_delta')
        if delta is not None and X_img.shape == delta.shape and np.abs(delta).sum() > 0:
            proposition.append(np.abs(X_img + delta)%10)

        f_x_dic = f_x.to_dic()
        # Same order as the variants searched by occurences_input.
        processes = [
            lambda x: x,
            lambda x: x.T,
            lambda x: np.rot90(x, 1),
            lambda x: np.rot90(x.T, 1),
            lambda x: np.rot90(x, 2),
            lambda x: np.rot90(x.T, 2),
            lambda x: np.rot90(x, 3),
            lambda x: np.rot90(x.T, 3)
        ]
        empty = np.ones((f_x_dic['height'], f_x_dic['width']), dtype=np.uint8)
        for i, occurences in enumerate(self.indicateurs.get('occurences_input', [])):
            if not occurences:
                continue
            # Size the target window after the TRANSFORMED grid: a transposed
            # or rotated non-square grid does not have the input's shape, and
            # using the input's shape made the assignment raise.
            patch = processes[i](X_img)
            patch_h, patch_w = patch.shape[0], patch.shape[1]
            for n in occurences:
                row, col = n[0], n[1]
                # Positions come from the training grids; skip the ones that
                # do not fit in the guessed canvas instead of crashing.
                if row + patch_h > empty.shape[0] or col + patch_w > empty.shape[1]:
                    continue
                empty[row:row+patch_h, col:col+patch_w] = patch
        proposition.append(empty)
        return proposition

In [None]:
# Solver, holds the bricks together
class Solver:
    '''
        Tie the building blocks together for one task.

        At construction the solver creates a Pattern_finder and a Proposition
        and feeds them every training example of the task.
    '''
    def __init__(self, task, opt_mask=None):
        self.task = task
        self.pattern_finder = Pattern_finder(Features.size(), opt_mask)
        self.proposer = Proposition()
        self.find_pattern()

    def find_pattern(self):
        '''Print the task name, then learn from each training example.'''
        print(self.task.name)
        for grid_in, grid_out in self.task.examples():
            feat_in = Features(grid_in)
            feat_out = Features(grid_out)
            self.pattern_finder.add(feat_in.to_vec(), feat_out.to_vec())
            self.proposer.add_train_example((grid_in, feat_in), (grid_out, feat_out))

    def get_predictions(self, verbosity=False):
        '''
            Build the selected candidate grids for every test input.

            Params:
                verbosity: print the task name and forward the flag to
                           guess_features()

            Return:
                list with, for each test input, the value returned by
                Solution.select_propositions()
        '''
        if verbosity:
            print(self.task.name)
        solutions = []
        for grid in self.task.test():
            input_features = Features(grid)
            guess = self.pattern_finder.guess_features(input_features, verbosity)
            guessed_features = Features(guess, True)
            selector = Solution(guessed_features)
            candidates = self.proposer.propose((grid, input_features), guessed_features)
            solutions.append(selector.select_propositions(candidates))
        return solutions

    def evaluate(self):
        '''
            Fraction of output features guessed exactly over the test pairs.
        '''
        matched = 0
        total = 0
        pairs = zip(self.task.test(), self.task.get_solutions())
        for grid, expected_grid in pairs:
            guessed = np.array(self.pattern_finder.guess_features(Features(grid)))
            expected = np.array(Features(expected_grid).to_vec(), dtype=np.float64)
            matched += (guessed == expected).sum()
            total += len(expected)
        return matched/total

    def on_success(self):
        print('SUCESS ON TASK', self.task.name)

    def on_error(self):
        print('ERROR ON TASK', self.task.name)

    def on_unprononced(self):
        print('NO CORRELATION FOUND ON TASK ', self.task.name)

In [None]:
# Submission utils

def flattener(pred):
    '''
        Serialise a grid into the submission format.

        Every cell is converted with int(); the textual form of the resulting
        list of lists is then rewritten so that rows are delimited by '|' and
        cells are written without separator, e.g. [[1, 2], [3, 4]] -> '|12|34|'.
    '''
    text = str([[int(cell) for cell in row] for row in pred])
    # Order matters: the cell separators go first, then the row boundaries.
    for old, new in ((', ', ''), ('[[', '|'), ('][', '|'), (']]', '|')):
        text = text.replace(old, new)
    return text

In [None]:
# Actual submission

# Split the submission is built from.
submission_type = 'test'

# Written in place of a grid when a trial is empty or no trial is available.
DEFAULT_PREDICTION = '|1|'

arc = ARC()

# One CSV line per test input: "<task name>_<test index>,<trial> <trial> ...".
rows = ['output_id,output']

for t in arc.tasks(submission_type):
    s = Solver(t)
    Y = s.get_predictions()
    for i, resp in enumerate(Y):
        trials = [
            flattener(trial) if len(trial) > 0 else DEFAULT_PREDICTION
            for trial in resp
        ]
        if not trials:
            # Stripping the trailing character used to remove the comma
            # separator when there was no trial at all, producing a
            # malformed row; emit the placeholder instead.
            trials = [DEFAULT_PREDICTION]
        rows.append('{}_{},{}'.format(t.name, i, ' '.join(trials)))

# Joined once instead of growing a string line after line.
response = '\n'.join(rows) + '\n'

with open('submission.csv', 'w') as f:
    f.write(response)

In [None]:

!head submission.csv -n 110