In [None]:
import json

from google.colab import drive
# Mount Google Drive inside the Colab runtime; the dataset paths used later
# live under this mount point.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import numpy as np
import json
import os
from pathlib import Path
import matplotlib.pyplot as plt

# Root of the input data on the mounted Drive, and its 'training' subfolder
# (the *.json task files are globbed from training_path further down).
data_path = Path('/content/drive/MyDrive/input')
training_path = data_path / 'training'

def load_arc_task(filename):
    """Load an ARC task from a JSON file.

    Args:
        filename: Path (string or path-like) of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.
    """
    # JSON text is UTF-8 by specification; being explicit avoids depending on
    # the platform's locale-specific default encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)

def pad_array(array, target_size=(40, 40)):
    """Zero-pad a 2D grid to ``target_size``, keeping it in the top-left corner.

    Args:
        array: 2D grid (NumPy array or nested sequence) to pad.
        target_size: ``(rows, cols)`` of the padded result.

    Returns:
        A new integer NumPy array of shape ``target_size`` whose top-left
        corner holds ``array`` and whose remaining cells are 0.

    Raises:
        ValueError: If ``array`` is not two-dimensional or does not fit
            inside ``target_size``.
    """
    array = np.asarray(array)
    if array.ndim != 2:
        raise ValueError(f"expected a 2D grid, got an array with shape {array.shape}")

    height, width = array.shape
    target_height, target_width = target_size
    # Fail with an explicit message instead of NumPy's opaque broadcast error.
    if height > target_height or width > target_width:
        raise ValueError(
            f"grid of shape {array.shape} does not fit into target size {tuple(target_size)}"
        )

    padded_array = np.zeros(target_size, dtype=int)
    padded_array[:height, :width] = array
    return padded_array

# Number of task files to load, and the common size every grid is zero-padded to.
NUM_TASKS = 10
GRID_SIZE = (30, 30)

# Load the first NUM_TASKS tasks (in sorted filename order) from the training folder
tasks = []
for task_file in sorted(training_path.glob('*.json'))[:NUM_TASKS]:
    task = load_arc_task(task_file)
    tasks.append(task)

# Pad every input/output grid to GRID_SIZE and collect them per split
X_train, Y_train, X_test, Y_test = [], [], [], []
for task in tasks:
    for example in task['train']:
        X_train.append(pad_array(np.array(example['input']), target_size=GRID_SIZE))
        Y_train.append(pad_array(np.array(example['output']), target_size=GRID_SIZE))

    for example in task['test']:
        X_test.append(pad_array(np.array(example['input']), target_size=GRID_SIZE))
        Y_test.append(pad_array(np.array(example['output']), target_size=GRID_SIZE))

# Stack the per-example grids into 3D arrays of shape (num_examples, rows, cols)
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

In [None]:
# Sanity check: (number of training examples, padded rows, padded cols).
X_train.shape

(32, 30, 30)

In [None]:
# Inspect the first padded training input grid.
X_train[0]

array([[0, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

SPATIAL OCCUPANCY


In [None]:
import numpy as np

def spatial_occupancy(X_train):
  """
  Ranks the 2D slices of a 3D numpy array by how densely they are filled.

  Args:
      X_train: A 3D numpy array; each entry along the first axis is one 2D slice.

  Returns:
      A list of (slice, occupancy) tuples, where occupancy is the percentage of
      non-zero cells in that slice. The list is ordered from the emptiest slice
      to the fullest; slices with equal occupancy keep their original order.
  """
  def percent_filled(grid):
    # Share of non-zero cells, expressed as a percentage.
    return (np.count_nonzero(grid) / grid.size) * 100

  scored = [
      (X_train[index], percent_filled(X_train[index]))
      for index in range(X_train.shape[0])
  ]

  # sorted() is stable, so ties stay in input order.
  return sorted(scored, key=lambda pair: pair[1])

occupancy_list = spatial_occupancy(X_train)

# The loop variable is named `grid` rather than `slice` so that the built-in
# `slice` is not rebound at notebook scope.
for grid, occupancy in occupancy_list:
  print(f"Slice:\n{grid}\nOccupancy: {occupancy:.2f}%")




Slice:
[[4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

PATTERN REPETITION


In [None]:
import numpy as np

def _collect_uniform_blocks(grid, row, col, blocks):
  """Records every constant-valued block whose top-left corner is (row, col) into `blocks`."""
  n_rows, n_cols = grid.shape
  value = grid[row, col]

  # Exclusive column bound up to which ALL rows seen so far equal `value`.
  # It can only shrink as more rows are added below the anchor.
  col_limit = n_cols
  for sub_row in range(row, n_rows):
    mismatches = np.flatnonzero(grid[sub_row, col:col_limit] != value)
    if mismatches.size:
      col_limit = col + int(mismatches[0])
    if col_limit == col:
      # The anchor column itself differs, so no taller block can be uniform.
      break

    for sub_col in range(col, col_limit):
      # A uniform block is fully identified by its shape and its single value.
      # (A key built from str(array) collides once NumPy abbreviates the printout.)
      key = (sub_row - row + 1, sub_col - col + 1, value)
      entry = blocks.get(key)
      if entry is None:
        entry = {
            "submatrix": grid[row:sub_row + 1, col:sub_col + 1],
            "positions": [],
            "repetition_count": 0
        }
        blocks[key] = entry
      entry["positions"].append((row, col))
      entry["repetition_count"] += 1


def find_submatrix_repetitions(X_train):
  """
  Finds every constant-valued rectangular block in each 2D slice of a 3D NumPy
  array and groups the blocks by shape and value.

  Only uniform blocks (all cells equal) are considered; arbitrary repeated
  patterns are not detected. Every uniform block is reported, including ones
  that occur only once (repetition_count == 1), and overlapping occurrences
  are counted separately.

  Args:
      X_train (np.ndarray): The 3D array whose 2D slices are scanned.

  Returns:
      list: One dictionary per 2D slice, containing:
          - slice_index (int): The index of the 2D slice in X_train.
          - submatrices (list): One dictionary per distinct (shape, value) block:
              - submatrix (np.ndarray): The first occurrence found (a view into X_train).
              - positions (list): (row, col) top-left corners of every occurrence.
              - repetition_count (int): Number of occurrences (len(positions)).

  Raises:
      ValueError: If X_train is not three-dimensional.
  """
  if X_train.ndim != 3:
    raise ValueError(f"X_train must be a 3D array, got {X_train.ndim} dimension(s)")

  # Anchors are visited tile by tile. The tiling does not save memory; it is
  # kept only so positions and entries come out in the same order as before.
  tile_size = 10

  repetitions = []
  for slice_index in range(X_train.shape[0]):
    grid = X_train[slice_index]
    n_rows, n_cols = grid.shape
    blocks = {}

    for row_start in range(0, n_rows, tile_size):
      for col_start in range(0, n_cols, tile_size):
        for row in range(row_start, min(row_start + tile_size, n_rows)):
          for col in range(col_start, min(col_start + tile_size, n_cols)):
            _collect_uniform_blocks(grid, row, col, blocks)

    # Dicts preserve insertion order, so entries appear in first-seen order.
    repetitions.append({
        "slice_index": slice_index,
        "submatrices": list(blocks.values())
    })

  return repetitions

# Example usage on a small random array (scanning the full padded dataset is slow).
# A separate name is used so the loaded X_train is NOT overwritten for later cells,
# and the generator is seeded so the demo output is reproducible.
rng = np.random.default_rng(0)
X_sample = rng.integers(0, 10, size=(5, 10, 10))  # Sample 3D array (reduced size)

repetitions = find_submatrix_repetitions(X_sample)
print(repetitions)



[{'slice_index': 0, 'submatrices': [{'submatrix': array([[4]]), 'positions': [(0, 0), (0, 4), (2, 5), (2, 9), (4, 4), (4, 7), (5, 3), (5, 4), (6, 4), (7, 1), (7, 4), (7, 5), (9, 1), (9, 9)], 'repetition_count': 14}, {'submatrix': array([[2]]), 'positions': [(0, 1), (0, 5), (0, 7), (1, 9), (3, 1), (3, 5), (3, 6), (4, 1), (4, 6), (5, 6), (6, 9), (8, 7)], 'repetition_count': 12}, {'submatrix': array([[1]]), 'positions': [(0, 2), (1, 2), (1, 8), (2, 0), (4, 9), (5, 5), (5, 9), (6, 7), (6, 8), (7, 2), (7, 9)], 'repetition_count': 11}, {'submatrix': array([[1],
       [1]]), 'positions': [(0, 2), (4, 9)], 'repetition_count': 2}, {'submatrix': array([[9]]), 'positions': [(0, 3), (1, 0), (4, 3), (5, 0), (5, 7), (7, 8)], 'repetition_count': 6}, {'submatrix': array([[3]]), 'positions': [(0, 6), (1, 4), (3, 0), (3, 2), (3, 9), (4, 2), (4, 8), (6, 3), (8, 0), (9, 3), (9, 6)], 'repetition_count': 11}, {'submatrix': array([[8]]), 'positions': [(0, 8), (1, 3), (2, 7), (3, 7), (7, 3), (8, 2), (8, 5), 

NUMBER OF COLORS


In [None]:
import numpy as np

def analyze_colors(X_train):
  """
  Groups the 2D slices of a 3D NumPy array by how many distinct non-zero
  values (colors) each slice contains.

  Args:
      X_train: A 3D NumPy array; iterating over it yields the 2D slices.

  Returns:
      A dictionary mapping a color count to the list of 2D slices that contain
      exactly that many distinct non-zero values. Keys appear in ascending
      order; within a list, slices keep their original order.
  """
  grouped = {}
  for grid in X_train:
    # Zero is treated as background and is not counted as a color.
    distinct = np.unique(grid[grid != 0])
    grouped.setdefault(len(distinct), []).append(grid)

  # Rebuild the mapping with keys in ascending order.
  return {count: grouped[count] for count in sorted(grouped)}


color_analysis = analyze_colors(X_train)

# Walk the groups in key order and print every image of each group,
# followed by a blank line between groups.
for num_colors in color_analysis:
  images = color_analysis[num_colors]
  print(f"{num_colors} colors:")
  for image in images:
    print(image)
  print()








9 colors:
[[4 2 1 9 4 2 3 2 8 5]
 [9 0 1 8 3 6 5 0 1 2]
 [1 5 5 6 6 4 0 8 0 4]
 [3 2 3 7 5 2 2 8 7 3]
 [6 2 3 9 4 6 2 4 3 1]
 [9 5 5 4 4 1 2 9 5 1]
 [7 6 0 3 4 6 5 1 1 2]
 [0 4 1 8 4 4 6 7 9 1]
 [3 0 8 0 0 8 7 2 8 7]
 [6 4 6 3 0 5 3 6 5 4]]
[[0 5 5 4 6 7 8 2 9 3]
 [3 8 6 1 8 5 6 7 2 5]
 [4 8 4 5 8 5 6 1 1 6]
 [6 6 5 9 5 6 2 9 0 2]
 [4 4 0 8 9 2 7 1 8 5]
 [7 2 9 7 4 0 7 7 3 0]
 [3 5 2 3 0 3 7 2 2 9]
 [3 7 8 1 6 8 7 8 5 3]
 [6 4 5 2 3 7 4 0 9 0]
 [4 2 4 9 4 8 5 1 7 1]]
[[4 3 6 8 8 7 9 0 9 4]
 [1 9 0 3 9 2 1 7 6 9]
 [3 7 3 2 7 1 0 3 5 0]
 [9 2 2 4 8 2 3 5 1 8]
 [8 6 9 5 9 1 7 4 7 0]
 [8 4 9 5 6 2 6 4 7 1]
 [0 7 0 6 1 6 0 7 8 7]
 [6 4 7 9 2 3 8 2 0 1]
 [3 1 5 8 6 2 5 9 3 0]
 [2 5 6 6 0 8 7 9 0 1]]
[[8 6 2 2 3 0 4 3 1 6]
 [8 7 7 7 1 5 6 6 2 8]
 [8 6 9 7 8 7 9 6 0 5]
 [4 4 4 5 7 5 5 4 1 9]
 [8 5 9 3 0 6 2 2 9 0]
 [3 8 0 3 2 2 1 9 3 4]
 [4 8 0 3 5 5 4 4 0 5]
 [5 3 7 1 9 7 3 0 1 7]
 [8 9 7 4 7 9 1 3 9 2]
 [2 9 5 8 8 0 5 5 3 6]]
[[9 2 1 2 1 0 9 6 2 2]
 [8 2 3 3 0 0 6 8 4 5]
 [0 6 0 8 9 1 3 2 0 

In [None]:
import numpy as np


def reorder_dataset(x_train, occupancy, num_colors, pattern_repetition=None):
  """
  Reorders a 3D NumPy array `x_train` along its first axis using a multi-key sort.

  Samples are sorted in ascending order by `occupancy`; ties are broken by
  `num_colors`, and remaining ties by `pattern_repetition` when it is given.
  Samples that tie on every key keep their original relative order.

  Args:
      x_train: The 3D NumPy array to reorder (samples along the first axis).
      occupancy: A 1D array with one spatial occupancy value per sample in x_train.
      num_colors: A 1D array with the number of colors per sample in x_train.
      pattern_repetition (optional): A 1D array with one pattern repetition value
          per sample in x_train. Ignored when None.

  Returns:
      A tuple (reordered_x_train, sorted_indices), where sorted_indices is the 1D
      integer permutation such that reordered_x_train == x_train[sorted_indices].
      An empty x_train yields an empty array and an empty index array.

  Raises:
      ValueError: If any key array is not 1D with exactly one value per sample.
  """
  x_train = np.asarray(x_train)
  num_samples = x_train.shape[0]

  named_keys = [("occupancy", occupancy), ("num_colors", num_colors)]
  if pattern_repetition is not None:
    named_keys.append(("pattern_repetition", pattern_repetition))

  key_arrays = []
  for label, values in named_keys:
    values = np.asarray(values)
    if values.shape != (num_samples,):
      raise ValueError(
          f"{label} must have shape ({num_samples},) to match x_train, got {values.shape}"
      )
    key_arrays.append(values)

  # np.lexsort treats its LAST key as the primary one, so pass the keys from
  # least to most significant.
  sorted_indices = np.lexsort(tuple(reversed(key_arrays)))
  reordered_x_train = x_train[sorted_indices]

  return reordered_x_train, sorted_indices


# Placeholder feature values: one value per sample in X_train, drawn from a
# seeded generator so the demo is reproducible. Replace these with the real
# per-sample features when they are available.
num_samples = X_train.shape[0]
rng = np.random.default_rng(0)
occupancy = rng.integers(1, 100, num_samples)  # Sample spatial occupancy values
num_colors = rng.integers(1, 10, num_samples)  # Sample number of colors for each array
pattern_repetition = rng.integers(1, 5, num_samples)  # Sample pattern repetition values

# Assuming you have your actual 3D NumPy array stored in a variable named X_train
reordered_x_train, sorted_indices = reorder_dataset(X_train, occupancy, num_colors, pattern_repetition)

# Now you can use the reordered_x_train for further processing
print(reordered_x_train)


Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repetition (if used): (10,)
Shape of X_train: (5, 10, 10)
Shape of occupancy: (10,)
Shape of num_colors: (10,)
Shape of pattern_repe

RecursionError: maximum recursion depth exceeded while calling a Python object