In [120]:
import json
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim


In [121]:
training_challenge_dict = json.load(open('../data/RAW_DATA_DIR/arc-prize-2024/arc-agi_training_challenges.json'))
training_solutions_dict = json.load(open('../data/RAW_DATA_DIR/arc-prize-2024/arc-agi_training_solutions.json'))

In [122]:
def create_input_output_pairs(training_challenge_dict, training_solutions_dict):
    input_output_pairs = []
    
    for challenge_id, challenge in training_challenge_dict.items():
        train = challenge['train']
        test = challenge['test']

        # find how many examples there are in the train
        num_train_examples = len(train)

        for i in range(num_train_examples):
            input_output_pairs.append({
                'input': train[i]['input'],
                'output': train[i]['output'],
                'challenge_id': challenge_id,
            })

        test_input = test[0]['input']
        test_output = training_solutions_dict[challenge_id][0]
        input_output_pairs.append({
            'input': test_input,
            'output': test_output,
            'challenge_id': challenge_id,
        })
        
    return input_output_pairs

input_output_pairs = create_input_output_pairs(training_challenge_dict, training_solutions_dict)

In [123]:
def create_dataset(training_challenge_dict, training_solutions_dict):
    dict = {
        'input': [],
        'output': [],
        'challenge_id': []
    }

    for challenge_id, challenge in training_challenge_dict.items():
        train = challenge['train']
        test = challenge['test']

        for i in range(len(train)):
            dict['input'].append(torch.tensor(train[i]['input'], dtype=torch.float32).unsqueeze(0).unsqueeze(0))
            dict['output'].append(torch.tensor(train[i]['output'], dtype=torch.float32).unsqueeze(0).unsqueeze(0))
            dict['challenge_id'].append(challenge_id)

        dict['input'].append(torch.tensor(test[0]['input'], dtype=torch.float32).unsqueeze(0).unsqueeze(0))
        dict['output'].append(torch.tensor(training_solutions_dict[challenge_id][0], dtype=torch.float32).unsqueeze(0).unsqueeze(0))
        dict['challenge_id'].append(challenge_id)

    return pd.DataFrame(dict)

dataset = create_dataset(training_challenge_dict, training_solutions_dict)

In [124]:
# Define the CNN model
class CNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(CNN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim

        k1, p1 = 3, 1
        k2, p2 = 3, 1
        k3, p3 = 3, 1

        self.conv1 = nn.Conv2d(1, 32, kernel_size=k1, padding=p1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=k2, padding=p2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=k3, padding=p3)
        self.pool = nn.MaxPool2d(2, 2)
        
        # Calculate the size of the feature maps after convolutions and pooling
        h1 = input_dim[2] + 2*p1 - (k1-1)
        w1 = input_dim[3] + 2*p1 - (k1-1)
        h2 = h1 + 2*p2 - (k2-1)
        w2 = w1 + 2*p2 - (k2-1)
        h3 = h2 + 2*p3 - (k3-1)
        w3 = w2 + 2*p3 - (k3-1)

        # calculate the output size of the conv layers
        conv_output_size = h3 * w3 * 64
        
        self.fc1 = nn.Linear(conv_output_size, 128)
        self.fc2 = nn.Linear(128, output_dim[2] * output_dim[3])

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        #print('x after conv1', x.shape)
        x = torch.relu(self.conv2(x))
        #print('x after conv2', x.shape)
        x = torch.relu(self.conv3(x))
        #print('x after conv3', x.shape)
        x = x.view(x.size(0), -1)
        #print('x after view', x.shape)
        x = torch.relu(self.fc1(x))
        #print('x after fc1', x.shape)
        x = self.fc2(x)
        #print('x after fc2', x.shape)
        return x.view(-1, self.output_dim[2], self.output_dim[3])
    
def train(model, inputs, outputs, num_epochs=300, learning_rate=0.001):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        total_loss = 0
        for i in range(len(inputs)):
            input_tensor = inputs[i]
            output_tensor = outputs[i]
            
            # Forward pass
            prediction = model(input_tensor)
            loss = criterion(prediction, output_tensor)
            
            # Backward pass and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        avg_loss = total_loss / len(inputs)
        if (epoch + 1) % 25 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')
    
    return model

# Function to make predictions
def predict(model, input_data):
    model.eval()
    with torch.no_grad():
        input_tensor = torch.tensor(input_data, dtype=torch.float32)
        output = model(input_tensor)

    output = output.squeeze().numpy()

    # round the output to the nearest integer
    output = np.round(output).astype(int)
    
    return output

In [125]:
trained_models = {}

# unpack the items of the dataset in a loop
i = 0
while i < len(dataset):

    inputs = [dataset.iloc[i]['input']]
    outputs = [dataset.iloc[i]['output']]

    challenge_id = dataset.iloc[i]['challenge_id']

    k = i+1
    
    while k < len(dataset) and dataset.iloc[k]['challenge_id'] == challenge_id:
        inputs.append(dataset.iloc[k]['input'])
        outputs.append(dataset.iloc[k]['output'])
        k += 1

    model = CNN(inputs[0].shape, outputs[0].shape)
    trained_model = train(model, inputs, outputs)

    correct = 0

    for j in range(len(inputs)):

        # predict the output
        prediction = predict(trained_model, inputs[j])

        correct_output = outputs[j].squeeze().numpy().astype(int)

        if np.array_equal(prediction, correct_output):
            correct += 1

        else:
            print('prediction', prediction)
            print('output', correct_output)

    # Extract the weights of the fc2 layer
    fc2_weights = trained_model.fc2.weight.data
    fc2_weights_numpy = fc2_weights.cpu().numpy()  # Convert to NumPy array if needed

    trained_models[challenge_id] = {
        'fc2_weights': fc2_weights_numpy,
        'accuracy': correct/len(inputs),
        'correct': correct,
        'total': len(inputs),
    }

    i = k + 1

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [25/300], Average Loss: 0.3458
Epoch [50/300], Average Loss: 0.0408
Epoch [75/300], Average Loss: 0.0293
Epoch [100/300], Average Loss: 0.0372
Epoch [125/300], Average Loss: 0.0863
Epoch [150/300], Average Loss: 0.0834
Epoch [175/300], Average Loss: 0.0461
Epoch [200/300], Average Loss: 0.0614
Epoch [225/300], Average Loss: 0.0770
Epoch [250/300], Average Loss: 0.0388
Epoch [275/300], Average Loss: 0.0697
Epoch [300/300], Average Loss: 0.0628
prediction [[0 0 0 0 6 6 0 6 6]
 [0 0 0 6 6 6 6 6 6]
 [0 0 0 0 6 6 0 6 6]
 [0 6 6 0 6 6 0 6 6]
 [6 7 6 6 6 6 6 6 6]
 [0 6 6 0 6 6 0 6 6]
 [0 0 0 0 6 6 0 6 6]
 [0 0 0 6 6 6 6 6 6]
 [0 0 0 0 6 6 0 6 6]]
output [[0 0 0 0 7 7 0 7 7]
 [0 0 0 7 7 7 7 7 7]
 [0 0 0 0 7 7 0 7 7]
 [0 7 7 0 7 7 0 7 7]
 [7 7 7 7 7 7 7 7 7]
 [0 7 7 0 7 7 0 7 7]
 [0 0 0 0 7 7 0 7 7]
 [0 0 0 7 7 7 7 7 7]
 [0 0 0 0 7 7 0 7 7]]
prediction [[7 0 7 0 0 0 7 0 7]
 [7 0 7 0 0 0 7 0 7]
 [7 7 0 0 0 0 7 7 0]
 [7 0 7 0 0 0 7 0 7]
 [7 0 7 0 0 0 7 0 7]
 [7 7 0 0 0 0 7 7 0]
 [7 0 7 7 0 

  input_tensor = torch.tensor(input_data, dtype=torch.float32)
  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x25600 and 6400x128)

In [None]:
trained_models

{'007bbfb7': {'model': CNN(
    (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=576, out_features=128, bias=True)
    (fc2): Linear(in_features=128, out_features=81, bias=True)
  ),
  'fc2_weights': array([[ 0.04028171,  0.1018967 ,  0.00197448, ..., -0.02740249,
          -0.02646479, -0.00860251],
         [-0.06980078,  0.09571728,  0.00734971, ..., -0.05292496,
           0.04024561,  0.06641899],
         [-0.07581151, -0.01930696,  0.03905515, ..., -0.03314939,
           0.02402643, -0.06632405],
         ...,
         [-0.07378314, -0.0701132 ,  0.00507139, ...,  0.03246288,
           0.02805118, -0.04377035],
         [ 0.02790444,  0.04738973, -0.06812587, ..., -0.05374504,
     