In [34]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import json
import os
from glob import glob

class GridDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        input_grid = torch.tensor(item['input'], dtype=torch.float32)
        output_grid = torch.tensor(item['output'], dtype=torch.float32)
        return input_grid, output_grid

def custom_collate(batch):
    inputs = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    return inputs, targets

class AdaptiveGridTransformationCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(128, 128)
        self.output_conv = nn.Conv2d(128, 1, kernel_size=1)

    def forward(self, x):
        if len(x.shape) == 2:
            x = x.unsqueeze(0).unsqueeze(0)  # Add batch and channel dimensions
        elif len(x.shape) == 3:
            x = x.unsqueeze(1)  # Add channel dimension
        x = self.cnn(x)
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = x.view(x.size(0), 128, 1, 1)
        x = nn.functional.interpolate(x, size=(x.size(2), x.size(3)), mode='bilinear', align_corners=False)
        x = self.output_conv(x)
        return x.squeeze(1)  # Remove channel dimension

def load_all_puzzles(folder_path):
    all_train_data = []
    all_test_data = []
    for file_path in glob(os.path.join(folder_path, '*.json')):
        with open(file_path, 'r') as f:
            data = json.load(f)
            all_train_data.extend(data['train'])
            all_test_data.extend(data['test'])
    return all_train_data, all_test_data

def analyze_data(data):
    input_shapes = [np.array(item['input']).shape for item in data]
    output_shapes = [np.array(item['output']).shape for item in data]
    
    print(f"Number of samples: {len(data)}")
    print(f"Input shapes - Min: {min(input_shapes)}, Max: {max(input_shapes)}")
    print(f"Output shapes - Min: {min(output_shapes)}, Max: {max(output_shapes)}")

def train_model(model, train_data, test_data, num_epochs=100, lr=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    train_dataset = GridDataset(train_data)
    test_dataset = GridDataset(test_data)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate)
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, targets in train_loader:
            inputs = [inp.to(device) for inp in inputs]
            targets = [targ.to(device) for targ in targets]
            optimizer.zero_grad()
            outputs = [model(inp) for inp in inputs]
            losses = []
            for out, targ in zip(outputs, targets):
                if out.shape != targ.shape:
                    out = nn.functional.interpolate(out.unsqueeze(1), size=targ.shape, mode='bilinear', align_corners=False).squeeze(1)
                losses.append(criterion(out, targ))
            loss = sum(losses) / len(losses)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        if (epoch + 1) % 10 == 0 or epoch == 0:
            model.eval()
            test_loss = 0
            with torch.no_grad():
                for inputs, targets in test_loader:
                    inputs = [inp.to(device) for inp in inputs]
                    targets = [targ.to(device) for targ in targets]
                    outputs = [model(inp) for inp in inputs]
                    losses = [criterion(out, targ) for out, targ in zip(outputs, targets)]
                    test_loss += sum(losses).item()
            print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {total_loss/len(train_loader):.4f}, Test Loss: {test_loss/len(test_loader):.4f}")
    
    return model

def predict(model, input_grid):
    model.eval()
    with torch.no_grad():
        input_tensor = torch.tensor(input_grid, dtype=torch.float32)
        output = model(input_tensor)
    return output.squeeze().cpu().numpy()

def run_grid_transformation(folder_path):
    train_data, test_data = load_all_puzzles(folder_path)
    
    print("Analyzing training data:")
    analyze_data(train_data)
    print("\nAnalyzing test data:")
    analyze_data(test_data)
    
    model = AdaptiveGridTransformationCNN()
    print("\nStarting model training...")
    trained_model = train_model(model, train_data, test_data)
    print("Model training completed.")
    
    # Make a prediction on a test sample
    sample_input = test_data[0]['input']
    prediction = predict(trained_model, sample_input)
    print("\nSample prediction:")
    print(f"Input shape: {np.array(sample_input).shape}")
    print(f"Predicted output shape: {prediction.shape}")
    print(f"Actual output shape: {np.array(test_data[0]['output']).shape}")
    
    # Save the trained model
    torch.save(trained_model.state_dict(), 'grid_transformation_model.pth')
    print("\nModel saved as 'grid_transformation_model.pth'")
    
    return trained_model

# To run in a notebook, use:
# folder_path = os.path.join('..', 'data', 'training')
# trained_model = run_grid_transformation(folder_path)

In [35]:
folder_path = os.path.join('..', 'data', 'training')
trained_model = run_grid_transformation(folder_path)

Analyzing training data:
Number of samples: 1302
Input shapes - Min: (1, 5), Max: (30, 30)
Output shapes - Min: (1, 1), Max: (30, 30)

Analyzing test data:
Number of samples: 416
Input shapes - Min: (1, 5), Max: (30, 30)
Output shapes - Min: (1, 1), Max: (30, 30)

Starting model training...
Epoch [1/100], Train Loss: 7.0608, Test Loss: 210.6007
Epoch [10/100], Train Loss: 6.1136, Test Loss: 209.0229
Epoch [20/100], Train Loss: 5.7646, Test Loss: 211.4958
Epoch [30/100], Train Loss: 5.3413, Test Loss: 223.6958
Epoch [40/100], Train Loss: 5.0057, Test Loss: 219.2990
Epoch [50/100], Train Loss: 4.7963, Test Loss: 220.9643
Epoch [60/100], Train Loss: 4.6974, Test Loss: 224.5270
Epoch [70/100], Train Loss: 4.5756, Test Loss: 226.4280
Epoch [80/100], Train Loss: 4.5085, Test Loss: 227.2423
Epoch [90/100], Train Loss: 4.4779, Test Loss: 229.6532
Epoch [100/100], Train Loss: 4.4655, Test Loss: 226.6146
Model training completed.

Sample prediction:
Input shape: (3, 3)
Predicted output shape: ()