In [8]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from skimage.metrics import structural_similarity as ssim
from torchvision.transforms import Normalize

#soham:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from scipy.optimize import linear_sum_assignment


In [9]:
def divide_image(image):
    parts = []
    height, width, _ = image.shape
    part_height = height // 3
    part_width = width // 3

    for i in range(3):
        for j in range(3):
            part = image[i*part_height:(i+1)*part_height, j*part_width:(j+1)*part_width]
            parts.append(part)

    return parts

def generate_combinations(parts, num_combinations):
    combinations = []
    original_positions = []
    indices = list(range(len(parts)))

    for _ in range(num_combinations):
        random.shuffle(indices)
        combination = [parts[i] for i in indices]
        combinations.append(combination)
        original_positions.append(indices.copy())

    return combinations, original_positions
def stitch_shuffled_image(parts):
    num_parts = len(parts)
    part_size = parts[0].shape[0]  # Assuming all parts are square

    stitched_image_size = int(np.sqrt(num_parts) * part_size)
    stitched_image = np.zeros((stitched_image_size, stitched_image_size, parts[0].shape[2]), dtype=np.uint8)

    for i in range(stitched_image.shape[0] // part_size):
        for j in range(stitched_image.shape[1] // part_size):
            part_index = i * int(stitched_image.shape[0] / part_size) + j
            stitched_image[i*part_size:(i+1)*part_size, j*part_size:(j+1)*part_size] = parts[part_index]

    return stitched_image
# checking if the target sequence is correct
def reconstructed_image(img, non_converted_target_data, test=False):

    sequenced = [0] * 9
    for i in range(0,27,3):

        # stack the 3 channels to get the original image
        blue_channel = img[:,:,i]
        green_channel = img[:,:,i+1]
        red_channel = img[:,:,i+2]

        # stack the 3 channels to get the original image
        tile = np.stack((blue_channel, green_channel, red_channel), axis=2)

        if test:
            sequenced[i // 3] = tile
        else:
            sequenced[non_converted_target_data[i // 3]] = tile

    stiched_img = stitch_shuffled_image(sequenced)
    return stiched_img


In [10]:
class JigsawDataset(Dataset):
    def __init__(self, input_data, target_data, transform=None):
        self.input_data = input_data
        self.target_data = target_data
        self.transform = transform

    def __len__(self):
        return len(self.input_data)

    def __getitem__(self, idx):
        image = self.input_data[idx]
        target = self.target_data[idx]

        # Convert NumPy arrays to float tensors
        image = torch.from_numpy(image).float()
        target = torch.from_numpy(target)

        if self.transform:
            image = self.transform(image)

        return image, target

class JigsawModel(nn.Module):
    def __init__(self):
        super(JigsawModel, self).__init__()
        self.conv1 = nn.Conv2d(27, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 10 * 10, 4096)
        self.fc2 = nn.Linear(4096, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 81)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.reshape(-1, 128 * 10 * 10)  # Replace view with reshape
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x






In [11]:
# # Define the JigsawModel class with batch normalization layers
# class JigsawModel(nn.Module):
#     def __init__(self):
#         super(JigsawModel, self).__init__()
#         self.conv1 = nn.Conv2d(27, 64, 3, padding=1)
#         self.bn1 = nn.BatchNorm2d(64)  # Batch normalization layer
#         self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
#         self.bn2 = nn.BatchNorm2d(128)  # Batch normalization layer
#         self.pool = nn.MaxPool2d(2, 2)
#         self.fc1 = nn.Linear(128 * 10 * 10, 4096)
#         self.fc2 = nn.Linear(4096, 1024)
#         self.fc3 = nn.Linear(1024, 512)
#         self.fc4 = nn.Linear(512, 81)

#     def forward(self, x):
#         x = self.pool(F.relu(self.bn1(self.conv1(x))))
#         x = self.pool(F.relu(self.bn2(self.conv2(x))))
#         x = x.view(-1, 128 * 10 * 10)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = F.relu(self.fc3(x))
#         x = self.fc4(x)

#         return x


In [12]:
# class BoundaryLoss(nn.Module):
#     def __init__(self):
#         super(BoundaryLoss, self).__init__()

#     def forward(self, outputs, labels):
#         batch_size = outputs.size(0)
#         top_bottom_ssim = torch.zeros(batch_size)
#         left_right_ssim = torch.zeros(batch_size)

#         for i in range(batch_size):
#             # Reshape the outputs and labels to match SSIM function requirements
#             output_img = outputs[i].view(9, 9).unsqueeze(0).unsqueeze(0)
#             label_img = labels[i].view(9, 9).unsqueeze(0).unsqueeze(0)

#             # Compute SSIM
#             top_bottom_ssim[i] = 1 - torch.mean(torch.abs(output_img[0, :, 0, :] - label_img[0, :, -1, :]))
#             left_right_ssim[i] = 1 - torch.mean(torch.abs(output_img[0, 0, :, :] - label_img[0, -1, :, :]))

#         avg_tb_ssim = torch.mean(top_bottom_ssim)
#         avg_lr_ssim = torch.mean(left_right_ssim)

#         loss = avg_tb_ssim + avg_lr_ssim

#         return loss




def hungarian_loss(outputs, labels):
    batch_size = outputs.size(0)
    n = outputs.size(1)

    # Reshape outputs and labels to 2D matrices
    outputs = outputs.view(batch_size, -1)
    labels = labels.view(batch_size, -1)

    # Calculate pairwise distance
    dist = torch.cdist(outputs, labels, p=2)

    # Solve the assignment problem using the Hungarian algorithm
    cost_matrix = dist.cpu().detach().numpy()
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # Compute the Hungarian loss
    loss = torch.tensor(cost_matrix[row_ind, col_ind].sum() / batch_size)

    return loss.to(outputs.device)




In [13]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [14]:
# Load and preprocess data
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
images = np.load('/content/drive/MyDrive/tranformed_cavallo.npy')



Mounted at /content/drive


In [15]:
input_data = []
target_data = []
non_converted_target_data = []

for image in images:
    parts = divide_image(image)
    combinations, original_positions = generate_combinations(parts, 10)

    for idx, combination in enumerate(combinations):

        # shape of combination is (9, 40, 40, 3)
        combination = np.array(combination).transpose(0, 3, 1, 2)
        combination = np.concatenate(combination, axis=0).transpose(1, 2, 0)
        input_data.append(combination)

        dummy_target = np.zeros((9, 9), dtype=np.uint8)
        for i in range(9):
            dummy_target[i, original_positions[idx][i]] = 1

        target_data.append(dummy_target.flatten())

In [16]:
# model = JigsawModel()
# boundary_loss_fn = BoundaryLoss()
# adversarial_loss_fn = nn.BCELoss()
# weight_adversarial = 0.5  # Adjust as needed
# weight_boundary = 0.5
# num_epochs = 20
# batch_size = 64
# soham:

model = JigsawModel()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)  # Move model to GPU if available

print(model)
print("Number of trainable parameters:", sum(p.numel() for p in model.parameters() if p.requires_grad))

criterion = hungarian_loss
# optimizer = optim.Adam(model.parameters(), lr=0.0001)
num_epochs = 10
batch_size = 64
print(model)
print("Number of trainable parameters:", sum(p.numel() for p in model.parameters() if p.requires_grad))


# Initialize optimizer
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)



JigsawModel(
  (conv1): Conv2d(27, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=12800, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=512, bias=True)
  (fc4): Linear(in_features=512, out_features=81, bias=True)
)
Number of trainable parameters: 57284049
JigsawModel(
  (conv1): Conv2d(27, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=12800, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=512, bias=True)
  (fc4

In [17]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(input_data, target_data, test_size=0.2, random_state=42)




# Apply normalization transform to your dataset
train_dataset = JigsawDataset(X_train, y_train)
test_dataset = JigsawDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)



In [18]:
# Define the weights for the Hungarian and Cross Entropy losses
weight_hungarian = 0.5
weight_ce = 0.5

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    total_correct = 0
    total_samples = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs = inputs.permute(0, 3, 1, 2).float().to(device)  # Move inputs to the same device as the model
        labels = labels.to(device)  # Move labels to the same device as the model

        optimizer.zero_grad()

        outputs = model(inputs)

        # Calculate Hungarian loss
        hungarian_loss = your_hungarian_loss_function(outputs, labels)

        # Calculate Cross Entropy loss
        ce_loss = criterion(outputs, labels.float())

        # Combine the losses with weights
        total_loss = weight_hungarian * hungarian_loss + weight_ce * ce_loss

        # Ensure that total loss requires gradients
        total_loss.requires_grad = True

        total_loss.backward()
        optimizer.step()

        running_loss += total_loss.item()
        if i % 100 == 99:
            print('Epoch: %d, Batch: %5d, Loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

        # Calculate accuracy
        _, predicted_indices = torch.max(outputs, 1)  # Get the index with the highest probability
        predicted_labels = torch.zeros_like(labels)
        for i, idx in enumerate(predicted_indices):
            predicted_labels[i, idx] = 1  # Convert index to one-hot encoding

        correct = (predicted_labels == labels).all(dim=1).sum().item()
        total_correct += correct
        total_samples += labels.size(0)

    accuracy = total_correct / total_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss}, Accuracy: {accuracy}')

print('Finished Training')


Epoch: 1, Batch:   100, Loss: 10.301
Epoch: 1, Batch:   200, Loss: 10.367
Epoch: 1, Batch:   300, Loss: 10.275
Epoch [1/10], Loss: 278.5815076828003, Accuracy: 0.0
Epoch: 2, Batch:   100, Loss: 10.285
Epoch: 2, Batch:   200, Loss: 10.309
Epoch: 2, Batch:   300, Loss: 10.332
Epoch [2/10], Loss: 279.86637592315674, Accuracy: 0.0
Epoch: 3, Batch:   100, Loss: 10.305
Epoch: 3, Batch:   200, Loss: 10.318
Epoch: 3, Batch:   300, Loss: 10.336
Epoch [3/10], Loss: 276.4600124359131, Accuracy: 0.0
Epoch: 4, Batch:   100, Loss: 10.319
Epoch: 4, Batch:   200, Loss: 10.299
Epoch: 4, Batch:   300, Loss: 10.332
Epoch [4/10], Loss: 277.9919328689575, Accuracy: 0.0
Epoch: 5, Batch:   100, Loss: 10.275
Epoch: 5, Batch:   200, Loss: 10.334
Epoch: 5, Batch:   300, Loss: 10.339
Epoch [5/10], Loss: 277.6602029800415, Accuracy: 0.0
Epoch: 6, Batch:   100, Loss: 10.332
Epoch: 6, Batch:   200, Loss: 10.323
Epoch: 6, Batch:   300, Loss: 10.287
Epoch [6/10], Loss: 278.2001495361328, Accuracy: 0.0
Epoch: 7, Batch

In [19]:
idx = random.randint(0, len(X_test))
image = X_test[idx]
target = y_test[idx]
inputs = torch.tensor(image).permute(2, 0, 1).unsqueeze(0).float()
outputs = model(inputs)
outputs = outputs.reshape(9, 9)
print(outputs)
converted_target = np.argmax(target.reshape(9, 9), axis=1)
print(converted_target)

tensor([[-1.0691,  1.9661,  1.9562,  0.7019,  1.6677,  0.9236,  0.9957,  0.6199,
          1.1292],
        [ 0.3475,  0.4670, -0.6204, -0.9456,  0.4840, -0.1379, -1.3853, -3.0874,
          0.9183],
        [ 0.3471, -0.5949,  0.0750,  2.9881,  0.9705,  0.0495, -1.4054,  2.6288,
         -0.5224],
        [-1.5066, -1.3019, -0.4854,  0.9044, -0.4017,  0.1754,  1.0671,  1.2370,
          0.1224],
        [-0.0730,  0.7724,  2.2766, -1.2377,  1.5424, -1.4046, -1.5353,  0.7747,
          0.6702],
        [ 0.4426,  1.0194,  2.1990, -0.7269, -0.2335, -1.4189,  0.5581, -1.2809,
          1.2122],
        [ 0.5904,  1.4663,  0.1507,  1.9854,  0.6001,  0.4612,  1.2083,  1.1583,
          0.8806],
        [ 1.2785, -0.0951, -1.5371, -0.0650,  1.8375, -0.0039, -1.5387, -0.9211,
         -0.9900],
        [-0.0524,  1.1667, -0.7166,  2.7564,  0.2570, -0.7949,  0.9241,  1.8795,
          0.3706]], grad_fn=<ViewBackward0>)
[2 0 8 1 6 4 5 7 3]


In [20]:
import os

# Create the directory if it doesn't exist
os.makedirs('saved_models', exist_ok=True)

# Save the model
torch.save(model.state_dict(), 'saved_models/soham_loss.pth')

In [21]:
def recur(sequence, outputs):
    sequence = np.array(sequence)
    for i in range(len(sequence)):
        while np.sum(sequence == sequence[i]) > 1:  # If the current element is a duplicate
            # Find the index in outputs for the current sequence element that is not yet in the updated sequence
            scores = outputs[i]
            sorted_indices = np.argsort(scores)[::-1]  # Indices of scores sorted in descending order
            for idx in sorted_indices:
                if idx not in sequence:
                    sequence[i] = idx
                    break

    return sequence.tolist()

In [23]:
correct = 0
total = 0
per_tile_accuracy = 0
################
model.to("cpu")
model.eval()
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs = inputs.permute(0, 3, 1, 2).float()
        outputs = model(inputs)

        # reshape the output to 9x9 matrix
        outputs = outputs.reshape(-1, 9, 9)
        predicted = torch.argmax(outputs, dim=1)

        # now doing the same for the target
        labels = labels.reshape(-1, 9, 9)
        target = torch.argmax(labels, dim=1)

        # check if the predicted sequence is correct
        for i in range(len(predicted)):
            updated_predicted = recur(predicted[i], outputs[i, : , :].numpy())

            if torch.equal(torch.tensor(updated_predicted), target[i]):
                correct += 1
            total += 1

            per_tile_accuracy += (np.array(updated_predicted) == target[i].numpy()).sum() / 9

print('Accuracy on test images: %d %%' % (100 * correct / total))
print('Per tile accuracy on test images: %d %%' % (100 * per_tile_accuracy / total))

Accuracy on test images: 0 %
Per tile accuracy on test images: 10 %


In [25]:
import os

# Create the directory if it doesn't exist
os.makedirs('saved_models', exist_ok=True)

# Save the model
torch.save(model.state_dict(), 'saved_models/hung_loss.pth')


In [None]:
# idx = random.randint(0, len(X_test))
# image = X_test[idx]
# target = y_test[idx]
# inputs = torch.tensor(image).permute(2, 0, 1).unsqueeze(0).float()
# outputs = model(inputs)
# outputs = outputs.reshape(9, 9)
# print(outputs)
# converted_target = np.argmax(target.reshape(9, 9), axis=1)
# print(converted_target)