In [1]:
from config import cfg
from data import build_dataset, make_data_loader
from torch.utils.data import DataLoader, Dataset, Subset
from tqdm import tqdm
from torch.nn import Conv3d, ConstantPad3d
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import random

## Predicting categories

Using one-hot representation of categories:
```python
[array(['Arenas', 'Boats', 'Castles', 'Dungeons', 'Entertainement',
        'Floating Islands', 'Flying Machines', 'Games', 'Gardens',
        'Ground Vehicles', 'Houses And Shops', 'Islands', 'Miscellaneous',
        'Pixel Art', 'Redstone', 'Temples', 'Towers', 'Towns', 'Traps'],
       dtype=object)]
```

In [2]:
class AutoEncoder(nn.Module):
    def _conv_layer(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv3d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm3d(out_channels),
            nn.MaxPool3d(2),
        )

    
    def _linear_layer(self, in_features, out_features):
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm1d(out_features)
        )

    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            self._conv_layer(1, 64, 3, 2, 1),
            self._conv_layer(64, 128, 3, 2, 1), # 128, 8, 8, 8
            nn.Flatten(),
            nn.Linear(65536, 128),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm1d(128),
        )

        self.classifier = nn.Sequential(
            nn.Linear(128, 19),
            nn.Softmax(dim=1)
        )
        
        self.decoder = nn.Sequential(
                    nn.Linear(128, 65536),
                    nn.LeakyReLU(inplace=True),
                    nn.BatchNorm1d(65536),
                    nn.Unflatten(1, (128, 8, 8, 8)),

                    nn.ConvTranspose3d(128, 64, kernel_size=3, stride=3, padding= 1),
                    nn.LeakyReLU(inplace=True),
            
                    nn.ConvTranspose3d(64, 32, kernel_size=3, stride=3, padding=1),
                    nn.LeakyReLU(inplace=True),

                    nn.ConvTranspose3d(32, 1, kernel_size=2, stride=2),
                    nn.LeakyReLU(inplace=True),
                    nn.Linear(128, 128),
                    nn.LeakyReLU(inplace=True),
                    nn.BatchNorm3d(1),
                
                )
        
    def forward(self, x):
        encoded = self.encoder(x)
        y_hat = self.classifier(encoded)
        decoded = self.decoder(encoded)
        return y_hat, decoded


raw = build_dataset(None)
dataset = DataLoader(raw, batch_size=2, shuffle=True)
x = make_data_loader(cfg, is_train=True)

# print(x.shape)
model = AutoEncoder()

for schem_data, metadata in dataset:
    print(metadata)
    print(metadata.shape)
    d = schem_data.unsqueeze(1)
    output, decoded = model(d)
    print(decoded.shape)
    # print(decoded)
    break

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
          0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
          0., 0.]]], dtype=torch.float64)
torch.Size([2, 1, 19])
torch.Size([2, 1, 128, 128, 128])


In [3]:
try:
    model = AutoEncoder()
    model.load_state_dict(torch.load('schematic_autoencoder.pth'))
except Exception as e:
    print(e)
    model = AutoEncoder()

In [4]:
loss_function = nn.MSELoss()
clf_loss_function = nn.CrossEntropyLoss()
optimiser = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# dataset = DataLoader(build_dataset(None), batch_size=4, shuffle=True)
dataset = build_dataset(None)
training_data = Subset(dataset, range(0, 2000))
training_data = DataLoader(training_data, batch_size=25, shuffle=True)

In [11]:
# Training using autoencoder for recreation


num_epochs = 100

for epoch in range(num_epochs):
    train_loss = 0
    for schem_data, target in (pbar := tqdm(training_data)):
        optimiser.zero_grad()
        schem_data = schem_data.unsqueeze(1)

        output, decoded = model(schem_data)
        output = output.type(torch.float64)

        clf_loss = clf_loss_function(output, target.squeeze(1))
        rec_loss = loss_function(decoded, schem_data)

        clf_loss.backward(retain_graph=True)
        rec_loss.backward()
        optimiser.step()
        
        train_loss += rec_loss.item() + clf_loss.item()
        pbar.set_description(f"Epoch {epoch+1}, Training Loss: {train_loss:.6f}")
    train_loss = train_loss/len(training_data)

Epoch 1, Training Loss: 2175.553288: 100%|██████████| 80/80 [14:22<00:00, 10.79s/it]
Epoch 2, Training Loss: 2158.331319: 100%|██████████| 80/80 [14:30<00:00, 10.88s/it]
Epoch 3, Training Loss: 2154.913129: 100%|██████████| 80/80 [14:32<00:00, 10.91s/it]
Epoch 4, Training Loss: 2151.653074: 100%|██████████| 80/80 [14:33<00:00, 10.92s/it]
Epoch 5, Training Loss: 2149.963042: 100%|██████████| 80/80 [14:35<00:00, 10.94s/it]
Epoch 6, Training Loss: 2147.374109: 100%|██████████| 80/80 [14:35<00:00, 10.95s/it]
Epoch 7, Training Loss: 2145.711355: 100%|██████████| 80/80 [14:36<00:00, 10.96s/it]
Epoch 8, Training Loss: 2143.999899: 100%|██████████| 80/80 [14:36<00:00, 10.96s/it]
Epoch 9, Training Loss: 2141.524839: 100%|██████████| 80/80 [14:40<00:00, 11.01s/it]
Epoch 10, Training Loss: 2140.265988: 100%|██████████| 80/80 [14:38<00:00, 10.98s/it]
Epoch 11, Training Loss: 2138.033274: 100%|██████████| 80/80 [14:39<00:00, 10.99s/it]
Epoch 12, Training Loss: 2137.113086: 100%|██████████| 80/80 [1

KeyboardInterrupt: 

In [12]:
PATH = "schematic_autoencoder.pth"
torch.save(model.state_dict(), PATH)

## Analysing the model

All of our categories are one-hot encoded in this order:

In [5]:
CATEGORIES = raw.enc.categories_[0]
CATEGORIES

array(['Arenas', 'Boats', 'Castles', 'Dungeons', 'Entertainement',
       'Floating Islands', 'Flying Machines', 'Games', 'Gardens',
       'Ground Vehicles', 'Houses And Shops', 'Islands', 'Miscellaneous',
       'Pixel Art', 'Redstone', 'Temples', 'Towers', 'Towns', 'Traps'],
      dtype=object)

In [6]:
def get_predicted_category(output):
    return CATEGORIES[output.argmax(dim=1, keepdim=True)]

In [8]:
for i in range(0, 10): 
    schem_data, target = raw[random.randint(0, len(raw))]
    t_ = model.eval()
    y_hat, _ = t_.forward(schem_data.unsqueeze(0).unsqueeze(0))
    # get predicted class from max value in prediction vector
    pred = get_predicted_category(y_hat)
    print(f"Predicted: {pred}, Actual: {CATEGORIES[target.argmax()]}")
     

Predicted: Houses And Shops, Actual: Houses And Shops
Predicted: Houses And Shops, Actual: Houses And Shops
Predicted: Towers, Actual: Towers
Predicted: Miscellaneous, Actual: Houses And Shops
Predicted: Towers, Actual: Houses And Shops
Predicted: Miscellaneous, Actual: Houses And Shops
Predicted: Castles, Actual: Castles
Predicted: Houses And Shops, Actual: Houses And Shops
Predicted: Houses And Shops, Actual: Temples
Predicted: Miscellaneous, Actual: Miscellaneous


In [10]:
# confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns

def get_confusion_matrix(model, dataset):
    y_true = []
    y_pred = []
    for schem_data, target in dataset:
        schem_data = schem_data.unsqueeze(1)
        output, _ = model(schem_data)

        target_idx = target.argmax(dim=2, keepdim=True)
        output_idx = output.argmax(dim=1, keepdim=True).unsqueeze(1)

        print(target_idx.shape, output_idx.shape)
        
        print(f"Target category: {CATEGORIES[target_idx]}, Predicted category: {CATEGORIES[output_idx]}")

        y_true.append(target_idx)
        y_pred.append(output_idx)
        

    y_true = torch.cat(y_true)
    y_pred = torch.cat(y_pred)
    return confusion_matrix(y_true, y_pred)

testing_data = Subset(dataset, range(0, 10))
testing_data = DataLoader(testing_data, batch_size=5, shuffle=True)
confusion_matrix = get_confusion_matrix(model, testing_data)


torch.Size([5, 1, 1]) torch.Size([5, 1, 1])
Target category: [[['Towers']]

 [['Houses And Shops']]

 [['Houses And Shops']]

 [['Miscellaneous']]

 [['Castles']]], Predicted category: [[['Towers']]

 [['Houses And Shops']]

 [['Houses And Shops']]

 [['Miscellaneous']]

 [['Castles']]]
torch.Size([5, 1, 1]) torch.Size([5, 1, 1])
Target category: [[['Houses And Shops']]

 [['Houses And Shops']]

 [['Houses And Shops']]

 [['Houses And Shops']]

 [['Castles']]], Predicted category: [[['Houses And Shops']]

 [['Houses And Shops']]

 [['Houses And Shops']]

 [['Miscellaneous']]

 [['Castles']]]


ValueError: unknown is not supported

In [24]:
model.encoder(schem_data.unsqueeze(0).unsqueeze(0))

tensor([[-7.8455e-01,  1.4531e-02, -7.8232e-01, -3.4905e-03, -6.7346e-01,
         -4.2006e-01, -4.4595e-01, -6.2003e-01, -2.3628e-01, -6.7820e-01,
         -3.0367e-01, -7.3266e-01, -5.2309e-01,  6.7920e-01,  1.7442e-01,
         -6.0633e-01, -5.0935e-02, -6.8992e-01, -4.5689e-01, -7.6883e-01,
         -5.2528e-01, -4.8645e-01, -5.8308e-01, -3.2763e-01,  2.1635e-01,
          2.9517e-01, -3.7397e-01,  2.6488e-01, -1.8199e-01,  8.0500e-01,
         -6.4660e-01, -4.2963e-01, -5.0702e-01, -2.4107e-01,  5.4232e-02,
         -7.4999e-01,  5.5862e-01, -7.0596e-01,  5.8168e-01, -1.3444e-01,
         -1.8748e-01, -1.6629e-01, -3.2080e-01, -6.3765e-01, -4.2578e-01,
         -4.5445e-01, -9.0726e-01, -3.6081e-01, -5.9168e-01,  2.4587e-01,
         -6.4693e-01, -5.3178e-01, -3.4638e-01,  8.7996e-01,  5.7941e-01,
         -4.2231e-01, -1.7770e-01, -5.7971e-01, -6.1181e-01, -1.5868e-01,
          5.7187e-01, -3.8504e-01, -8.0563e-02,  6.8467e-01,  1.0760e-01,
         -3.3119e-01, -3.7146e-01, -1.

In [165]:
from nbtschematic import SchematicFile

sf = SchematicFile(shape=(128, 128, 128))
sf.blocks = np.array(schem_data)
sf.save("test.schematic")

In [None]:
sub = Subset(build_dataset(None), range(100))
sub = DataLoader(sub, batch_size=4, shuffle=True)

In [None]:
output.dtype

torch.float32

In [None]:
class VoxelAutoencoder(nn.Module):
    def __init__(self, embedding_dim):
        super(VoxelAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv3d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv3d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv3d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv3d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv3d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv3d(256, embedding_dim, kernel_size=3, stride=2, padding=1)
        )
        
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(embedding_dim, 256, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(32, 16, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(16, 1, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat, z

def train_voxel_embedding(embedding_dim, num_epochs, batch_size, learning_rate, device):
    # dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    dataloader = DataLoader(Subset(dataset, range(20)), batch_size=batch_size, shuffle=True, pin_memory=True)
    
    model = VoxelAutoencoder(embedding_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()
    
    for epoch in range(num_epochs):
        epoch_loss = 0
        for batch in tqdm(sub):
            batch = batch.unsqueeze(1).float().to(device) / 255.0
            optimizer.zero_grad()
            x_hat, z = model(batch)
            loss = criterion(x_hat, batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * batch.size(0)
            
        epoch_loss /= len(dpretraining an autataset)
        print(f"Epoch {epoch}: Loss={epoch_loss}")
        
    return model

train_voxel_embedding(128, 10, 4, 1e-3, None)

  0%|          | 0/25 [00:00<?, ?it/s]

In [None]:
import torch
import torch.nn as nn

# Define the voxel embedding model
class VoxelEmbedding(nn.Module):
    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
    
    def forward(self, x):
        # Reshape input to be 1D tensor
        x = x.view(-1)
        # Embed the input
        x = self.embedding(x)
        # Reshape output to be 3D tensor
        x = x.view(-1, 128, 128, 128, -1)
        return x

# Define the training data
train_data = torch.randint(low=0, high=256, size=(1000, 128, 128, 128), dtype=torch.long)

# Initialize the model and optimizer
model = VoxelEmbedding(num_embeddings=256, embedding_dim=128)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Train the model
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(train_data):
        optimizer.zero_grad()
        inputs = data.unsqueeze(0)
        outputs = model(inputs)
        loss = outputs.mean()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1} loss: {running_loss / len(train_data)}")

# Test the model
test_data = torch.randint(low=0, high=256, size=(1, 128, 128, 128), dtype=torch.long)
with torch.no_grad():
    inputs = test_data.unsqueeze(0)
    outputs = model(inputs)
    print(outputs.shape)


: 

: 