In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import torch.onnx
import pandas as pd
import os
import json

In [2]:
# Constants
DATA_DIR = 'block_dataset'
CHUNK_WIDTH = 16
CHUNK_HEIGHT = 256
CHUNK_DEPTH = 16

In [3]:
# Collect all CSVs
csv_files = [os.path.join(DATA_DIR, f) for f in os.listdir(DATA_DIR) if f.endswith('.csv')]

In [4]:
# Categorical fields to encode
categorical_fields = [
    'Block_ID',
    'Block_to_Left',
    'Block_to_Right',
    'Block_Below',
    'Block_Above',
    'Block_in_Front',
    'Block_Behind',
    'ChunkBiome',
    'Biome',
]

In [5]:
# Collect unique categories across all files
label_encoders = {field: LabelEncoder() for field in categorical_fields}
all_values = {field: [] for field in categorical_fields}

In [6]:
for file in csv_files:
    df = pd.read_csv(file)
    for field in categorical_fields:
        all_values[field].extend(df[field].dropna().tolist())

In [7]:
# Fit encoders
for field in categorical_fields:
    label_encoders[field].fit(all_values[field])

In [8]:
# Container for chunk tensors
chunk_inputs = []
chunk_outputs = []

In [9]:
feature_keys = [
    'ChunkBiome', 'Biome', 'Block_to_Left', 'Block_to_Right',
    'Block_Below', 'Block_Above', 'Block_in_Front', 'Block_Behind'
]

In [10]:
for file in csv_files:
    df = pd.read_csv(file)

    # Filter out-of-bounds rows early
    df = df[(df['x'] < CHUNK_WIDTH) & (df['y'] < CHUNK_HEIGHT) & (df['z'] < CHUNK_DEPTH)]
    
    # Normalize light level (assuming original range is 0 to 15)
    df['Light_Level'] = df['Light_Level'] / 15.0

    # Encode all label features in bulk
    for key in feature_keys + ['Block_ID']:
        df[key] = label_encoders[key].transform(df[key])

    # Initialize arrays
    chunk_input = np.zeros((CHUNK_WIDTH, CHUNK_HEIGHT, CHUNK_DEPTH, 10), dtype=np.float32)
    chunk_output = np.full((CHUNK_WIDTH, CHUNK_HEIGHT, CHUNK_DEPTH), -1, dtype=np.int64)

    # Use itertuples for speed
    for row in df.itertuples(index=False):
        x, y, z = int(row.x), int(row.y), int(row.z)

        features = [
            row.ChunkBiome,
            row.Biome,
            float(row.Is_Surface),
            float(row.Light_Level),
            row.Block_to_Left,
            row.Block_to_Right,
            row.Block_Below,
            row.Block_Above,
            row.Block_in_Front,
            row.Block_Behind,
        ]
        chunk_input[x, y, z] = features
        chunk_output[x, y, z] = row.Block_ID

    # Convert and permute
    chunk_inputs.append(torch.tensor(chunk_input).permute(3, 0, 1, 2))  # [C, X, Y, Z]
    chunk_outputs.append(torch.tensor(chunk_output))  # [X, Y, Z]

# Final stacked tensors
X = torch.stack(chunk_inputs)  # [N, C, X, Y, Z]
y = torch.stack(chunk_outputs)  # [N, X, Y, Z]

In [11]:
# Shuffle input/output pairs
X, y = shuffle(X, y, random_state=42)

In [12]:
class Terrain3DCNN(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = nn.Conv3d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv3d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv3d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv3d(128, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv3d(64, num_classes, kernel_size=1)  # output logits for each class

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.conv5(x)  # [N, num_classes, X, Y, Z]
        return x

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [14]:
in_channels = X.shape[1]
num_classes = len(label_encoders['Block_ID'].classes_)
model = Terrain3DCNN(in_channels, num_classes).to(device)

In [15]:
# [N, C, X, Y, Z] and [N, X, Y, Z]
dataset = TensorDataset(X, y)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True)

In [16]:
criterion = nn.CrossEntropyLoss(ignore_index=-1)  # ignore unassigned voxels
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [17]:
num_epochs = 5

model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_X)  # shape: [B, num_classes, X, Y, Z]

        # Reshape for loss: flatten logits and targets
        loss = criterion(
            logits.view(-1, num_classes),
            batch_y.view(-1)
        )

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {total_loss:.4f}")

Epoch 1/5 - Loss: 592.0417
Epoch 2/5 - Loss: 573.8278
Epoch 3/5 - Loss: 569.9351
Epoch 4/5 - Loss: 568.0407
Epoch 5/5 - Loss: 566.3872


In [18]:
model = Terrain3DCNN(in_channels=10, num_classes=54)  # Use 54 if that was the number of classes during training
model.load_state_dict(torch.load('terrain_model.pth', weights_only=True))  # Avoid the FutureWarning
model.eval()

# Create a dummy input with the same shape your model expects (N, C, X, Y, Z)
dummy_input = torch.randn(1, 10, 16, 256, 16)  # Example: batch size 1, 10 input channels

# Export the model to ONNX format
torch.onnx.export(model, dummy_input, "terrain_model.onnx", verbose=True, input_names=['input'], output_names=['output'])

print("Model has been successfully exported to ONNX format!")

Model has been successfully exported to ONNX format!


In [19]:
# Weights + biases
model = Terrain3DCNN(in_channels=10, num_classes=54)

# Load the model weights
model.load_state_dict(torch.load('terrain_model.pth', weights_only=True))
model.eval()

# Iterate through the model parameters (weights and biases)
for name, param in model.named_parameters():
    if 'weight' in name:
        print(f"Weight - {name}: {param.shape}")
        print(param.data)  # Prints the weight values
    elif 'bias' in name:
        print(f"Bias - {name}: {param.shape}")
        print(param.data)

Weight - conv1.weight: torch.Size([32, 10, 3, 3, 3])
tensor([[[[[-1.3905e-02, -1.0677e-01, -5.2698e-02],
           [-1.1878e-01, -7.0797e-02,  3.1018e-03],
           [-6.2815e-02, -3.3740e-02, -1.3716e-02]],

          [[-1.2379e-01, -8.9295e-02, -8.6296e-03],
           [-3.8084e-02, -7.2617e-02, -2.7700e-02],
           [-7.1916e-02, -1.1034e-01, -9.4524e-02]],

          [[-2.4671e-02, -4.3879e-02, -6.2657e-02],
           [-9.7150e-02, -3.4876e-02, -1.0590e-01],
           [-1.6914e-02, -3.3751e-02, -3.0996e-02]]],


         [[[-6.4871e-02, -8.0717e-02, -1.6083e-02],
           [-2.3584e-02, -6.9940e-03, -2.4390e-02],
           [-2.7653e-02, -2.5416e-02, -1.2911e-02]],

          [[-7.3781e-02, -2.4403e-02, -5.7610e-02],
           [-3.4954e-02, -4.2807e-02, -9.5148e-02],
           [-1.2003e-01, -7.9770e-02, -8.8492e-02]],

          [[-3.0750e-02, -4.8067e-02, -2.9913e-02],
           [-6.4749e-02, -8.5319e-03, -2.0246e-02],
           [-4.9191e-02, -8.7836e-02, -3.4171e-02]]

In [20]:
label_encoders = {field: LabelEncoder() for field in categorical_fields}

In [21]:
label_encoders = {field: LabelEncoder() for field in categorical_fields}
all_values = {field: [] for field in categorical_fields}

# Collect values from each CSV
for file in csv_files:
    df = pd.read_csv(file)
    for field in categorical_fields:
        all_values[field].extend(df[field].dropna().tolist())

# Fit the encoders
for field in categorical_fields:
    label_encoders[field].fit(all_values[field])

# Now check the encoded categories
for field, encoder in label_encoders.items():
    if hasattr(encoder, 'classes_'):
        print(f"\n{field} Encoding:")
        for i, label in enumerate(encoder.classes_):
            print(f'"{label}": {i}')
    else:
        print(f"{field} encoder has not been fitted yet.")


Block_ID Encoding:
"ACACIA_LEAVES": 0
"ACACIA_LOG": 1
"AIR": 2
"AMETHYST_BLOCK": 3
"AMETHYST_CLUSTER": 4
"ANDESITE": 5
"AZALEA": 6
"AZALEA_LEAVES": 7
"AZURE_BLUET": 8
"BAMBOO": 9
"BARREL": 10
"BEDROCK": 11
"BEE_NEST": 12
"BIG_DRIPLEAF": 13
"BIG_DRIPLEAF_STEM": 14
"BIRCH_LEAVES": 15
"BIRCH_LOG": 16
"BLACK_STAINED_GLASS": 17
"BONE_BLOCK": 18
"BRAIN_CORAL": 19
"BRAIN_CORAL_BLOCK": 20
"BRAIN_CORAL_FAN": 21
"BRAIN_CORAL_WALL_FAN": 22
"BROWN_MUSHROOM": 23
"BROWN_MUSHROOM_BLOCK": 24
"BROWN_TERRACOTTA": 25
"BUBBLE_COLUMN": 26
"BUBBLE_CORAL": 27
"BUBBLE_CORAL_BLOCK": 28
"BUBBLE_CORAL_FAN": 29
"BUBBLE_CORAL_WALL_FAN": 30
"BUDDING_AMETHYST": 31
"CALCITE": 32
"CAVE_AIR": 33
"CAVE_VINES": 34
"CAVE_VINES_PLANT": 35
"CHAIN": 36
"CHERRY_LEAVES": 37
"CHERRY_LOG": 38
"CHEST": 39
"CHISELED_TUFF": 40
"CHISELED_TUFF_BRICKS": 41
"CLAY": 42
"COAL_ORE": 43
"COARSE_DIRT": 44
"COBBLESTONE": 45
"COBWEB": 46
"COCOA": 47
"COPPER_BLOCK": 48
"COPPER_ORE": 49
"CORNFLOWER": 50
"DANDELION": 51
"DARK_OAK_LEAVES": 52
"D

In [22]:
idx_to_block = {i: name for i, name in enumerate(label_encoders['Block_ID'].classes_)}
with open("block_id_mapping.json", "w") as f:
    json.dump(idx_to_block, f)

print("Saved block ID mapping to block_id_mapping.json")

Saved block ID mapping to block_id_mapping.json
