<h1>CNN Model</h1>

In [1]:
from pycocotools.coco import COCO
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as functions
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms
import re
from scipy import stats

In [2]:
DATADIR = "cocodoom/"
USED_RUNS = ["run1", "run2", "run3"]

dataSplit, TRAIN_RUN = "run-full-train", "run1"

annFile = '{}{}.json'.format(DATADIR,dataSplit)

In [3]:
coco_train = COCO(annFile)

loading annotations into memory...
Done (t=20.40s)
creating index...
index created!


In [4]:
dataSplit, VAL_RUN = "run-full-val", "run2"

annFile = '{}{}.json'.format(DATADIR,dataSplit)

In [5]:
coco_val = COCO(annFile)

loading annotations into memory...
Done (t=27.77s)
creating index...
index created!


In [6]:
dataSplit, TEST_RUN = "run-full-test", "run3"

annFile = '{}{}.json'.format(DATADIR,dataSplit)

In [7]:
coco_test = COCO(annFile)

loading annotations into memory...
Done (t=16.26s)
creating index...
index created!


In [8]:
player_positions = {"run1":{}, "run2":{}, "run3":{}}

for run in USED_RUNS:
    with open(DATADIR+run+"/log.txt", 'r') as log_file:
        for line in log_file:
            if "player" in line:
                line = line.strip()
                tic, stats = line.split("player:")
                x, y, z, angle = stats.split(",")
    
                # Store position in the dictionary
                player_positions[run][int(tic)] = (float(x), float(y), float(z), float(angle))

In [9]:
class DoomMotionDataset(Dataset):
    def __init__(self, coco, run, transform=None):
        self.coco = coco
        self.run = run
        self.img_ids = coco.getImgIds()
        self.transform = transform

    def __len__(self):
        return len(self.img_ids)

    def getSegmentationMask(self, rgb_filename):
        return rgb_filename.replace("rgb", "objects")

    def getDepthMask(self, rgb_filename):
        return rgb_filename.replace("rgb", "depth")

    def color_to_index(self, segmentation_image):
        # Map colors to class indices
        r, g, b = segmentation_image
        pixel_values = r + (g *  2**8) + (b * 2**16)  # From cocodoom documentation, converts to an object id

        class_map = torch.full_like(pixel_values, 3, dtype=torch.long)

        sky = (1 << 23) + 0
        horizontal = (1 << 23) + 1
        vertical = (1 << 23) + 2
        
        class_map[x == sky] = 0
        class_map[x == horizontal] = 1
        class_map[x == vertical] = 2
        return class_map

    def load_image(self, path):
        img = Image.open(path)
        return transforms.ToTensor()(img)

    def __getitem__(self, idx):
        # Load the RGB image
        rgb_filename = self.coco.loadImgs(self.img_ids[idx])[0]['file_name']
        tic = int(rgb_filename.replace(".png", "").split("/")[-1])
        next_tic = tic+1
        previous_tic = tic-1

        player_position = player_positions[self.run][tic]
        if next_tic not in player_positions[self.run]:
            next_player_position = player_position
        else:
            next_player_position = player_positions[self.run][next_tic]
        dx = next_player_position[0] - player_position[0]
        dy = next_player_position[1] - player_position[1]
        dz = next_player_position[2] - player_position[2]
        dangle = np.pi - abs(abs(next_player_position[3] - player_position[3]) - np.pi)
        
        dx_relative = dx * np.cos(2 * np.pi - player_position[3]) + dy * np.cos(player_position[3] - 1/2 * np.pi)
        dy_relative = dx * np.sin(2 * np.pi - player_position[3]) + dy * np.sin(player_position[3] - 1/2 * np.pi)
        next_motion_vector = (dx_relative, dy_relative, dz, dangle)

        if previous_tic not in player_positions[self.run]:
            prev_player_position = player_position
        else :
            prev_player_position = player_positions[self.run][previous_tic]
        dx = player_position[0] - prev_player_position[0]
        dy = player_position[1] - prev_player_position[1]
        dz = player_position[2] - prev_player_position[2]
        dangle = np.pi - abs(abs(player_position[3] - prev_player_position[3]) - np.pi)
        
        dx_relative = dx * np.cos(2 * np.pi - prev_player_position[3]) + dy * np.cos(prev_player_position[3] - 1/2 * np.pi)
        dy_relative = dx * np.sin(2 * np.pi - prev_player_position[3]) + dy * np.sin(prev_player_position[3] - 1/2 * np.pi)
        prev_motion_vector = (dx_relative, dy_relative, dz, dangle)
            
        
        # Load and process the segmentation map
        seg_image = self.load_image(self.getSegmentationMask(DATADIR + rgb_filename))
        seg_class_map = self.color_to_index(seg_image)
        seg_class_one_hot = functions.one_hot(seg_class_map, num_classes=4).to(dtype=torch.float).permute(2, 0, 1)
        #print(seg_class_one_hot.shape)
        #seg_class_mode = self.mode_pooling(seg_class_one_hot, 2)
        #print(seg_class_mode.shape)

        # Load depth map
        depth_mask = self.load_image(self.getDepthMask(DATADIR + rgb_filename))
        #depth_mask_mode = self.mode_pooling(depth_mask, 2)
        #print(depth_mask.shape)

        # Combine depth and segmentation as separate channels
        combined = torch.cat([seg_class_one_hot, depth_mask], dim=0)
        
        # Motion vector (label)
        next_motion_vector = torch.tensor(next_motion_vector, dtype=torch.float32)
        prev_motion_vector = torch.tensor(prev_motion_vector, dtype=torch.float32)
        
        return {"image": combined, "past_motion": prev_motion_vector, "target": next_motion_vector}


In [10]:
class NeuralNetwork(nn.Module):
  def __init__(self, activation_function=functions.relu, device=torch.device("cpu")):
    super(NeuralNetwork, self).__init__()

    # Max pooling like this should preserve all of the present classes in a 2x2 window into one vector.
    self.pool = nn.MaxPool2d(kernel_size=(2, 2)).to(device)
      
    self.efficient_net = torchvision.models.efficientnet_b0().to(device)

    self.efficient_net.features[0][0] = nn.Conv2d(5, 32, kernel_size=3, stride=2, padding=1, bias=False).to(device)
    num_features = self.efficient_net.classifier[1].in_features
    self.efficient_net.classifier[1] = nn.Identity()

    self.motion_embedding = nn.Linear(4, num_features).to(device)

    self.classification = nn.Linear(num_features*2, 4).to(device)

  def forward(self, image_data, prev_motion):
    x = self.pool(image_data)
    x = self.efficient_net(x)
    y = self.motion_embedding(prev_motion)
    x = torch.cat((x, y), dim=1)
    x = self.classification(x)
    return x

In [13]:
torch.cuda.empty_cache()
batch_size = 64
learning_rate = 1e-3
num_epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("cuda" if torch.cuda.is_available() else "cpu")
model = NeuralNetwork(device=device).to(device)

train_dataset = DoomMotionDataset(coco_train, TRAIN_RUN)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

val_dataset = DoomMotionDataset(coco_val, VAL_RUN)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
    for batch_idx, batch in enumerate(progress_bar):
        image, past_motion, targets = batch["image"], batch["past_motion"], batch["target"]
        image, past_motion, targets = image.to(device), past_motion.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(image, past_motion)
    
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": image.size(0)
        })

    # Average loss per epoch
    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

    model_weight_path = f"image_motion_cnn_{epoch}epoch.pth"
    torch.save(model.state_dict(), model_weight_path)

    # model.eval()  # Set the model to evaluation mode
    # running_loss = 0.0
    
    
    # progress_bar = tqdm(val_loader, desc="Validation", unit="batch")
    
    # with torch.no_grad():  # Disable gradient calculations for evaluation
    #     for batch_idx, (inputs, targets) in enumerate(progress_bar):
    #         inputs, targets = inputs.to(device), targets.to(device)
            
    #         outputs = model(inputs)
    #         loss = criterion(outputs, targets)
            
    #         running_loss += loss.item()
            
    #         progress_bar.set_postfix({
    #             "batch_loss": loss.item(),
    #             "batch_index": batch_idx + 1,
    #             "batch_size": inputs.size(0)
    #         })
    
    # # Average loss over all batches
    # val_loss = running_loss / len(val_loader)
    # print(f"Val Loss: {val_loss:.4f}")
 
# Save the trained model
torch.save(model.state_dict(), "image_motion_cnn.pth")

cuda



poch 1/20: 100%|█████████████| 3964/3964 [1:32:24<00:00,  1.40s/batch, batch_loss=49, batch_index=3964, batch_size=28]

Epoch 1, Loss: 2110.1263



poch 2/20: 100%|███████████| 3964/3964 [1:37:24<00:00,  1.47s/batch, batch_loss=44.8, batch_index=3964, batch_size=28]

Epoch 2, Loss: 766.9389



poch 3/20: 100%|███████████| 3964/3964 [1:35:47<00:00,  1.45s/batch, batch_loss=44.4, batch_index=3964, batch_size=28]

Epoch 3, Loss: 632.9967



poch 4/20: 100%|███████████| 3964/3964 [1:35:39<00:00,  1.45s/batch, batch_loss=55.2, batch_index=3964, batch_size=28]

Epoch 4, Loss: 544.3366



poch 5/20: 100%|███████████| 3964/3964 [1:35:11<00:00,  1.44s/batch, batch_loss=54.6, batch_index=3964, batch_size=28]

Epoch 5, Loss: 515.3879



poch 6/20: 100%|███████████| 3964/3964 [1:34:22<00:00,  1.43s/batch, batch_loss=49.3, batch_index=3964, batch_size=28]

Epoch 6, Loss: 508.9930



poch 7/20: 100%|███████████| 3964/3964 [1:34:10<00:00,  1.43s/batch, batch_loss=56.1, batch_index=3964, batch_size=28]

Epoch 7, Loss: 508.6478



poch 8/20: 100%|███████████| 3964/3964 [1:34:30<00:00,  1.43s/batch, batch_loss=56.5, batch_index=3964, batch_size=28]

Epoch 8, Loss: 506.2129



poch 9/20: 100%|███████████| 3964/3964 [1:31:15<00:00,  1.38s/batch, batch_loss=54.2, batch_index=3964, batch_size=28]

Epoch 9, Loss: 510.8113



poch 10/20: 100%|██████████| 3964/3964 [1:28:23<00:00,  1.34s/batch, batch_loss=56.8, batch_index=3964, batch_size=28]

Epoch 10, Loss: 506.9681



poch 11/20: 100%|██████████| 3964/3964 [1:27:41<00:00,  1.33s/batch, batch_loss=55.8, batch_index=3964, batch_size=28]

Epoch 11, Loss: 507.7137



poch 12/20: 100%|██████████| 3964/3964 [1:29:56<00:00,  1.36s/batch, batch_loss=61.2, batch_index=3964, batch_size=28]

Epoch 12, Loss: 510.1210



poch 13/20: 100%|████████████| 3964/3964 [1:28:34<00:00,  1.34s/batch, batch_loss=51, batch_index=3964, batch_size=28]

Epoch 13, Loss: 508.5548



poch 14/20: 100%|██████████| 3964/3964 [1:28:19<00:00,  1.34s/batch, batch_loss=47.7, batch_index=3964, batch_size=28]

Epoch 14, Loss: 508.4461



poch 15/20: 100%|██████████| 3964/3964 [1:29:26<00:00,  1.35s/batch, batch_loss=50.5, batch_index=3964, batch_size=28]

Epoch 15, Loss: 508.9400



poch 16/20: 100%|██████████| 3964/3964 [1:30:08<00:00,  1.36s/batch, batch_loss=46.6, batch_index=3964, batch_size=28]

Epoch 16, Loss: 507.3291



poch 17/20: 100%|██████████| 3964/3964 [1:29:57<00:00,  1.36s/batch, batch_loss=40.7, batch_index=3964, batch_size=28]

Epoch 17, Loss: 506.9990



poch 18/20: 100%|██████████| 3964/3964 [1:29:19<00:00,  1.35s/batch, batch_loss=46.1, batch_index=3964, batch_size=28]

Epoch 18, Loss: 508.8963



poch 19/20: 100%|██████████| 3964/3964 [1:30:59<00:00,  1.38s/batch, batch_loss=36.8, batch_index=3964, batch_size=28]

Epoch 19, Loss: 507.8594


Epoch 20/20: 100%|██████████| 3964/3964 [1:31:41<00:00,  1.39s/batch, batch_loss=38.7, batch_index=3964, batch_size=28]

Epoch 20, Loss: 509.2219





In [14]:
# device = (torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# criterion = torch.nn.MSELoss()
# model = NeuralNetwork(device="cuda").to(device)
# model.load_state_dict(torch.load("image_motion_cnn.pth", weights_only=True))

test_dataset = DoomMotionDataset(coco_test, TEST_RUN)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model.eval()  # Set the model to evaluation mode
running_loss = 0.0


progress_bar = tqdm(test_loader, desc="Testing", unit="batch")

with torch.no_grad():  # Disable gradient calculations for evaluation
    for batch_idx, batch in enumerate(progress_bar):
        image, past_motion, targets = batch['image'], batch['past_motion'], batch['target']
        image, past_motion, targets = image.to(device), past_motion.to(device), targets.to(device)
        
        outputs = model(image, past_motion)
        loss = criterion(outputs, targets)
        
        running_loss += loss.item()
        
        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": image.size(0)
        })

# Average loss over all batches
test_loss = running_loss / len(test_loader)
print(f"Test Loss: {test_loss:.4f}")

Testing: 100%|████████████████| 1846/1846 [41:01<00:00,  1.33s/batch, batch_loss=1.74, batch_index=1846, batch_size=58]

Test Loss: 776.4066





In [14]:
model.eval()  # Set the model to evaluation mode
running_loss = 0.0

progress_bar = tqdm(val_loader, desc="Validation", unit="batch")

with torch.no_grad():  # Disable gradient calculations for evaluation
    for batch_idx, batch in enumerate(progress_bar):
        image, past_motion, targets = batch['image'], batch['past_motion'], batch['target']
        image, past_motion, targets = image.to(device), past_motion.to(device), targets.to(device)
        
        outputs = model(image, past_motion)
        loss = criterion(outputs, targets)
        
        running_loss += loss.item()
        
        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": image.size(0)
        })

# Average loss over all batches
val_loss = running_loss / len(val_loader)
print(f"Val Loss: {val_loss:.4f}")

NameError: name 'batch_size' is not defined

In [16]:
device = (torch.device("cuda" if torch.cuda.is_available() else "cpu"))
model = NeuralNetwork(device="cuda").to(device)
model.load_state_dict(torch.load("image_motion_cnn_8epoch.pth", weights_only=True))

model.eval()  # Set the model to evaluation mode
running_loss = 0.0

batch_size = 64

val_dataset = DoomMotionDataset(coco_val, VAL_RUN)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
criterion = torch.nn.MSELoss()

progress_bar = tqdm(val_loader, desc="Validation", unit="batch")

with torch.no_grad():  # Disable gradient calculations for evaluation
    for batch_idx, batch in enumerate(progress_bar):
        image, past_motion, targets = batch['image'], batch['past_motion'], batch['target']
        image, past_motion, targets = image.to(device), past_motion.to(device), targets.to(device)
        
        outputs = model(image, past_motion)
        loss = criterion(outputs, targets)
        
        running_loss += loss.item()
        
        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": image.size(0)
        })

# Average loss over all batches
val_loss = running_loss / len(val_loader)
print(f"Val Loss: {val_loss:.4f}")

Validation: 100%|███████████| 2972/2972 [1:08:33<00:00,  1.38s/batch, batch_loss=4.48, batch_index=2972, batch_size=46]

Val Loss: 636.3955





In [17]:
test_dataset = DoomMotionDataset(coco_test, TEST_RUN)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model.eval()  # Set the model to evaluation mode
running_loss = 0.0


progress_bar = tqdm(test_loader, desc="Testing", unit="batch")

with torch.no_grad():  # Disable gradient calculations for evaluation
    for batch_idx, batch in enumerate(progress_bar):
        image, past_motion, targets = batch['image'], batch['past_motion'], batch['target']
        image, past_motion, targets = image.to(device), past_motion.to(device), targets.to(device)
        
        outputs = model(image, past_motion)
        loss = criterion(outputs, targets)
        
        running_loss += loss.item()
        
        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": image.size(0)
        })

# Average loss over all batches
test_loss = running_loss / len(test_loader)
print(f"Test Loss: {test_loss:.4f}")

Testing: 100%|████████████████| 1846/1846 [42:28<00:00,  1.38s/batch, batch_loss=4.41, batch_index=1846, batch_size=58]

Test Loss: 772.8054



