In [1]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

In [23]:
# One-off preprocessing (disabled): convert the extracted .jpg RGB frames to .png

# from PIL import Image
# import os

# input_dir = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\rgb'
# output_dir = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\rgb_png'

# if not os.path.exists(output_dir):
#     os.makedirs(output_dir)

# for filename in os.listdir(input_dir):
#     if filename.lower().endswith('.jpg'):
#         img = Image.open(os.path.join(input_dir, filename))
#         # Convert to PNG
#         png_filename = os.path.splitext(filename)[0] + '.png'
#         img.save(os.path.join(output_dir, png_filename), 'PNG')

In [52]:
import torch

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Echo the choice so the cell output records which device was picked
print(f"Using device: {device}")

Using device: cuda


In [7]:
# Define the UNet model architecture
class UNet(nn.Module):
    """Encoder-decoder network with skip connections and two output heads.

    Five stride-2 convolution stages (``down1`` .. ``down5``) are followed by
    four stride-2 transposed-convolution stages (``up1`` .. ``up4``). After
    every decoder stage the matching encoder feature map is concatenated on
    the channel axis. Two 1x1 convolutions then produce a 1-channel depth map
    and a 3-channel RGB image from the final 128-channel feature map.

    Because there is one more encoder stage than decoder stages, both outputs
    have half the spatial size of the input. Input height and width need to
    be multiples of 32 for the skip-connection shapes to line up.
    """

    # (in_channels, out_channels) of each encoder stage, in execution order.
    _ENCODER_CHANNELS = ((1, 64), (64, 128), (128, 256), (256, 512), (512, 1024))
    # Decoder stages; from the second one on, the input width is doubled by
    # the skip concatenation that precedes it.
    _DECODER_CHANNELS = ((1024, 512), (1024, 256), (512, 128), (256, 64))

    def __init__(self):
        super().__init__()
        # Attribute names and registration order are kept stable so that
        # state_dict keys (down1.0.weight, ...) stay compatible with saved weights.
        for stage, (c_in, c_out) in enumerate(self._ENCODER_CHANNELS, start=1):
            setattr(self, f"down{stage}", self.conv_block(c_in, c_out))
        for stage, (c_in, c_out) in enumerate(self._DECODER_CHANNELS, start=1):
            setattr(self, f"up{stage}", self.upconv_block(c_in, c_out))
        self.final_depth = nn.Conv2d(128, 1, kernel_size=1)  # depth map head
        self.final_rgb = nn.Conv2d(128, 3, kernel_size=1)    # RGB image head

    def conv_block(self, in_channels, out_channels):
        """Return a stride-2 Conv2d -> BatchNorm2d -> ReLU stage (halves H and W)."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    def upconv_block(self, in_channels, out_channels):
        """Return a stride-2 ConvTranspose2d -> BatchNorm2d -> ReLU stage (doubles H and W)."""
        layers = [
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the network and return ``(depth_output, rgb_output)``."""
        # Encoder: remember every stage output for the skip connections.
        skips = []
        features = x
        for stage in range(1, len(self._ENCODER_CHANNELS) + 1):
            features = getattr(self, f"down{stage}")(features)
            skips.append(features)

        # The deepest encoder output is the decoder's starting point.
        features = skips.pop()

        # Decoder: upsample, then append the encoder map of the same resolution.
        for stage in range(1, len(self._DECODER_CHANNELS) + 1):
            features = getattr(self, f"up{stage}")(features)
            features = torch.cat((features, skips.pop()), dim=1)

        return self.final_depth(features), self.final_rgb(features)

In [54]:
# Dataset class
class ChairDataset(Dataset):
    """Paired edge / depth / RGB images read from three parallel directories.

    A sample is identified by a filename that exists as a regular file in all
    three directories. Filenames are taken from ``edge_dir`` and kept in
    sorted order so that a given index always refers to the same sample.

    Args:
        edge_dir: Directory with the edge images (model input).
        depth_dir: Directory with the depth images (target).
        rgb_dir: Directory with the RGB images (target).
        transform: Optional callable applied independently to each of the
            three PIL images of a sample.
    """

    def __init__(self, edge_dir, depth_dir, rgb_dir, transform=None):
        self.edge_dir = edge_dir
        self.depth_dir = depth_dir
        self.rgb_dir = rgb_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sort for stable indexing.
        self.edge_files = sorted(os.listdir(edge_dir))
        self.valid_files = self._filter_valid_files()

    def _filter_valid_files(self):
        """Return the edge filenames that are regular files in all three directories."""
        directories = (self.edge_dir, self.depth_dir, self.rgb_dir)
        # isfile (not exists) so that same-named subdirectories are not
        # mistaken for samples and later fail in Image.open().
        return [
            filename
            for filename in self.edge_files
            if all(os.path.isfile(os.path.join(directory, filename)) for directory in directories)
        ]

    def __len__(self):
        """Number of complete (edge, depth, rgb) triples."""
        return len(self.valid_files)

    @staticmethod
    def _load(path, mode):
        """Open ``path``, convert it to ``mode`` and close the file handle."""
        with Image.open(path) as img:
            return img.convert(mode)

    def __getitem__(self, idx):
        """Return ``(edge, depth, rgb)`` for sample ``idx``.

        Edge and depth are loaded as single-channel ('L') images and RGB as a
        3-channel image; ``transform`` is applied to each when it is set.
        """
        filename = self.valid_files[idx]

        edge = self._load(os.path.join(self.edge_dir, filename), 'L')
        # NOTE(review): if the depth PNGs are stored with 16 bits per pixel,
        # converting to 'L' reduces them to 8 bits - confirm this is intended.
        depth = self._load(os.path.join(self.depth_dir, filename), 'L')
        rgb = self._load(os.path.join(self.rgb_dir, filename), 'RGB')

        if self.transform:
            edge = self.transform(edge)
            depth = self.transform(depth)
            rgb = self.transform(rgb)

        return edge, depth, rgb

In [58]:
import torch
import torch.nn.functional as F

def train_model(model, dataloader, criterion_depth, criterion_rgb, optimizer, num_epochs=10, device='cuda'):
    """Jointly train the depth and RGB heads of ``model``.

    For every batch the targets are bilinearly resized to the spatial size of
    the corresponding model output, the two losses are summed, and one
    optimizer step is taken. The per-batch average of each loss is printed
    after every epoch.

    Args:
        model: Module whose forward pass returns ``(depth_output, rgb_output)``.
        dataloader: Iterable of ``(inputs, depth_targets, rgb_targets)`` batches.
        criterion_depth: Loss applied to the depth output and resized depth target.
        criterion_rgb: Loss applied to the RGB output and resized RGB target.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the model and every batch are moved to.

    Returns:
        The same ``model`` object, trained in place, so that
        ``model = train_model(model, ...)`` keeps a usable model.

    Raises:
        ValueError: If ``dataloader`` yields no batches during an epoch.
    """
    model.to(device)  # Move the model to the specified device
    model.train()  # Set the model to training mode

    for epoch in range(num_epochs):
        running_loss_depth = 0.0
        running_loss_rgb = 0.0
        num_batches = 0

        for inputs, depth_targets, rgb_targets in dataloader:
            # Move inputs and targets to the same device as the model
            inputs = inputs.to(device)
            depth_targets = depth_targets.to(device)
            rgb_targets = rgb_targets.to(device)

            # Forward pass
            depth_outputs, rgb_outputs = model(inputs)

            # The model output may be smaller than the targets, so the targets
            # are resized to the output resolution before computing the loss.
            depth_targets_resized = F.interpolate(
                depth_targets, size=depth_outputs.shape[2:], mode='bilinear', align_corners=False
            )
            rgb_targets_resized = F.interpolate(
                rgb_targets, size=rgb_outputs.shape[2:], mode='bilinear', align_corners=False
            )

            # Compute losses; both heads contribute equally
            loss_depth = criterion_depth(depth_outputs, depth_targets_resized)
            loss_rgb = criterion_rgb(rgb_outputs, rgb_targets_resized)
            loss = loss_depth + loss_rgb

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update loss statistics
            running_loss_depth += loss_depth.item()
            running_loss_rgb += loss_rgb.item()
            num_batches += 1

        if num_batches == 0:
            # Previously this surfaced as an opaque ZeroDivisionError below.
            raise ValueError("dataloader yielded no batches; nothing to train on")

        # Print average per-batch loss for the epoch
        avg_loss_depth = running_loss_depth / num_batches
        avg_loss_rgb = running_loss_rgb / num_batches
        print(f"[Epoch {epoch + 1}] Avg Loss Depth: {avg_loss_depth:.3f}, Avg Loss RGB: {avg_loss_rgb:.3f}")

    print('Finished Training')
    return model

In [59]:
# Every image (edge, depth, RGB) is resized to 256x256 and converted to a
# float tensor by the same transform.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

edge_dir = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\edge'
depth_dir = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\depth'
rgb_dir = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\rgb_png'

dataset = ChairDataset(edge_dir, depth_dir, rgb_dir, transform=transform)
# Fail early with a readable message when the three folders share no filenames.
if len(dataset) == 0:
    raise ValueError("ChairDataset is empty: no filename is present in the edge, depth and rgb folders")
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Use the `device` selected earlier in the notebook instead of an
# unconditional .cuda(), which raises on machines without a GPU.
model = UNet().to(device)
criterion_depth = nn.MSELoss()
criterion_rgb = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [60]:
# train_model updates `model` in place. Its return value is deliberately not
# assigned back to `model`, so the trained network can never be replaced by
# whatever the function happens to return.
train_model(model, dataloader, criterion_depth, criterion_rgb, optimizer, num_epochs=10, device=device)

[Epoch 1] Avg Loss Depth: 0.045, Avg Loss RGB: 0.037
[Epoch 2] Avg Loss Depth: 0.025, Avg Loss RGB: 0.013
[Epoch 3] Avg Loss Depth: 0.018, Avg Loss RGB: 0.008
[Epoch 4] Avg Loss Depth: 0.015, Avg Loss RGB: 0.006
[Epoch 5] Avg Loss Depth: 0.012, Avg Loss RGB: 0.005
[Epoch 6] Avg Loss Depth: 0.010, Avg Loss RGB: 0.004
[Epoch 7] Avg Loss Depth: 0.009, Avg Loss RGB: 0.004
[Epoch 8] Avg Loss Depth: 0.008, Avg Loss RGB: 0.003
[Epoch 9] Avg Loss Depth: 0.007, Avg Loss RGB: 0.003
[Epoch 10] Avg Loss Depth: 0.006, Avg Loss RGB: 0.003
Finished Training


In [63]:
# Pickle whatever object `model` is currently bound to into model.pth
torch.save(obj=model, f='model.pth')

In [66]:
# Save the trained model's state dictionary.
# Do not re-create the network with UNet() here: a fresh instance carries
# randomly initialised weights, so saving it would silently discard training.
if not isinstance(model, nn.Module):
    raise RuntimeError(
        "`model` is not an nn.Module (it may have been overwritten after training); "
        "re-run the training cells before saving weights"
    )
torch.save(model.state_dict(), 'model_weights.pth')

In [9]:
# Initialize the model (created on the CPU)
load_model = UNet()

# Load the state dictionary. map_location='cpu' lets a checkpoint that was
# saved from a CUDA model be restored on a machine without a GPU; the model
# can be moved to another device afterwards.
load_model.load_state_dict(torch.load('model_weights.pth', map_location='cpu'))

# Set the model to evaluation mode (BatchNorm then uses its running statistics)
load_model.eval()

UNet(
  (down1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (down2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (down3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (down4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (down5): Sequential(
    (0): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): B

In [13]:
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Define the UNet model architecture with Sigmoid activation for RGB output
class UNet(nn.Module):
    """UNet variant whose RGB head is squashed into [0, 1] by a sigmoid.

    The layer layout is: five stride-2 convolution stages, four stride-2
    transposed-convolution stages with skip concatenations, and two 1x1
    convolution heads (1-channel depth, 3-channel RGB). The depth head is
    returned as-is; the RGB head is passed through ``nn.Sigmoid``.

    Both outputs have half the spatial size of the input, and the input height
    and width need to be multiples of 32 for the skip shapes to line up.

    NOTE(review): the earlier ``UNet`` definition in this notebook returns the
    RGB head without a sigmoid. Weights trained with that definition will see
    an extra sigmoid here - confirm this is intended.
    """

    def __init__(self):
        super().__init__()
        # Encoder stages (each halves H and W)
        self.down1 = self.conv_block(1, 64)
        self.down2 = self.conv_block(64, 128)
        self.down3 = self.conv_block(128, 256)
        self.down4 = self.conv_block(256, 512)
        self.down5 = self.conv_block(512, 1024)

        # Decoder stages (each doubles H and W); inputs of up2..up4 are twice
        # as wide as the previous output because of the skip concatenation.
        self.up1 = self.upconv_block(1024, 512)
        self.up2 = self.upconv_block(1024, 256)
        self.up3 = self.upconv_block(512, 128)
        self.up4 = self.upconv_block(256, 64)

        # Output heads on the final 128-channel feature map
        self.final_depth = nn.Conv2d(128, 1, kernel_size=1)
        self.final_rgb = nn.Conv2d(128, 3, kernel_size=1)

        self.sigmoid = nn.Sigmoid()

    def conv_block(self, in_channels, out_channels):
        """Build one encoder stage: stride-2 Conv2d, BatchNorm2d, ReLU."""
        conv = nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)
        norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(conv, norm, nn.ReLU(inplace=True))

    def upconv_block(self, in_channels, out_channels):
        """Build one decoder stage: stride-2 ConvTranspose2d, BatchNorm2d, ReLU."""
        deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)
        norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(deconv, norm, nn.ReLU(inplace=True))

    def forward(self, x):
        """Return ``(depth_output, rgb_output)`` with ``rgb_output`` in [0, 1]."""
        enc1 = self.down1(x)
        enc2 = self.down2(enc1)
        enc3 = self.down3(enc2)
        enc4 = self.down4(enc3)
        bottleneck = self.down5(enc4)

        # Upsample, then concatenate the encoder map of matching resolution.
        dec = torch.cat((self.up1(bottleneck), enc4), dim=1)
        dec = torch.cat((self.up2(dec), enc3), dim=1)
        dec = torch.cat((self.up3(dec), enc2), dim=1)
        dec = torch.cat((self.up4(dec), enc1), dim=1)

        depth_output = self.final_depth(dec)
        rgb_output = self.sigmoid(self.final_rgb(dec))
        return depth_output, rgb_output

# Function to load an image, perform transformations, and get model predictions
def generate_images(model, edge_image_path, depth_output_path, rgb_output_path, transform=None, device='cuda'):
    """Run ``model`` on one edge image and save the predicted depth and RGB images.

    Args:
        model: Module whose forward pass returns ``(depth_output, rgb_output)``.
        edge_image_path: Path of the input edge image; it is read as grayscale.
        depth_output_path: Destination file for the depth prediction (gray colormap).
        rgb_output_path: Destination file for the RGB prediction.
        transform: Optional callable turning the PIL image into a ``(C, H, W)``
            tensor. When omitted, the image is converted to a ``(1, H, W)``
            float tensor scaled to [0, 1] without resizing.
        device: Device used for inference.

    Side effects:
        Moves ``model`` to ``device`` and puts it in eval mode, creates the
        output directories if needed, writes both image files, and prints
        their paths.
    """
    import os  # local import keeps this cell runnable on its own

    model.to(device)  # Move the model to the specified device
    model.eval()  # Set the model to evaluation mode

    # Load the edge image and release the file handle right away
    with Image.open(edge_image_path) as img:
        edge_image = img.convert('L')
    if transform:
        edge_image = transform(edge_image)
    else:
        # A PIL image has no .unsqueeze(); build the tensor by hand so the
        # documented default transform=None actually works.
        edge_image = torch.from_numpy(np.asarray(edge_image, dtype=np.float32) / 255.0).unsqueeze(0)
    edge_image = edge_image.unsqueeze(0).to(device)  # Add batch dimension and move to device

    # Get model predictions
    with torch.no_grad():
        depth_output, rgb_output = model(edge_image)

    # Convert outputs to NumPy arrays on the CPU
    depth_output = depth_output.squeeze(0).cpu().numpy()  # (1, H, W)
    rgb_output = rgb_output.squeeze(0).permute(1, 2, 0).cpu().numpy()  # (H, W, 3)

    # Make sure the destination folders exist before writing
    for out_path in (depth_output_path, rgb_output_path):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Save depth output
    plt.imsave(depth_output_path, depth_output[0], cmap='gray')

    # Min-max normalize the RGB output to [0, 1] and save it
    value_range = rgb_output.max() - rgb_output.min()
    if value_range > 0:
        rgb_output = (rgb_output - rgb_output.min()) / value_range
    else:
        # Constant prediction: min-max scaling would divide by zero and give
        # NaNs, so only clamp the values into the range imsave accepts.
        rgb_output = np.clip(rgb_output, 0.0, 1.0)
    plt.imsave(rgb_output_path, rgb_output)

    print(f"Generated images saved to {depth_output_path} and {rgb_output_path}")

# Example usage
edge_image_path = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\edge\1.png'
depth_output_path = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\output\1_depth.png'
rgb_output_path = r'D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\output\1_rgb.png'

# Define the transformation
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

# Load the trained model
load_model = UNet()
load_model.load_state_dict(torch.load('model_weights.pth'))

# Generate images
generate_images(load_model, edge_image_path, depth_output_path, rgb_output_path, transform=transform)

Generated images saved to D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\output\1_depth.png and D:\Jupyter\3D Construction\Dataset\redwood-3dscan\data\rgbd_extract\00037\output\1_rgb.png
