In [None]:
!pip install pytorch_lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-2.0.3-py3-none-any.whl (720 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m720.6/720.6 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.7.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.8.0-py3-none-any.whl (20 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]>2021.06.0->pytorch_lightning)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m68.1 MB/s[0m eta [36m0:00:00[0m
Collecting multidict

In [None]:
#!pip install cloud-tpu-client==0.10 torch==2.0.0 torchvision==0.15.1 https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-2.0-cp310-cp310-linux_x86_64.whl

In [None]:
#import torch_xla
#import torch_xla.core.xla_model as xm

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
from natsort import natsorted
from glob import glob
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50, ResNet50_Weights
import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback

#dev = xm.xla_device()
#torch.multiprocessing.set_start_method('spawn')
# Trade float32 matmul precision for speed: per the PyTorch docs, 'medium'
# permits float32 matrix multiplications to use bfloat16 internally when a
# faster kernel is available.
torch.set_float32_matmul_precision('medium')

In [None]:
# Gather the left/right camera frames and the target embedding files, each
# list in natural (numeric-aware) sort order.
left_files = natsorted(glob("/content/drive/MyDrive/188/kitti/training/image_2/*"))
right_files = natsorted(glob("/content/drive/MyDrive/188/kitti/training/image_3/*"))
target_files = natsorted(glob("/content/drive/MyDrive/188/train_embeds/*"))

# Pair the files positionally as (left, right, target) tuples. zip() stops at
# the shortest of the three lists, so any surplus files are ignored.
datapoints = list(zip(left_files, right_files, target_files))

In [None]:
# Inspect the first sample: the left camera frame and its target embedding
# (converted to a sparse layout for a compact printout).
example_image = Image.open(left_files[0])
example_tensor = torch.load(target_files[0]).to_sparse()
print(example_tensor, example_image, sep="\n")

# The array view of the image shows its (rows, cols, channels) shape.
example_arr = np.asarray(example_image)
print(example_arr.shape)

tensor(indices=tensor([[  0,   0,   0,  ...,  63,  63,  63],
                       [181, 181, 181,  ..., 333, 334, 335],
                       [ 78,  79,  80,  ..., 104, 104, 104]]),
       values=tensor([0.0732, 0.0268, 0.1126,  ..., 1.7712, 1.8953, 0.9293]),
       device='cuda:0', size=(64, 496, 432), nnz=74344, layout=torch.sparse_coo)
<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1224x370 at 0x7F879DFBB8B0>
(370, 1224, 3)


In [None]:
# Sanity check: how many (left, right, target) triples were paired up.
print(len(datapoints))

1500


In [None]:
class StereoEmbedDataset(Dataset):
    """Stereo image pairs together with their precomputed target embeddings.

    Args:
        datapoints: sequence of ``(left_image_path, right_image_path,
            target_path)`` tuples.

    Each item is ``((left_img, right_img), target)`` where the images are
    uint8 tensors in channels-first ``(3, H, W)`` layout and ``target`` is the
    tensor stored at ``target_path`` converted to a sparse layout.
    """

    # PIL's Image.resize expects (width, height). The source frames are
    # 1224 px wide and 370 px tall, so the size must be given in that order;
    # passing (370, 1224) produced transposed-aspect 370x1224 images.
    IMAGE_SIZE = (1224, 370)

    def __init__(self, datapoints):
        self.datapoints = datapoints

    def __len__(self):
        return len(self.datapoints)

    @staticmethod
    def _load_image(path):
        """Load one image as a uint8 tensor of shape (3, H, W)."""
        # The context manager closes the file handle deterministically.
        with Image.open(path) as img:
            # np.array (not asarray) yields a writable copy, which
            # torch.from_numpy requires to avoid a non-writable-array warning.
            pixels = np.array(img.convert("RGB").resize(StereoEmbedDataset.IMAGE_SIZE))
        # PIL/numpy give (H, W, C); torchvision transforms and conv layers
        # treat the last two dims as (H, W), so move channels to the front.
        # Feeding (H, W, C) made the preprocessing resize along the 3-wide
        # channel axis and blow the tensor up by orders of magnitude.
        return torch.from_numpy(pixels).permute(2, 0, 1).contiguous()

    def __getitem__(self, idx):
        left_img_file, right_img_file, target_file = self.datapoints[idx]

        left_img = self._load_image(left_img_file)
        right_img = self._load_image(right_img_file)

        # Load the target tensor from the appropriate file.
        # NOTE(review): torch.load restores the tensor on the device it was
        # saved from; pass map_location="cpu" if this dataset is ever used
        # with DataLoader worker processes or on a machine without that device.
        target = torch.load(target_file).to_sparse()

        return (left_img, right_img), target

In [None]:
stereo_embed_dataset = StereoEmbedDataset(datapoints)

# Train-val split: 80% for training, the remainder for validation.
train_size = int(0.8 * len(stereo_embed_dataset))
val_size = len(stereo_embed_dataset) - train_size  # Use the rest for validation

# Seed the split so the same samples land in train/val on every run;
# otherwise validation numbers are not comparable across kernel restarts and
# validation samples can leak into training when resuming from a checkpoint.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    stereo_embed_dataset, [train_size, val_size], generator=split_generator
)

# Batch size 1 with in-process loading; only the training set is shuffled.
stereo_embed_train_dataloader = DataLoader(train_dataset, batch_size=1, num_workers=0, shuffle=True)
stereo_embed_val_dataloader = DataLoader(val_dataset, batch_size=1, num_workers=0, shuffle=False)

In [None]:
class StereoEmbedModel(pl.LightningModule):
    """Predict a 64-channel 496x432 embedding map from a stereo image pair.

    A frozen, ImageNet-pretrained ResNet-50 (final fc layer removed) encodes
    each image; the two encodings are concatenated, reduced with a 1x1
    convolution, passed through a small upsampling head and bilinearly
    resized to ``(496, 432)``.
    """

    def __init__(self):
        super().__init__()

        # Preprocessing preset that ships with the pretrained weights.
        self.preprocess = ResNet50_Weights.DEFAULT.transforms()

        backbone = resnet50(weights=ResNet50_Weights.DEFAULT)
        # Drop only the classification (fc) layer.
        # NOTE(review): this keeps the global average pool, so each image is
        # encoded as a (2048, 1, 1) vector and all spatial layout is lost
        # before upsampling. Slicing with [:-2] would keep the final spatial
        # feature map instead -- confirm which is intended.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        for param in self.resnet.parameters():
            param.requires_grad = False
        self.resnet.eval()

        # Replaced attention layer with just a 1-convolution to reduce complexity
        self.dim_reduce = nn.Conv2d(2048 * 2, 2048, kernel_size=1)

        # Additional convolution layers
        self.upsample = nn.Sequential(
            nn.ConvTranspose2d(2048, 512, kernel_size=(4, 4), stride=2),
            nn.ReLU(),
            nn.Conv2d(512, 64, kernel_size=(3, 3)),
            nn.ReLU(),
        )

    def train(self, mode=True):
        """Switch train/eval mode while keeping the frozen backbone in eval.

        Lightning calls ``.train()`` on the whole module when fitting starts,
        which would otherwise put the backbone's BatchNorm layers back into
        training mode and let their running statistics drift even though the
        backbone weights are frozen.
        """
        super().train(mode)
        self.resnet.eval()
        return self

    def _forward_dense(self, left_img, right_img):
        """Run the network and return the dense (B, 64, 496, 432) output."""
        # The torchvision preset treats the last two dims as (H, W), so the
        # images are expected channels-first -- assumes (B, 3, H, W) input.
        left_img_transformed = self.preprocess(left_img)
        right_img_transformed = self.preprocess(right_img)

        # Extract the feature maps
        left_features = self.resnet(left_img_transformed)
        right_features = self.resnet(right_img_transformed)

        # Concatenate the feature maps and reduce dimensions with 1x1 conv
        combined_features = torch.cat([left_features, right_features], dim=1)
        combined_features = self.dim_reduce(combined_features)

        # Pass through additional upsample conv layers
        combined_features = self.upsample(combined_features)

        return nn.functional.interpolate(
            combined_features, size=(496, 432), mode='bilinear', align_corners=False
        )

    def forward(self, left_img, right_img):
        """Return the predicted embedding map as a sparse tensor.

        The sparse return layout is kept for existing callers; the training
        and validation steps use the dense output directly.
        """
        return self._forward_dense(left_img, right_img).to_sparse()

    def _dense_mse(self, left_img, right_img, target):
        """Mean squared error between the dense prediction and the target."""
        output = self._forward_dense(left_img, right_img)
        # The loss is computed on dense tensors: the post-ReLU prediction is
        # not meaningfully sparse (a COO copy stores three int64 indices per
        # value), and elementwise loss kernels are written for strided
        # tensors. Targets arriving in a sparse layout are densified here.
        if target.layout != torch.strided:
            target = target.to_dense()
        return nn.functional.mse_loss(output, target)

    def training_step(self, batch, batch_idx):
        """Compute and log the training MSE for one batch; returns the loss."""
        (left_img, right_img), target = batch
        loss = self._dense_mse(left_img, right_img, target)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation MSE for one batch."""
        (left_img, right_img), target = batch
        loss = self._dense_mse(left_img, right_img, target)
        self.log('val_loss', loss)

    def configure_optimizers(self):
        """Adam over the module's parameters with learning rate 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=0.001)

In [None]:
class PrintCallback(Callback):
    """Callback that prints a fixed notice from the train start/end hooks."""

    def on_train_start(self, trainer, pl_module):
        """Announce on stdout that training has started."""
        print("Training started.")

    def on_train_end(self, trainer, pl_module):
        """Announce on stdout that training is done."""
        print("Training done.")

In [None]:
# Build the model and fit it for at most 20 epochs; a single validation
# batch is run as a sanity check before training begins.
stereo_embed_model = StereoEmbedModel()

trainer = pl.Trainer(
    max_epochs=20,
    num_sanity_val_steps=1,
    callbacks=[PrintCallback()],
)

trainer.fit(
    stereo_embed_model,
    train_dataloaders=stereo_embed_train_dataloader,
    val_dataloaders=stereo_embed_val_dataloader,
)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 204MB/s]
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type                | Params
---------------------------------------------------
0 | preprocess | ImageClassification | 0     
1 | resnet     | Sequential          | 23.5 M
2 | dim_reduce | Conv2d              | 8.4 M 
3 | upsample   | Sequential          | 17.1 M
---------------------------------------------------
25.5 M    Trainable params

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


OutOfMemoryError: ignored