In [1]:
from Resnet_depth import *
from urllib.request import urlopen
from PIL import Image
import timm
import torch
import torch.nn as nn

class Depth(nn.Module):
    def __init__(self, decoder, output_size, in_channels=3, pretrained=True) -> None:
        super(Depth,self).__init__()
        self.model_s=timm.create_model('lcnet_050.ra2_in1k', pretrained=True)
        selected_layers = list(self.model_s.children())[:4]

        self.feat = torch.nn.Sequential(*selected_layers)
        

        
        num_channels=512
        self.output_size = output_size

        self.conv2 = nn.Conv2d(num_channels,num_channels//2,kernel_size=1,bias=False)
        self.bn2 = nn.BatchNorm2d(num_channels//2)
        self.decoder = choose_decoder(decoder, num_channels//2)

        self.conv3 = nn.Conv2d(num_channels//32,1,kernel_size=3,stride=1,padding=1,bias=False)
        self.bilinear = nn.Upsample(size=self.output_size, mode='bilinear', align_corners=True)

        self.conv2.apply(weights_init)
        self.bn2.apply(weights_init)
        self.decoder.apply(weights_init)
        self.conv3.apply(weights_init)
    def forward(self,x):
        x=self.feat(x)
        
        x=self.conv2(x)
        x=self.bn2(x)
        
        # decoder
        x = self.decoder(x)
        x = self.conv3(x)
        x = self.bilinear(x)
        return x



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from Resnet_depth import *
from Depth_net import *



In [3]:
model_s=timm.create_model('lcnet_050.ra2_in1k', pretrained=True)


In [4]:
import numpy as np
data=np.load("/home/multimediateam/Documents/Vision_HUST/DepthEstimation/CitySpaces/data/train/depth/0.npy")
data.shape

(128, 256, 1)

In [5]:
import time
import psutil

def measure_latency_cpu_usage(model, test_inputs):
    process = psutil.Process()
    cpu_start = process.cpu_percent()
    start = time.time()
    model.eval().cuda()
    predictions = model(test_inputs)
    end = time.time()
    cpu_end = process.cpu_percent()
    latency = end - start
    cpu_usage = cpu_end - cpu_start
    return latency, cpu_usage

import time
import torch

def measure_gpu_throughput(model, inputs, batch_size):
    inputs = inputs.to('cuda')
    model = model.to('cuda')
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    predictions=model(inputs)
    end.record()
    torch.cuda.synchronize()
    latency = start.elapsed_time(end)
    throughput = inputs.size(0) / latency
    return throughput

In [6]:
model_pplc=Depth(decoder="upconv",output_size=(228,304),pretrained=False)
model_resnet=ResNet(layers=18,decoder="upconv",output_size=(228,304))


print(measure_latency_cpu_usage(model_pplc,torch.rand(1,3,228,304).cuda()))
print("GPU:{}".format(measure_gpu_throughput(model_pplc,inputs=torch.rand(1,3,228,304),batch_size=1)))

print(measure_latency_cpu_usage(model_resnet,torch.rand(1,3,228,304).cuda()))
print("GPU:{}".format(measure_gpu_throughput(model_resnet,inputs=torch.rand(1,3,228,304),batch_size=1)))



RuntimeError: Given groups=1, weight of size [256, 512, 1, 1], expected input[1, 8, 114, 152] to have 512 channels, but got 8 channels instead

In [7]:
import torch.utils.benchmark as benchmark
input_data = torch.randn(32, 3, 228, 304)

# Sử dụng torch.utils.benchmark để đo đạc thời gian inference
benchmark_result = benchmark.Timer(
    stmt='model(input_data)',
    globals={'model': model_pplc.cuda(), 'input_data': input_data.cuda()}
).timeit(100)
throughput = 100 / benchmark_result.median

print(f'Throughput: {throughput:.2f} images/s')

RuntimeError: Given groups=1, weight of size [256, 512, 1, 1], expected input[32, 8, 114, 152] to have 512 channels, but got 8 channels instead

In [8]:
from Resnet_depth import *
from Depth_net import Depth
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
from torchvision.transforms import transforms as t
from PIL import Image
model_resnet=ResNet(layers=18,decoder="upconv",output_size=(228,304))
model_pplc=Depth(decoder="upconv",output_size=(228,912))
valid_tfms = t.Compose([
    t.ToTensor(),
    t.Resize((228,912)),
    t.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    t.ToPILImage()
    
    
])
ROOT="KITTI_test"
list_dir=os.listdir(ROOT)



In [9]:
model_pplc.load_state_dict(torch.load('result/base_line_KITTI_PPLC-net/best_model.pth'),strict=False)
for filename in list_dir:
    file_image_path=os.path.join(ROOT,filename)
    img=cv2.imread(file_image_path)
    img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    input=valid_tfms(img)
    
    output=model_pplc(input)
    print(output)
    

TypeError: conv2d() received an invalid combination of arguments - got (Image, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!Image!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!Image!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)


In [10]:
img=cv2.imread("nyu_data/data/nyu2_test/00001_colors.png")
H,W=img.shape[:2]
img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
img=cv2.resize(img,(304,228))
norm_transform= t.Compose([
    t.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),    
])
img=torch.from_numpy(img).permute(2,0,1).unsqueeze(0).cuda().float()
img=norm_transform(img)
print(img.shape)
model_resnet.load_state_dict(torch.load('result/base_line_NYU_V2_Resnet/best_model.pth'),strict=False)
model_resnet.cuda().eval()

output=model_resnet(img)
prediction = torch.nn.functional.interpolate(
                output,
                size=(H,W),
                mode="bicubic",
                align_corners=False,
            ).squeeze()
depth_map = output.detach().cpu().numpy()
depth_map=np.squeeze(depth_map)
depth_map = (depth_map*25.5).astype(np.uint8)
depth_map = cv2.applyColorMap(depth_map , cv2.COLORMAP_MAGMA)
cv2.imwrite("results.png",depth_map)


torch.Size([1, 3, 228, 304])


True

In [11]:
class UnNormalize(t.Normalize):
    def __init__(self,*args,**kwargs):
        mean=(0.485, 0.456, 0.406)
        std=(0.229, 0.224, 0.225)
        new_mean = [-m/s for m,s in zip(mean,std)]
        new_std = [1/s for s in std]
        super().__init__(new_mean, new_std, *args, **kwargs)

In [12]:
import matplotlib.pyplot as plt
def colored_depthmap(depth, d_min=None, d_max=None,cmap=plt.cm.inferno):
    if d_min is None:
        d_min = np.min(depth)
    if d_max is None:
        d_max = np.max(depth)
    depth_relative = (depth - d_min) / (d_max - d_min)
    return 255 * cmap(depth_relative)[:,:,:3] # H, W, C

In [13]:
img=cv2.imread("nyu_data/data/nyu2_test/00001_colors.png")
H,W=img.shape[:2]
img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
img=cv2.resize(img,(304,228))
norm_transform= t.Compose([
    t.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),    
])
img=torch.from_numpy(img).permute(2,0,1).unsqueeze(0).cuda().float()
img=norm_transform(img)
print(img.shape)
model_resnet.load_state_dict(torch.load('result/base_line_NYU_V2_Resnet/best_model.pth'),strict=False)
model_resnet.cuda().eval()

output=model_resnet(img)
prediction = torch.nn.functional.interpolate(
                output,
                size=(H,W),
                mode="bicubic",
                align_corners=False,
            ).squeeze()
depth_map = output.detach().cpu().numpy()
depth_map=np.squeeze(depth_map)
depth_map=colored_depthmap(depth_map , cv2.COLORMAP_MAGMA)
cv2.imwrite("results.png",depth_map)


torch.Size([1, 3, 228, 304])


True

In [14]:
from Depth_net import Depth
from Resnet_depth import ResNet
from calflops import calculate_flops
model_pplc=Depth(decoder="upconv",output_size=(228,304),pretrained=False)
model_resnet=ResNet(layers=18,decoder="upconv",output_size=(228,304))
batch_size=1
input_shape = (batch_size, 3, 228, 304)
flops, macs, params = calculate_flops(model=model_resnet, 
                                      input_shape=input_shape,
                                      output_as_string=True,
                                      output_precision=4)
print("PPLC_net FLOPs:%s   MACs:%s   Params:%s \n" %(flops, macs, params))


------------------------------------- Calculate Flops Results -------------------------------------
Notations:
number of parameters (Params), number of multiply-accumulate operations(MACs),
number of floating-point operations (FLOPs), floating-point operations per second (FLOPS),
fwd FLOPs (model forward propagation FLOPs), bwd FLOPs (model backward propagation FLOPs),
default model backpropagation takes 2.00 times as much computation as forward propagation.

Total Training Params:                                                  12.4 M  
fwd MACs:                                                               3.7104 GMACs
fwd FLOPs:                                                              7.434 GFLOPS
fwd+bwd MACs:                                                           11.1311 GMACs
fwd+bwd FLOPs:                                                          22.302 GFLOPS

-------------------------------- Detailed Calculated FLOPs Results --------------------------------
Each module

