In [1]:
import torch
import torch_tensorrt
import oyaml as yaml

from modules import get_backbone, get_criterion, module
# from modules.erfnet.erfnet_compile import ERFNet
from modules.erfnet.erfnet_modified import ERFNetModel

In [2]:
with open("config/erfnet_predict.yaml") as istream:
    cfg = yaml.safe_load(istream)

ckpt = torch.load("models/semantic-seg-erfnet.ckpt", map_location="cpu")
state_dict = ckpt["state_dict"] if "state_dict" in ckpt else ckpt
state_dict = {k.replace("network.", ""): v for k, v in state_dict.items()}

num_classes = 3 
model = ERFNetModel(num_classes=num_classes)
model.load_state_dict(state_dict, strict=True)
model.eval().cuda()

ERFNetModel(
  (downsampler_block_01): DownsamplerBlock(
    (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (instance_norm): InstanceNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  )
  (downsampler_block_02): DownsamplerBlock(
    (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (instance_norm): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  )
  (enc_non_bottleneck_1d_01): non_bottleneck_1d(
    (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
    (conv1x3_1): Conv2d(64, 64, kernel_s

In [5]:
# IMPORTANT: Convert model to half precision to match input
model = model.half()  # This converts all model parameters to float16

example_input = torch.randn(1, 3, 1080, 1920).cuda().half()

with torch.no_grad():
    traced_model = torch.jit.trace(model, example_input)

trt_model = torch_tensorrt.compile(
    traced_model,
    inputs=[torch_tensorrt.Input(example_input.shape, dtype=torch.half)],
    enabled_precisions={torch.half}   # or {torch.float16} if hardware supports
)


Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().


In [6]:
with torch.no_grad():
    output = trt_model(example_input)
    print(f"Output shape: {output.shape}")
    print(f"Output dtype: {output.dtype}")

# Save the compiled model
torch.jit.save(trt_model, "erfnet_tensorrt.ts")

Output shape: torch.Size([1, 3, 1080, 1920])
Output dtype: torch.float32


In [12]:
example_input.dtype

torch.float16

## Deployment


In [2]:
trt_ts_module = torch.jit.load("models/erfnet_half_tensorrt.ts")
input_data = torch.ones([1, 3, 1080, 1920])
input_data = input_data.cuda().half()


result = trt_ts_module(input_data)
result

tensor([[[[ 2.4434,  2.1426,  1.9756,  ...,  3.2715,  3.1465,  2.9863],
          [ 2.3438,  2.0430,  1.7471,  ...,  3.1660,  3.0957,  2.9258],
          [ 2.0215,  1.6348,  1.4346,  ...,  3.1504,  3.0254,  2.8711],
          ...,
          [ 0.4229,  0.1033, -0.0266,  ...,  3.4961,  3.5176,  3.1641],
          [ 0.4453,  0.2239,  0.1799,  ...,  3.4551,  3.2520,  3.0410],
          [ 0.4500,  0.2322,  0.2227,  ...,  3.2129,  3.1777,  2.9668]],

         [[-0.4629, -0.2700,  0.5884,  ..., -1.1572, -1.3184, -1.2139],
          [-0.3955, -0.2029,  0.7373,  ..., -1.0781, -1.2773, -1.1650],
          [ 0.2915,  0.5425,  1.1748,  ..., -1.1113, -1.1709, -1.0684],
          ...,
          [ 0.1348,  0.3403,  0.3599,  ..., -2.3047, -1.9434, -1.7100],
          [ 0.1986,  0.3403,  0.4429,  ..., -2.1465, -1.7178, -1.5801],
          [ 0.1937,  0.3320,  0.4136,  ..., -1.9756, -1.6602, -1.5215]],

         [[-2.0039, -1.8945, -2.5840,  ..., -2.1445, -1.8633, -1.8057],
          [-1.9736, -1.8643, -

In [3]:
from PIL import Image
import torchvision.transforms as transforms

image = Image.open("data/ndsu_2025_08_15/predict/images/frame_021821.jpg")
transform = transforms.ToTensor()
image = transform(image)

In [5]:
image.shape

torch.Size([3, 1080, 1920])

In [27]:
import time

start = time.time()
result = trt_ts_module(input_data)
end = time.time()

print(f"Total time for 1 image: {round(end - start, 5)}s")

Total time for 1 image: 0.0087s


## Half precision

In [4]:
import time

start = time.time()
result = trt_ts_module(input_data.half())
end = time.time()

print(f"Total time for 1 image: {round(end - start, 5)}s")

Total time for 1 image: 0.01196s


In [None]:
with open("config/erfnet_predict.yaml") as istream:
    cfg = yaml.safe_load(istream)

ckpt = torch.load("models/semantic-seg-erfnet.ckpt", map_location="cpu")
state_dict = ckpt["state_dict"] if "state_dict" in ckpt else ckpt
state_dict = {k.replace("network.", ""): v for k, v in state_dict.items()}

num_classes = 3 
model = ERFNetModel(num_classes=num_classes)
model.load_state_dict(state_dict, strict=True)
model.eval().cuda()
cuda_input = image.to("cuda").unsqueeze(0)

In [28]:
with torch.no_grad():
    start = time.time()
    result = model(cuda_input)
    end = time.time()
print(f"Total time for 1 image: {round(end - start, 5)}s")

Total time for 1 image: 0.04546s


In [5]:
from PIL import Image, ImageEnhance
import os

def darken_image(input_path, output_path, factor=0.7):
    """
    Darkens an image to reduce over-exposure and saves the result.

    Args:
        input_path (str): The file path of the image to be processed.
        output_path (str): The file path to save the darkened image.
        factor (float): The factor by which to darken the image. 
                        A value less than 1.0 will darken the image,
                        while a value greater than 1.0 will brighten it.
                        Default is 0.7.
    """
    try:
        # Check if the input file exists
        if not os.path.exists(input_path):
            print(f"Error: Input file not found at '{input_path}'")
            return

        # Open the image file
        with Image.open(input_path) as img:
            print(f"Reading image from: {input_path}")

            # Create a brightness enhancer object
            enhancer = ImageEnhance.Brightness(img)
            
            # Apply the darkening factor. A factor of 0.7 makes it 30% darker.
            # You can change this value to adjust the effect.
            darkened_img = enhancer.enhance(factor)
            
            # Save the new image to the specified output path
            darkened_img.save(output_path)
            print(f"Successfully saved darkened image to: {output_path}")

    except Exception as e:
        print(f"An error occurred: {e}")

# --- Example Usage ---
if __name__ == "__main__":
    # Specify your input and output file paths here.
    # Replace 'input_image.jpg' with the path to your image.
    # The output path will be where the new image is saved.
    factor = 0.2
    input_file_path = 'frame_021825.jpg'
    output_file_path = f'darkened_image{factor}.jpg'

    # You can change the 'factor' to a different value between 0.0 and 1.0
    # to control the level of darkness.
    darken_image(input_file_path, output_file_path, factor=factor)


Reading image from: frame_021825.jpg
Successfully saved darkened image to: darkened_image0.2.jpg
