In [1]:
import torch

In [2]:
def fill_zero_places(a):
    a = torch.cat([torch.tensor([-1], device=a.device), a])
    nonzero_indices = torch.nonzero(a).squeeze()
    diff = torch.diff(torch.cat([nonzero_indices, torch.tensor([a.shape[0]], device=a.device)]))
    b = a[nonzero_indices].repeat_interleave(diff)
    b[b < 0] = 0
    c = b[1:]
    return c

In [3]:
def shift_values_to_next_available_zero(a):
    b = (a > 0).int()
    b = torch.cat([torch.tensor([1], device=a.device), b])
    c = torch.arange(b.shape[0], device=a.device) * b
    d = c[1:]
    e = torch.roll(fill_zero_places(d), 1)
    e[0] = 0
    f = torch.zeros_like(a)
    f.scatter_add_(0, e.long(), a)
        
    return f

In [4]:
def fill_zero_values_ascending_rest_with_zero(a):
    b = (a > 0).int()
    cumsum = torch.cumsum(1 - b, dim=0)
    c = (cumsum * (1 - b) - torch.maximum(b * cumsum, torch.zeros_like(cumsum)).cummax(dim=0)[0]) * (1 - b)
    
    return c

In [5]:
def find_one_zero_pattern(a):
    b = torch.roll(a.int(), -1)
    b[-1] = 1
    c = a - b
    d = (c == 1).int()
    
    return d

In [6]:
def fill_zero_values_descending_rest_with_zero(a):
    b = fill_zero_values_ascending_rest_with_zero(a)
    c = (b > 0).int()
    d = find_one_zero_pattern(c)
    e = d * b
    f = shift_values_to_next_available_zero(e)
    g = fill_zero_places(f)
    h = ((g + 1) - b) * c
    
    return h

In [7]:
def find_values_that_must_be_reordered(a):
    b = torch.arange(1, a.shape[0] + 1)
    c = a - b
    d, _ = torch.cummin(c.flip(0), dim=0)
    d = d.flip(0)
    e = (c == d).int()

    return 1 - e

In [8]:
def adjust_cars_no_lane_change(a):
    b = 1 - find_values_that_must_be_reordered(a)
    c = fill_zero_values_descending_rest_with_zero(b)
    d = a * b
    e = shift_values_to_next_available_zero(d)
    f = fill_zero_places(e)
    g = f - c

    return g

In [12]:
a = torch.tensor([ 83,  84,  89,  93,  87,  92,  90,  95,  90,  94,  94,  96,  97, 101,  97, 105, 104,  99, 101, 101, 101, 110, 105, 111, 106, 115, 90,  86, 107,  59])
adjust_cars_no_lane_change(a)

tensor([30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59])

In [13]:
a = torch.tensor([ 3, 10,  8, 11,  5, 13,  7, 14, 18, 15, 19, 20, 13, 21, 16, 24, 19, 24, 23, 22, 28, 22, 27, 26, 33, 32, 30, 32, 31, 36])
adjust_cars_no_lane_change(a)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 36])

In [14]:
a = torch.tensor([ 1,  4,  3,  6,  5,  6,  8,  8, 14, 10, 11, 13, 13, 14, 15, 16, 17, 18,  19, 20, 21, 23, 23, 24, 25, 26, 27, 28, 30, 35])
adjust_cars_no_lane_change(a)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 35])

In [15]:
count = 30000
lane_num_tracks = torch.tensor([300000])
cars_position = torch.arange(0, count)
cars_speed = torch.randint(0,10, (count, 1)).flatten()

adjust_cars_no_lane_change(cars_position + cars_speed)

tensor([    0,     1,     2,  ..., 29998, 30004, 30006])

### Benchmarks

In [18]:
import torch
import time

if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available, please run on a CUDA-capable device.")

iterations = 100000
count = 30000

lane_num_tracks = torch.tensor([300000000]).cuda()
cars_position = torch.arange(0, count).cuda()
cars_speed = torch.randint(0, 10, (count, 1)).flatten().cuda()

def adjust_cars_no_lane_change(cars_info):
    # Sample implementation, replace with the actual function
    return cars_info * 2  # Example operation

# Start timing
start_time = time.time()

# Execute the function multiple times
for _ in range(iterations):
    result = adjust_cars_no_lane_change(cars_position + cars_speed)

# Optionally, synchronize and free the cache to get accurate timings
torch.cuda.synchronize()
torch.cuda.empty_cache()

# End timing
end_time = time.time()

# Calculate elapsed time
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time} seconds")

Elapsed time: 0.9229917526245117 seconds


Iterationen: 1.000.000
Autos: 300.000

Zeit: 16 Sekunden

Iterationen: 10.000.000
Autos: 30.000

Zeit: 94 Sekunden

Iterationen: 1.000.000
Autos: 3.000.000

Zeit: 178 Sekunden