In [1]:
import torch
import random
import numpy as np

In [6]:
# settings
# Global knobs read by the tensor generators and fuzz_* functions below.
MAX_DIMENSION = 3   # largest number of dimensions random_tensor picks for a random shape
MAX_SIZE = 5        # largest length (inclusive) of a randomly chosen dimension
MIN_VALUE = 0       # lower bound (inclusive) for random tensor entries
MAX_VALUE = 20      # default upper bound for random entries (exclusive when passed to torch.randint)
MAX_CONSTANT = 5    # largest value (inclusive) for random scalar parameters (repeats, shift, p, ...)
MAX_ELEMS = 150     # The embedding size for each tensor we're using is 150. Please change this if you change the embedding size.

FUZZING_PER_API = 100000   # number of data points requested from each fuzz_* function

In [3]:
def random_tensor(size=None, nonzero=False, return_bool=False, max_value=MAX_VALUE):
    """Draw a random tensor and return it together with its shape.

    Args:
        size: shape as a list of ints. When None, a shape with 1..MAX_DIMENSION
            dimensions, each of length 1..MAX_SIZE, is drawn at random.
        nonzero: when True (and MIN_VALUE <= 0), entries are drawn from
            [1, max_value) so that no entry is zero. Takes precedence over
            return_bool.
        return_bool: when True, a boolean tensor of random 0/1 values is returned.
        max_value: exclusive upper bound for the integer entries.

    Returns:
        (tensor, size) where size is the shape that was used.
    """
    if size is None:
        rank = random.randint(1, MAX_DIMENSION)
        size = [random.randint(1, MAX_SIZE) for _ in range(rank)]

    if nonzero and MIN_VALUE <= 0:
        low = 1
    elif return_bool:
        return torch.randint(0, 2, size).bool(), size
    else:
        low = MIN_VALUE
    return torch.randint(low, max_value, size), size

# Given a tensor shape (torch.Size), return a shape that is broadcastable to the given tensor.
def get_broadcastable_size(size):
    """Randomly build a shape that broadcasts against `size`.

    The shape is assembled from the trailing dimension backwards. Each step
    either keeps the dimension (as 1 or as its real length), replaces a
    length-1 dimension by a random length, or stops. A leading 1 may be added
    afterwards. If nothing was produced, the whole draw is repeated.

    Args:
        size: shape (sequence of ints) to be broadcast-compatible with.

    Returns:
        A non-empty list of ints.
    """
    result = []
    for extent in reversed(size):
        if random.random() < 0.75:
            result = [random.choice([1, extent])] + result
            if len(result) == MAX_DIMENSION:
                break
            continue
        # A length-1 dimension broadcasts against anything, so it may be
        # replaced by an arbitrary length; otherwise stop extending.
        if extent != 1 or random.random() >= 0.5:
            break
        result = [random.randint(1, MAX_SIZE)] + result

    if len(result) < MAX_DIMENSION and random.random() < 0.3:
        result = [1] + result
    if not result:
        return get_broadcastable_size(size)
    return result


# Generate, for every element count from 1 to 125, all ordered factorizations
# into factors >= 2 (e.g. divisors[4] == [[2, 2], [4]]). fuzz_reshape picks one
# of these lists as a candidate target shape.
# 125 is the largest element count random_tensor() produces with the current
# settings (MAX_SIZE ** MAX_DIMENSION); raise it if those settings grow.
divisors = {1: []}
for target in range(2, 125 + 1):
    factorizations = []
    for i in range(2, target + 1):
        if target % i != 0:
            continue
        # Integer division keeps the lookup key an int. True division would
        # yield a float key that only works through int/float hash equality.
        rest = divisors[target // i]
        if rest:
            factorizations.extend([i] + sub for sub in rest)
        else:
            # target == i: the factor itself is the complete factorization.
            factorizations.append([i])
    divisors[target] = factorizations


In [4]:
# Each function generates and returns a list of n random input/output data points.
# Each data point in the list is formatted as [in1, in2, in3, out, params, API_function],
# where {in1, in2, in3} are the inputs (None when unused), out is the output tensor,
# params holds the additional parameters used (e.g., dim), and API_function is the name of the target function.

def fuzz_square(n):
    """Generate n data points for torch.square.

    Returns a list of [in1, None, None, out, {}, 'square'] entries. Draws whose
    output holds more than MAX_ELEMS elements are discarded and redrawn.
    """
    samples = []
    while len(samples) < n:
        tensor, _ = random_tensor()
        squared = torch.square(tensor)
        if torch.numel(squared) <= MAX_ELEMS:
            samples.append([tensor, None, None, squared, {}, 'square'])
    return samples


def fuzz_max(n):
    """Generate n data points for torch.max.

    A random axis in [0, rank] is drawn; the value `rank` stands for the
    full reduction (recorded as {'dim': None}), any other value reduces along
    that axis and keeps only the max values (not the indices).

    Returns a list of [in1, None, None, out, params, 'max'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        rank = len(shape)
        axis = random.randint(0, rank)
        if axis < rank:
            result = torch.max(tensor, dim=axis)[0]
            params = {'dim': axis}
        else:
            result = torch.max(tensor)
            params = {'dim': None}
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([tensor, None, None, result, params, 'max'])
    return samples


def fuzz_argmax(n):
    """Generate n data points for torch.argmax.

    A random axis in [0, rank] is drawn; the value `rank` selects the argmax
    over the whole tensor (recorded as {'dim': None}).

    Returns a list of [in1, None, None, out, params, 'argmax'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        rank = len(shape)
        axis = random.randint(0, rank)
        if axis < rank:
            result = torch.argmax(tensor, dim=axis)
            params = {'dim': axis}
        else:
            result = torch.argmax(tensor)
            params = {'dim': None}
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([tensor, None, None, result, params, 'argmax'])
    return samples


def fuzz_squeeze(n):
    """Generate n data points for torch.squeeze.

    The input is a random tensor with an extra length-1 dimension inserted at a
    random position, so there is always something to squeeze. When the
    position equals the original rank, every length-1 dimension is squeezed
    (recorded as {'dim': None}); otherwise only the inserted one is targeted.

    Returns a list of [in1, None, None, out, params, 'squeeze'] entries.
    """
    samples = []
    while len(samples) < n:
        base, shape = random_tensor()
        rank = len(shape)
        axis = random.randint(0, rank)
        padded = torch.unsqueeze(base, axis)
        if axis < rank:
            result = torch.squeeze(padded, dim=axis)
            params = {'dim': axis}
        else:
            result = torch.squeeze(padded)
            params = {'dim': None}
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([padded, None, None, result, params, 'squeeze'])
    return samples


def fuzz_unsqueeze(n):
    """Generate n data points for torch.unsqueeze.

    The insertion position is drawn from [0, rank - 1], so the new dimension is
    never appended after the last existing one.

    Returns a list of [in1, None, None, out, {'dim': dim}, 'unsqueeze'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        axis = random.randint(0, len(shape) - 1)
        result = torch.unsqueeze(tensor, dim=axis)
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([tensor, None, None, result, {'dim': axis}, 'unsqueeze'])
    return samples


def fuzz_sum(n):
    """Generate n data points for torch.sum.

    A random axis in [0, rank] is drawn; the value `rank` selects the sum over
    all elements (recorded as {'dim': None}).

    Returns a list of [in1, None, None, out, params, 'sum'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        rank = len(shape)
        axis = random.randint(0, rank)
        if axis < rank:
            result = torch.sum(tensor, dim=axis)
            params = {'dim': axis}
        else:
            result = torch.sum(tensor)
            params = {'dim': None}
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([tensor, None, None, result, params, 'sum'])
    return samples


def fuzz_stack(n):
    """Generate n data points for torch.stack on two same-shaped tensors.

    The stacking position is drawn from [0, rank], i.e. every valid position
    including "after the last dimension".

    Returns a list of [in1, in2, None, out, {'dim': dim}, 'stack'] entries.
    """
    samples = []
    while len(samples) < n:
        first, shape = random_tensor()
        second, _ = random_tensor(shape)
        axis = random.randint(0, len(shape))
        stacked = torch.stack((first, second), dim=axis)
        if torch.numel(stacked) <= MAX_ELEMS:
            samples.append([first, second, None, stacked, {'dim': axis}, 'stack'])
    return samples

def fuzz_minimum(n):
    """Generate n data points for torch.minimum.

    The second operand gets a random shape that broadcasts against the first.

    Returns a list of [in1, in2, None, out, {}, 'minimum'] entries.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor()
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        result = torch.minimum(lhs, rhs)
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, result, {}, 'minimum'])
    return samples


def fuzz_eq(n):
    """Generate n data points for torch.eq.

    The second operand has a shape that broadcasts against the first; when it
    holds a single element it is replaced by a plain Python number about half
    of the time.

    Returns a list of [in1, in2, None, out, {}, 'eq'] entries.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor()
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        if torch.numel(rhs) == 1 and random.random() > 0.5:
            rhs = rhs[0].item()
        result = torch.eq(lhs, rhs)
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, result, {}, 'eq'])
    return samples

def fuzz_gt(n):
    """Generate n data points for torch.gt.

    The second operand has a shape that broadcasts against the first; when it
    holds a single element it is replaced by a plain Python number about half
    of the time.

    Returns a list of [in1, in2, None, out, {}, 'gt'] entries.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor()
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        if torch.numel(rhs) == 1 and random.random() > 0.5:
            rhs = rhs[0].item()
        result = torch.gt(lhs, rhs)
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, result, {}, 'gt'])
    return samples

def fuzz_ne(n):
    """Generate n data points for torch.ne.

    The second operand has a shape that broadcasts against the first; when it
    holds a single element it is replaced by a plain Python number about half
    of the time.

    Returns a list of [in1, in2, None, out, {}, 'ne'] entries, where out is the
    comparison result cast to float.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor()
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        if torch.numel(rhs) == 1 and random.random() > 0.5:
            rhs = rhs[0].item()
        # NOTE(review): the result is cast to float here, whereas fuzz_eq,
        # fuzz_gt and fuzz_lt store the raw comparison result - confirm this
        # difference is intended.
        result = torch.ne(lhs, rhs).float()
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, result, {}, 'ne'])
    return samples


def fuzz_add(n):
    """Generate n data points for torch.add.

    One operand is a random tensor with entries below 10, the other has a
    broadcast-compatible shape and may be collapsed to a Python number when it
    holds a single element. The two operands are swapped half of the time.

    Returns a list of [in1, in2, None, out, {}, 'add'] entries.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor(max_value=10)
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        if torch.numel(rhs) == 1 and random.random() > 0.5:
            rhs = rhs[0].item()
        if random.random() < 0.5:
            lhs, rhs = rhs, lhs
        total = torch.add(lhs, rhs)
        if torch.numel(total) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, total, {}, 'add'])
    return samples


def fuzz_div(n):
    """Generate n data points for torch.div.

    A random shape and a shape broadcastable to it are drawn; half of the time
    their roles are exchanged and the numerator is redrawn with the other
    shape. The denominator is drawn with nonzero entries and may be collapsed
    to a Python number when it holds a single element.

    Returns a list of [in1, in2, None, out, {}, 'div'] entries.
    """
    samples = []
    while len(samples) < n:
        numerator, num_shape = random_tensor()
        den_shape = get_broadcastable_size(num_shape)
        if random.random() < 0.5:
            # Exchange which operand carries the "full" shape.
            num_shape, den_shape = den_shape, num_shape
            numerator, _ = random_tensor(num_shape)
        denominator, _ = random_tensor(den_shape, nonzero=True)
        if torch.numel(denominator) == 1 and random.random() > 0.5:
            denominator = denominator[0].item()
        quotient = torch.div(numerator, denominator)
        if torch.numel(quotient) <= MAX_ELEMS:
            samples.append([numerator, denominator, None, quotient, {}, 'div'])
    return samples


def fuzz_mul(n):
    """Generate n data points for torch.mul.

    One operand is a random tensor with entries below 10, the other has a
    broadcast-compatible shape and may be collapsed to a Python number when it
    holds a single element. The two operands are swapped half of the time.

    Returns a list of [in1, in2, None, out, {}, 'mul'] entries.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor(max_value=10)
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        if torch.numel(rhs) == 1 and random.random() > 0.5:
            rhs = rhs[0].item()
        if random.random() < 0.5:
            lhs, rhs = rhs, lhs
        product = torch.mul(lhs, rhs)
        if torch.numel(product) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, product, {}, 'mul'])
    return samples

def fuzz_tile(n):
    """Generate n data points for torch.tile.

    Each dimension of the input is repeated a random 1..MAX_CONSTANT times.

    Returns a list of [in1, None, None, out, {}, 'tile'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        reps = tuple(random.randint(1, MAX_CONSTANT) for _ in shape)
        tiled = torch.tile(tensor, dims=reps)
        if torch.numel(tiled) <= MAX_ELEMS:
            # NOTE(review): the repetition counts are not stored in params;
            # presumably consumers recover them from the shapes - confirm.
            samples.append([tensor, None, None, tiled, {}, 'tile'])
    return samples


def fuzz_repeat_interleave(n):
    """Generate n data points for torch.repeat_interleave along a random axis.

    One repeat count in 1..MAX_CONSTANT is drawn for every index of the chosen
    axis; the counts tensor is stored as the second input.

    Returns a list of [in1, repeats, None, out, {'dim': dim}, 'repeat_interleave'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        axis = random.randint(0, len(shape) - 1)
        counts = [random.randint(1, MAX_CONSTANT) for _ in range(shape[axis])]
        repeats = torch.tensor(counts)
        result = torch.repeat_interleave(tensor, repeats=repeats, dim=axis)
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([tensor, repeats, None, result, {'dim': axis}, 'repeat_interleave'])
    return samples


def fuzz_masked_select(n):
    """Generate n data points for torch.masked_select.

    The mask has the input's shape, optionally prefixed with a random number
    of length-1 dimensions (up to MAX_DIMENSION dimensions in total), so mask
    and input stay broadcast-compatible.

    Returns a list of [in1, mask, None, out, {}, 'masked_select'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        mask_shape = shape
        spare_dims = MAX_DIMENSION - len(shape)
        if spare_dims > 0:
            mask_shape = [1] * random.randint(0, spare_dims) + shape
        mask = torch.randint(0, 2, mask_shape).bool()
        selected = torch.masked_select(tensor, mask=mask)
        if torch.numel(selected) <= MAX_ELEMS:
            samples.append([tensor, mask, None, selected, {}, 'masked_select'])
    return samples
  

def fuzz_transpose(n):
    """Generate n data points for torch.transpose.

    Only inputs with at least two dimensions are used; two distinct axes are
    drawn at random and swapped.

    Returns a list of [in1, None, None, out, {'dim0': ..., 'dim1': ...}, 'transpose'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        rank = len(shape)
        if rank < 2:
            continue
        dim0, dim1 = random.sample(range(rank), 2)
        swapped = torch.transpose(tensor, dim0, dim1)
        if torch.numel(swapped) <= MAX_ELEMS:
            samples.append([tensor, None, None, swapped, {'dim0': dim0, 'dim1': dim1}, 'transpose'])
    return samples


def fuzz_any(n):
    """Generate n data points for torch.any.

    A random axis in [0, rank] is drawn; the value `rank` selects the
    reduction over the whole tensor (recorded as {'dim': None}).

    Returns a list of [in1, None, None, out, params, 'any'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        rank = len(shape)
        axis = random.randint(0, rank)
        if axis < rank:
            result = torch.any(tensor, dim=axis)
            params = {'dim': axis}
        else:
            result = torch.any(tensor)
            params = {'dim': None}
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([tensor, None, None, result, params, 'any'])
    return samples


def fuzz_tensordot(n):
    """Generate n data points for torch.tensordot with an integer `dims`.

    The last `reducing_dim` dimensions of in1 are contracted with the first
    `reducing_dim` dimensions of in2, so in2's shape starts with in1's trailing
    dimensions followed by random extra dimensions.

    Draws are rejected when either in2 or the output holds more than MAX_ELEMS
    elements. in2 can have up to MAX_DIMENSION + 1 dimensions and would
    otherwise exceed the per-tensor size limit.

    Returns a list of [in1, in2, None, out, {'dims': reducing_dim}, 'tensordot'] entries.
    """
    dps = []
    while len(dps) < n:
        in1, size = random_tensor(max_value=10)
        reducing_dim = random.randint(1, len(size))
        in2_dim = random.randint(reducing_dim, MAX_DIMENSION + 1)
        extra_dims = [random.randint(1, MAX_SIZE) for _ in range(in2_dim - reducing_dim)]
        size2 = size[-reducing_dim:] + extra_dims
        in2, _ = random_tensor(size2)
        # Inputs must respect the same element budget as outputs.
        if torch.numel(in2) > MAX_ELEMS:
            continue
        out = torch.tensordot(in1, in2, dims=reducing_dim)
        if torch.numel(out) > MAX_ELEMS:
            continue
        params = {'dims': reducing_dim}
        dps.append([in1, in2, None, out, params, 'tensordot'])
    return dps


def fuzz_one_hot(n):
    """Generate n data points for torch.nn.functional.one_hot.

    num_classes is drawn from [max + 1, max + MAX_CONSTANT], where max is the
    largest entry of the input, so every entry is a valid class index.

    Returns a list of [in1, None, None, out, {'num_classes': int}, 'one_hot'] entries.
    """
    dps = []
    while len(dps) < n:
        in1, size = random_tensor()
        # Convert to a plain int so random.randint is not handed 0-dim tensors
        # and num_classes is an int regardless of the Python version.
        max_val = torch.max(in1).item()
        num_classes = random.randint(max_val + 1, max_val + MAX_CONSTANT)
        out = torch.nn.functional.one_hot(in1, num_classes)
        if torch.numel(out) > MAX_ELEMS:
            continue
        params = {'num_classes': num_classes}
        dps.append([in1, None, None, out, params, 'one_hot'])
    return dps


def fuzz_gather(n):
    """Generate n data points for torch.gather.

    The index tensor has the input's rank. Along the gather axis its length is
    random (1..MAX_SIZE); along every other axis it is at most the input's
    length. Index values are reduced modulo the input's length along the
    gather axis so that they are always in range.

    Returns a list of [in1, index, None, out, {'dim': dim}, 'gather'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        axis = random.randint(0, len(shape) - 1)
        index_shape = [
            random.randint(1, MAX_SIZE) if d == axis else random.randint(1, extent)
            for d, extent in enumerate(shape)
        ]
        index, _ = random_tensor(index_shape)
        index = index % shape[axis]
        gathered = torch.gather(tensor, axis, index)
        if torch.numel(gathered) <= MAX_ELEMS:
            samples.append([tensor, index, None, gathered, {'dim': axis}, 'gather'])
    return samples


def fuzz_cdist(n):
    """Generate n data points for torch.cdist on batched inputs.

    Both inputs are 3-D and share their first and last dimension lengths; the
    norm order p is a random integer in [0, MAX_CONSTANT]. The stored inputs
    are the integer tensors; the distance is computed on their float copies.

    Returns a list of [in1, in2, None, out, {'p': p}, 'cdist'] entries.
    """
    samples = []
    while len(samples) < n:
        batch, rows1, feat, rows2 = (random.randint(1, MAX_SIZE) for _ in range(4))
        first, _ = random_tensor([batch, rows1, feat])
        second, _ = random_tensor([batch, rows2, feat])
        p = random.randint(0, MAX_CONSTANT)
        distances = torch.cdist(first.float(), second.float(), p=p)
        if torch.numel(distances) <= MAX_ELEMS:
            samples.append([first, second, None, distances, {'p': p}, 'cdist'])
    return samples


def fuzz_searchsorted(n):
    """Generate n data points for torch.searchsorted.

    The first input is a random tensor sorted along its last dimension. For a
    multi-dimensional sequence the values tensor shares all leading dimensions
    and has a random last dimension; for a 1-D sequence the values tensor has
    an arbitrary random shape.

    Returns a list of [sorted_in1, in2, None, out, {}, 'searchsorted'] entries.
    """
    samples = []
    while len(samples) < n:
        raw, shape = random_tensor()
        sequence = torch.sort(raw)[0]
        if len(shape) > 1:
            values_shape = shape[:-1] + [random.randint(1, MAX_SIZE)]
            values, _ = random_tensor(values_shape)
        else:
            values, _ = random_tensor()
        positions = torch.searchsorted(sequence, values)
        if torch.numel(positions) <= MAX_ELEMS:
            samples.append([sequence, values, None, positions, {}, 'searchsorted'])
    return samples


def fuzz_bincount(n):
    """Generate n data points for torch.bincount.

    Inputs are 1-D tensors of random length 1..MAX_SIZE; draws containing a
    value above MAX_SIZE are rejected, which keeps the output short.

    Returns a list of [in1, None, None, out, {}, 'bincount'] entries.
    """
    samples = []
    while len(samples) < n:
        length = random.randint(1, MAX_SIZE)
        values, _ = random_tensor([length])
        if torch.max(values) > MAX_SIZE:
            continue
        counts = torch.bincount(values)
        if torch.numel(counts) <= MAX_ELEMS:
            samples.append([values, None, None, counts, {}, 'bincount'])
    return samples

def fuzz_where(n):
    """Generate n data points for torch.where(condition, in1, in2).

    The condition is a random boolean tensor. With equal probability the two
    value operands are (tensor, tensor), (tensor, scalar), (scalar, tensor) or
    (scalar, scalar). Tensor operands get shapes that each broadcast against
    the condition, but not necessarily against each other, so torch.where may
    fail; such draws are skipped and redrawn.

    Returns a list of [condition, in1, in2, out, {}, 'where'] entries.
    """
    dps = []
    while len(dps) < n:
        condition, size = random_tensor(return_bool=True)
        rand_value = random.random()
        if rand_value < 0.25:
            size_a = get_broadcastable_size(size)
            size_b = get_broadcastable_size(size)
            if random.random() >= 0.5:
                size_a, size_b = size_b, size_a
            in1, _ = random_tensor(size_a)
            in2, _ = random_tensor(size_b)
        elif rand_value < 0.5:
            in1_size = get_broadcastable_size(size)
            in1, _ = random_tensor(in1_size)
            in2 = random.randint(MIN_VALUE, MAX_VALUE)
        elif rand_value < 0.75:
            in2_size = get_broadcastable_size(size)
            in1 = random.randint(MIN_VALUE, MAX_VALUE)
            in2, _ = random_tensor(in2_size)
        else:
            in1 = random.randint(MIN_VALUE, MAX_VALUE)
            in2 = random.randint(MIN_VALUE, MAX_VALUE)
        try:
            out = torch.where(condition, in1, in2)
        except Exception:
            # Incompatible operand shapes/types: drop this draw. A bare
            # `except:` here would also swallow KeyboardInterrupt and make the
            # long-running loop impossible to interrupt.
            continue
        params = {}
        if torch.numel(out) > MAX_ELEMS:
            continue
        dps.append([condition, in1, in2, out, params, 'where'])
    return dps


def fuzz_reshape(n):
    """Generate n data points for torch.reshape.

    The target shape is a random factorization of the input's element count
    (taken from the module-level `divisors` table). With probability 0.25 a
    length-1 dimension is inserted at a random position, and with probability
    0.25 one entry is replaced by -1. Inputs with a single element or more
    than MAX_ELEMS elements, and target shapes with more than MAX_DIMENSION
    entries, are rejected.

    Returns a list of [in1, None, None, out, {'shape': shape}, 'reshape'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, _ = random_tensor()
        count = torch.numel(tensor)
        if count == 1 or count > MAX_ELEMS:
            continue
        target = list(random.choice(divisors[count]))
        if random.random() < 0.25:
            target.insert(random.randint(0, len(target)), 1)
        if random.random() < 0.25:
            target[random.randint(0, len(target) - 1)] = -1
        if len(target) <= MAX_DIMENSION:
            reshaped = torch.reshape(tensor, tuple(target))
            samples.append([tensor, None, None, reshaped, {'shape': target}, 'reshape'])
    return samples


def fuzz_arange(n):
    """Generate n data points for torch.arange(end) with end in 1..MAX_VALUE.

    The end value itself is not stored; only the resulting tensor is.

    Returns a list of [None, None, None, out, {}, 'arange'] entries.
    """
    samples = []
    while len(samples) < n:
        end = random.randint(1, MAX_VALUE)
        sequence = torch.arange(end)
        if torch.numel(sequence) <= MAX_ELEMS:
            samples.append([None, None, None, sequence, {}, 'arange'])
    return samples


def fuzz_eye(n):
    """Generate n data points for torch.eye(k) with k drawn from 1..MAX_VALUE.

    Matrices with more than MAX_ELEMS elements are rejected. The size k itself
    is not stored; only the resulting tensor is.

    Returns a list of [None, None, None, out, {}, 'eye'] entries.
    """
    samples = []
    while len(samples) < n:
        side = random.randint(1, MAX_VALUE)
        identity = torch.eye(side)
        if torch.numel(identity) <= MAX_ELEMS:
            samples.append([None, None, None, identity, {}, 'eye'])
    return samples


def fuzz_matmul(n):
    """Generate n data points for torch.matmul.

    in1 is a random tensor with entries below 10 and 1 to 3 dimensions. The
    shape of in2 is chosen so that the product is defined:
      * 1-D in1: a vector of the same length, or a matrix with matching rows;
      * 2-D in1: a matrix with matching rows, or a vector of matching length;
      * 3-D in1: a vector, a batched matrix with the same batch size, or a
        plain matrix, each matching in1's last dimension.
    Draws whose output holds more than MAX_ELEMS elements are rejected, like
    in the other generators.

    Returns a list of [in1, in2, None, out, {}, 'matmul'] entries.
    """
    dps = []
    while len(dps) < n:
        in1, size = random_tensor(max_value=10)
        rank = len(size)
        if rank == 1:
            if random.random() < 0.5:
                in2_size = size
            else:
                in2_size = [size[0], random.randint(1, MAX_SIZE)]
        elif rank == 2:
            if random.random() < 0.5:
                in2_size = [size[1], random.randint(1, MAX_SIZE)]
            else:
                in2_size = [size[1]]
        elif rank == 3:
            rand_value = random.random()
            if rand_value < 0.33:
                in2_size = [size[2]]
            elif rand_value < 0.66:
                in2_size = [size[0], size[2], random.randint(1, MAX_SIZE)]
            else:
                in2_size = [size[2], random.randint(1, MAX_SIZE)]
        else:
            # No in2 recipe for this rank (only reachable if MAX_DIMENSION is
            # raised above 3); skip instead of using an unbound or stale in2.
            continue
        in2, _ = random_tensor(in2_size)
        out = torch.matmul(in1, in2)
        if torch.numel(out) > MAX_ELEMS:
            continue
        params = {}
        dps.append([in1, in2, None, out, params, 'matmul'])
    return dps

def fuzz_roll(n):
    """Generate n data points for torch.roll along a single random axis.

    The shift is drawn from 1..MAX_CONSTANT.

    Returns a list of [in1, None, None, out, {'shift': ..., 'dim': ...}, 'roll'] entries.
    """
    samples = []
    while len(samples) < n:
        tensor, shape = random_tensor()
        shift = random.randint(1, MAX_CONSTANT)
        axis = random.randint(0, len(shape) - 1)
        rolled = torch.roll(tensor, shift, axis)
        if torch.numel(rolled) <= MAX_ELEMS:
            samples.append([tensor, None, None, rolled, {'shift': shift, 'dim': axis}, 'roll'])
    return samples

def fuzz_expand(n):
    """Generate n data points for Tensor.expand.

    Starting from the input's own shape, every length-1 dimension is replaced,
    with probability 0.5, by a random length in [MIN_VALUE, MAX_VALUE]. For
    inputs with more than one dimension, one entry is additionally replaced by
    -1 (keep that dimension) half of the time. Single-element inputs are
    skipped.

    Draws are rejected when the input or the expanded output holds more than
    MAX_ELEMS elements. Expansion can multiply the element count by up to
    MAX_VALUE per dimension, so the output check is required.

    Returns a list of [in1, None, None, out, {'sizes': sizes}, 'expand'] entries.
    """
    dps = []
    while len(dps) < n:
        in1, size = random_tensor()
        in1_elements = torch.numel(in1)
        if in1_elements > MAX_ELEMS or in1_elements == 1:
            continue
        sizes = list(size)
        for i, extent in enumerate(sizes):
            if extent == 1 and random.random() < 0.5:
                # NOTE(review): with MIN_VALUE == 0 this can pick length 0 and
                # yield an empty output - confirm that is intended.
                sizes[i] = random.randint(MIN_VALUE, MAX_VALUE)
        if random.random() < 0.5 and len(sizes) > 1:
            sizes[random.randint(0, len(sizes) - 1)] = -1
        if len(sizes) > MAX_DIMENSION:
            continue
        out = in1.expand(tuple(sizes))
        # The output must respect the same element budget as every other API.
        if torch.numel(out) > MAX_ELEMS:
            continue
        params = {'sizes': sizes}
        dps.append([in1, None, None, out, params, 'expand'])
    return dps

def fuzz_lt(n):
    """Generate n data points for torch.lt.

    The second operand has a shape that broadcasts against the first; when it
    holds a single element it is replaced by a plain Python number about half
    of the time.

    Returns a list of [in1, in2, None, out, {}, 'lt'] entries.
    """
    samples = []
    while len(samples) < n:
        lhs, shape = random_tensor()
        rhs, _ = random_tensor(get_broadcastable_size(shape))
        if torch.numel(rhs) == 1 and random.random() > 0.5:
            rhs = rhs[0].item()
        result = torch.lt(lhs, rhs)
        if torch.numel(result) <= MAX_ELEMS:
            samples.append([lhs, rhs, None, result, {}, 'lt'])
    return samples


In [7]:
# Generate synthetic data for every API generator in api_list.
# Each generator contributes FUZZING_PER_API data points; all of them are
# collected, in generator order, into the single flat list `dps`.
api_list = [
    fuzz_matmul, fuzz_where, fuzz_max, fuzz_eq, fuzz_sum, fuzz_tile,
    fuzz_repeat_interleave, fuzz_masked_select, fuzz_squeeze, fuzz_transpose,
    fuzz_any, fuzz_add, fuzz_div, fuzz_mul, fuzz_tensordot, fuzz_square,
    fuzz_one_hot, fuzz_stack, fuzz_argmax, fuzz_minimum, fuzz_gather,
    fuzz_cdist, fuzz_searchsorted, fuzz_bincount, fuzz_gt, fuzz_ne,
    fuzz_unsqueeze, fuzz_reshape, fuzz_arange, fuzz_eye, fuzz_roll,
    fuzz_expand, fuzz_lt,
]
dps = []
for api in api_list:
    print(api)  # progress marker: shows which generator is currently running
    dps.extend(api(FUZZING_PER_API))
print(len(dps))

<function fuzz_matmul at 0x7f24eee5c790>
<function fuzz_where at 0x7f24eee5c550>
<function fuzz_max at 0x7f24fa1bc550>
<function fuzz_eq at 0x7f24fa1bcb80>
<function fuzz_sum at 0x7f24fa1bc940>
<function fuzz_tile at 0x7f24fa1bcee0>
<function fuzz_repeat_interleave at 0x7f24fa1bcf70>
<function fuzz_masked_select at 0x7f24eee5c040>
<function fuzz_squeeze at 0x7f24fa1bc820>
<function fuzz_transpose at 0x7f24eee5c0d0>
<function fuzz_any at 0x7f24eee5c160>
<function fuzz_add at 0x7f24fa1bcd30>
<function fuzz_div at 0x7f24fa1bcdc0>
<function fuzz_mul at 0x7f24fa1bce50>
<function fuzz_tensordot at 0x7f24eee5c1f0>
<function fuzz_square at 0x7f24fa1bc4c0>
<function fuzz_one_hot at 0x7f24eee5c280>


<function fuzz_stack at 0x7f24fa1bc9d0>
<function fuzz_argmax at 0x7f24fa1bc790>
<function fuzz_minimum at 0x7f24fa1bcaf0>
<function fuzz_gather at 0x7f24eee5c310>
<function fuzz_cdist at 0x7f24eee5c3a0>
<function fuzz_searchsorted at 0x7f24eee5c430>
<function fuzz_bincount at 0x7f24eee5c4c0>
<function fuzz_gt at 0x7f24fa1bcc10>
<function fuzz_ne at 0x7f24fa1bcca0>
<function fuzz_unsqueeze at 0x7f24fa1bc8b0>
<function fuzz_reshape at 0x7f24eee5c5e0>
<function fuzz_arange at 0x7f24eee5c670>
<function fuzz_eye at 0x7f24eee5c700>
<function fuzz_roll at 0x7f24eee5c820>
<function fuzz_expand at 0x7f24eee5c8b0>
<function fuzz_lt at 0x7f24eee5c940>
3300


In [8]:
# Persist all generated data points with torch.save.
# The per-API sample count is taken from FUZZING_PER_API so the file name cannot
# drift from the setting that produced the data; with FUZZING_PER_API = 100000
# this is 'Single_100000_synthetic.pt'.
SAVE_FILE = f'Single_{FUZZING_PER_API}_synthetic.pt'
torch.save(dps, f=SAVE_FILE)