Skip to content

Update the Relooper algorithm #330

@RReverser

Description

@RReverser

C libraries often use goto exit / goto error as a way to jump within a function to a place that can cleanup resources before exiting when some condition is unmet.

Let me take this example (one of the simpler ones in the wild): https://github.com/kbarbary/sep/blob/d22bef88ded3c5b25f06584aa8a2bc931cad1826/ctest/test_image.c#L142-L212

/* an extremely dumb reader for our specific test FITS file! */
int read_test_image(char *fname, float **data, int *nx, int *ny)
{
  FILE *f;
  char buf[80];  /* buffer to hold a line */
  long pos;
  size_t nbytes, nel, i, elsize, nread;
  char *rawbytes = NULL;
  short *rawdata;
  char tmp;
  int status = 0;
  float bscale, bzero;


  /* hard-code image size & element size */
  *nx = 256;
  *ny = 256;
  elsize = 2;
  nel = *nx * *ny;
  bscale = 2.92273835509;
  bzero = 3713.66692596;


  f = fopen(fname, "r");


  /* read first line and check if it is a FITS file */
  nread = fread(buf, 1, 80, f);
  if (nread != 80) { status = 1; goto exit; }
  if (strncmp(buf, "SIMPLE  =                    T", 30) != 0)
    {
      printf("not a FITS file");
      status = 1;
      goto exit;
    }


  /* read rows until we get to END keyword */
  while (strncmp(buf, "END", 3) != 0)
    {
      nread = fread(buf, 1, 80, f);
      if (nread != 80) { status = 1; goto exit; }
    }


  /* move to next 2880 byte boundary. */
  pos = ftell(f) % 2880;  /* position in "page" */
  if (pos != 0) fseek(f, 2880 - pos, SEEK_CUR);


  /* read raw data bytes */
  nbytes = nel * elsize;
  rawbytes = (char *)malloc(nbytes);
  nread = fread(rawbytes, 1, nbytes, f);
  if (nread != nbytes) { status = 1; goto exit; }


  /* swap bytes in raw data (FITS is big-endian) */
  for (i=0; i<nbytes; i+=2)
    {
      tmp = rawbytes[i];
      rawbytes[i] = rawbytes[i+1];
      rawbytes[i+1] = tmp;
    }
  rawdata = (short *)rawbytes; /* buffer is now little-endian */


  /* convert to float, applying bscale/bzero */
  *data = (float *)malloc(nel * sizeof(float));
  for (i=0; i<nel; i++)
    {
      (*data)[i] = bscale * rawdata[i] + bzero;
    }


 exit:
  free(rawbytes);


  return status;
}

After transpiling + reorganize definitions, the Rust code generated by c2rust looks like this:

/* an extremely dumb reader for our specific test FITS file! */
#[no_mangle]

pub unsafe extern "C" fn read_test_image(
    mut fname: *mut libc::c_char,
    mut data: *mut *mut libc::c_float,
    mut nx: *mut libc::c_int,
    mut ny: *mut libc::c_int,
) -> libc::c_int {
    let mut current_block: u64; /* buffer to hold a line */
    let mut f = 0 as *mut crate::stdlib::FILE;
    let mut buf: [libc::c_char; 80] = [0; 80];
    let mut pos: libc::c_long = 0;
    let mut nbytes: crate::stddef_h::size_t = 0;
    let mut nel: crate::stddef_h::size_t = 0;
    let mut i: crate::stddef_h::size_t = 0;
    let mut elsize: crate::stddef_h::size_t = 0;
    let mut nread: crate::stddef_h::size_t = 0;
    let mut rawbytes = crate::stddef_h::NULL as *mut libc::c_char;
    let mut rawdata = 0 as *mut libc::c_short;
    let mut tmp: libc::c_char = 0;
    let mut status = 0 as libc::c_int;
    let mut bscale: libc::c_float = 0.;
    let mut bzero: libc::c_float = 0.;
    /* hard-code image size & element size */
    *nx = 256 as libc::c_int;
    *ny = 256 as libc::c_int;
    elsize = 2 as libc::c_int as crate::stddef_h::size_t;
    nel = (*nx * *ny) as crate::stddef_h::size_t;
    bscale = 2.92273835509f64 as libc::c_float;
    bzero = 3713.66692596f64 as libc::c_float;
    f = crate::stdlib::fopen(fname, b"r\x00" as *const u8 as *const libc::c_char);
    /* read first line and check if it is a FITS file */
    nread = crate::stdlib::fread(
        buf.as_mut_ptr() as *mut libc::c_void,
        1 as libc::c_int as libc::c_ulong,
        80 as libc::c_int as libc::c_ulong,
        f,
    );
    if nread != 80 as libc::c_int as libc::c_ulong {
        status = 1 as libc::c_int
    } else if crate::stdlib::strncmp(
        buf.as_mut_ptr(),
        b"SIMPLE  =                    T\x00" as *const u8 as *const libc::c_char,
        30 as libc::c_int as libc::c_ulong,
    ) != 0 as libc::c_int
    {
        ::libc::printf(b"not a FITS file\x00" as *const u8 as *const libc::c_char);
        status = 1 as libc::c_int
    } else {
        loop
        /* read rows until we get to END keyword */
        {
            if !(crate::stdlib::strncmp(
                buf.as_mut_ptr(),
                b"END\x00" as *const u8 as *const libc::c_char,
                3 as libc::c_int as libc::c_ulong,
            ) != 0 as libc::c_int)
            {
                current_block = 1109700713171191020;
                break;
            }
            nread = crate::stdlib::fread(
                buf.as_mut_ptr() as *mut libc::c_void,
                1 as libc::c_int as libc::c_ulong,
                80 as libc::c_int as libc::c_ulong,
                f,
            );
            if !(nread != 80 as libc::c_int as libc::c_ulong) {
                continue;
            }
            status = 1 as libc::c_int;
            current_block = 686024592065818529;
            break;
        }
        match current_block {
            686024592065818529 => {}
            _ => {
                /* move to next 2880 byte boundary. */
                pos = crate::stdlib::ftell(f) % 2880 as libc::c_int as libc::c_long; /* position in "page" */
                if pos != 0 as libc::c_int as libc::c_long {
                    crate::stdlib::fseek(
                        f,
                        2880 as libc::c_int as libc::c_long - pos,
                        ::libc::SEEK_CUR,
                    );
                }
                /* read raw data bytes */
                nbytes = nel.wrapping_mul(elsize);
                rawbytes = crate::stdlib::malloc(nbytes) as *mut libc::c_char;
                nread = crate::stdlib::fread(
                    rawbytes as *mut libc::c_void,
                    1 as libc::c_int as libc::c_ulong,
                    nbytes,
                    f,
                );
                if nread != nbytes {
                    status = 1 as libc::c_int
                } else {
                    /* swap bytes in raw data (FITS is big-endian) */
                    i = 0 as libc::c_int as crate::stddef_h::size_t; /* buffer is now little-endian */
                    while i < nbytes {
                        tmp = *rawbytes.offset(i as isize);
                        *rawbytes.offset(i as isize) = *rawbytes
                            .offset(i.wrapping_add(1 as libc::c_int as libc::c_ulong) as isize);
                        *rawbytes
                            .offset(i.wrapping_add(1 as libc::c_int as libc::c_ulong) as isize) =
                            tmp;
                        i = (i as libc::c_ulong).wrapping_add(2 as libc::c_int as libc::c_ulong)
                            as crate::stddef_h::size_t
                            as crate::stddef_h::size_t
                    }
                    rawdata = rawbytes as *mut libc::c_short;
                    /* convert to float, applying bscale/bzero */
                    *data = crate::stdlib::malloc(
                        nel.wrapping_mul(::std::mem::size_of::<libc::c_float>() as libc::c_ulong),
                    ) as *mut libc::c_float; /* corresponding x on small im */
                    i = 0 as libc::c_int as crate::stddef_h::size_t; /* corresponding y on small im */
                    while i < nel {
                        *(*data).offset(i as isize) = bscale
                            * *rawdata.offset(i as isize) as libc::c_int as libc::c_float
                            + bzero;
                        i = i.wrapping_add(1)
                    }
                }
            }
        }
    }
    ::libc::free(rawbytes as *mut libc::c_void);
    return status;
}

As you can see, it tried to interleave loops and conditions with the goto, making the dataflow less recognisable as well as inserting current_block IDs.

Instead, a much more natural output could use #![feature(label_break_value)] and output function like this:

/* an extremely dumb reader for our specific test FITS file! */
#[no_mangle]

pub unsafe extern "C" fn read_test_image(
    mut fname: *mut libc::c_char,
    mut data: *mut *mut libc::c_float,
    mut nx: *mut libc::c_int,
    mut ny: *mut libc::c_int,
) -> libc::c_int {
  'exit: {
     // ...main code...
     break 'exit; // <- transform `goto exit` anywhere in the function into this
     // ...more main code...
  }
  ::libc::free(rawbytes as *mut libc::c_void);
  return status;
}

That particular feature is unstable, but C2Rust already outputs nightly-specific code. However, a stable variant is also possible with a minor change:

/* an extremely dumb reader for our specific test FITS file! */
#[no_mangle]

pub unsafe extern "C" fn read_test_image(
    mut fname: *mut libc::c_char,
    mut data: *mut *mut libc::c_float,
    mut nx: *mut libc::c_int,
    mut ny: *mut libc::c_int,
) -> libc::c_int {
  'exit: loop {
     // ...main code...
     break 'exit; // <- transform `goto exit` anywhere in the function into this
     // ...more main code...
     break; // just break out of the superfluous loop so that it runs only once
  }
  ::libc::free(rawbytes as *mut libc::c_void);
  return status;
}

Either way, the output becomes a lot more readable, compiles to something a lot closer to the original, and easier to refactor by hand further into idiomatic Rust code as well.

IIUC, currently C2Rust is using a single Relooper algorithm for all control flow (?), but given how common this pattern is, perhaps it's worth special-casing it and solving separately?

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions