diff --git a/runtime/libffi/z/ffi64.c b/runtime/libffi/z/ffi64.c
index 139946ce7bd..91c1affb137 100644
--- a/runtime/libffi/z/ffi64.c
+++ b/runtime/libffi/z/ffi64.c
@@ -695,16 +695,14 @@ ffi_prep_args (unsigned char *stack, extended_cif *ecif)
       int type = (*type_ptr)->type;
       int size = (*type_ptr)->size;
 
-     /*  Check how a structure type is passed.   */
-      if (type == FFI_TYPE_STRUCT) {
-				memcpy(arg_ptr, (char*)p_argv, (*type_ptr)->size);
-				arg_ptr += (*type_ptr)->size;
-				continue;
-      }
-
      /*  Now handle all primitive int/pointer/float data types.  */
       switch (type) 
 	{
+    
+    case FFI_TYPE_STRUCT:
+      memcpy(arg_ptr, *p_argv, size);
+      break;
+
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
 	  case FFI_TYPE_LONGDOUBLE: 
 	    *(long double *) arg_ptr = * (long double *) (*p_argv);
@@ -743,23 +741,18 @@ ffi_prep_args (unsigned char *stack, extended_cif *ecif)
  
 	  case FFI_TYPE_UINT16:
 	    *(unsigned short *) arg_ptr = * (unsigned short *) (* p_argv);
-			arg_ptr += 2;
 	    break;
  
 	  case FFI_TYPE_SINT16:
 	    *(signed short *) arg_ptr = * (signed short *) (* p_argv);
-			arg_ptr += 2;
 	    break;
 
 	  case FFI_TYPE_UINT8:
 	    *(unsigned char *) arg_ptr = * (unsigned char *) (* p_argv);
-		arg_ptr += 3;
 	    break;
  
 	  case FFI_TYPE_SINT8:
 	    *(signed char *) arg_ptr = * (signed char*) (* p_argv);
-
-			arg_ptr += 3;
 	    break;
  
 	  default:
@@ -800,16 +793,17 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 
   /* 64-bit XPLINK handling below */
 
+  /* TODO: This comment is describing 31-bit behaviour */
   /* Determine return value handling.  
-     Integral values <=4bytes are widened and put in GPR3
-     Integral values >4bytes and <=8bytes are widened and put in
-     GPR2 (left most 32-bits) and GPR3 (right most 32-bits)
+     Integral values <=8bytes are widened and put in GPR3
+     Integral values >8bytes and <=16bytes are widened and put in
+     GPR2 (left most 64-bits) and GPR3 (right most 64-bits)
      Floating point values, including complex type, are returned in 
      FPR0, FPR2, FPR4, FPR6 (as many registers as required)
-     Aggregates size of <=4 are returned GPR1 (left adjusted)
-     Aggregates size between 5bytes-8bytes are returned in GPR1 and 
+     Aggregates size of <=8 are returned GPR1 (left adjusted)
+     Aggregates size between 9bytes-16bytes are returned in GPR1 and 
      GPR2 (left adjusted)
-     Aggregates size between 9bytes-12bytes are returned in GPR1, GPR2,
+     Aggregates size between 17bytes-24bytes are returned in GPR1, GPR2,
      and GPR3 (left adjusted)
      Anything greater in size and anyother type is returned in a buffer,
      the buffer is passed in as hidden first argument.
@@ -825,12 +819,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       /* Structures are returned in GPR or buffer depending on size.  */
       case FFI_TYPE_STRUCT:
 	struct_size = cif->rtype->size;
-	if (struct_size <= 8)
-	  cif->flags = FFI390_RET_INT64;
-	else if (struct_size <= 12)
+        if (struct_size <= 24)
 	  cif->flags = FFI390_RET_STRUCT;
 	else
-	  n_ov = struct_size;
+          n_gpr++;
 	break; 
 
       /* Floating point and complex values are returned in fpr0, 2, 4, 6 */
@@ -987,7 +979,10 @@ ffi_call(ffi_cif *cif,
   switch (cif->abi)
     {
       case FFI_SYSV:
-        ffi_call_SYSV(fn, &ecif, cif->flags, ecif.rvalue, cif->bytes, cif->nargs, (*cif->arg_types)->size);
+        if (cif->arg_types != NULL)
+          ffi_call_SYSV(fn, &ecif, cif->flags, ecif.rvalue, cif->bytes, cif->nargs, (*cif->arg_types)->size);
+        else 
+          ffi_call_SYSV(fn, &ecif, cif->flags, ecif.rvalue, cif->bytes, cif->nargs, 0);
 #ifdef FFI_DEBUG
 	printf("called_ffi_call_sysv nargs=%d\n",cif->nargs);
 #endif
diff --git a/runtime/libffi/z/sysvz64.s b/runtime/libffi/z/sysvz64.s
index 20b70457b09..4d1bb9bd712 100644
--- a/runtime/libffi/z/sysvz64.s
+++ b/runtime/libffi/z/sysvz64.s
@@ -13,6 +13,12 @@ FFISYS CELQPRLG DSASIZE=DSASZ,PSECT=ASP
 *@2136(,4) <- cif->arg_types->size (+52)?
 
          USING  CEEDSAHP,4
+
+*Store argument registers on the stack
+         STG 1,(2176+(((DSASZ+31)/32)*32))(,4)
+         STG 2,(2176+(((DSASZ+31)/32)*32)+8)(,4)
+         STG 3,(2176+(((DSASZ+31)/32)*32)+16)(,4)
+
          LG 14,0(,2)           ecif->cif
          LG 14,8(,14)          cif->arg_types
 *What: Storing arguments in this routine's
@@ -40,8 +46,23 @@ FFISYS CELQPRLG DSASIZE=DSASZ,PSECT=ASP
 *         SR  6,6              Offset in stored parm values
           LA  6,0
 
+*Dumb handling for now, but if struct return with size > 24
+*bytes, we need to allocate space for the dummy argument
+*that holds the return value pointer 
+         LG 15,(2176+(((DSASZ+31)/32)*32)+8)(,4)
+         LG 15,0(,15)           ecif->cif
+         LG 15,16(,15)          cif->rtype
+         LG 15,0(,15)           rtype->size
+         CGIJNH 15,24,GETNARGS
+
+         LG 1,(2176+(((DSASZ+31)/32)*32)+24)(,4)         
+         AHI 0,1                One less gpr to work with
+         AHI 7,8                argument area is 8 bytes larger
+
+GETNARGS DS 0H
 *Get the cif->nargs from caller's stack
          L   9,(2176+(((DSASZ+31)/32)*32)+44)(,4) 
+         CIJE 9,0,CALL        nargs was loaded as a 32-bit word
 
 *Place arguments passed to the foreign function based on type
 ARGLOOP  LG  11,0(10,5)       Get pointer to current ffi_type
@@ -53,6 +74,14 @@ ARGLOOP  LG  11,0(10,5)       Get pointer to current ffi_type
  
 *Following code prepares ffi arguments, according to xplink
 
+* technically, this isn't allowed 
+* but openJ9 uses ffi_type_void for void functions
+* so support it since it doesn't break anything
+* assuming we don't have something weird like a void type
+* followed by real parameters
+VOID     DS  0H               ffi_type_void
+         B CALL
+
 I        DS  0H               ffi_type_int
 UI32     DS 0H                ffi_type_uint32
          LA 15,I32
@@ -71,20 +100,24 @@ J        DS 0H
 IGPR1    DS 0H                INT/UI32 type passed in gpr1
          L 1,0(6,13)
          AHI 7,8              Advance to next word in arg area
+         AHI 6,4              Advance to next word in parm value
          B CONT               Next parameter
 IGPR2    DS 0H                INT/UI32 type passed in gpr2
          L 2,0(6,13)
          AHI 7,8              Advance to next word in arg area
+         AHI 6,4              Advance to next word in parm value
          B CONT               Next parameter
 IGPR3    DS 0H                INT/UI32 type passed in gpr3
          L 3,0(6,13)
          AHI 7,8              Advance to next word in arg area
+         AHI 6,4              Advance to next word in parm value
          B CONT               Next parameter
 IARGA    DS 0H                INT/UI32 stored in arg area
          L 11,0(6,13)         Argument value
          LA 15,2176(,4)       Start of arg area
          STG 11,0(7,15)        Store in next word in arg area
          AHI 7,8              Bump to the next word in arg area
+         AHI 6,4              Advance to next word in parm value
          B CONT               Next parameter
 
 UI8      DS 0H                ffi_type_uint8
@@ -105,20 +138,24 @@ J2       DS 0H
 I8GPR1   DS 0H                Char type passed in gpr1
          LLC 1,0(6,13)
          AHI 7,8              Advance to next word in arg area
+         AHI 6,1              Advance the first byte of parm value
          B  CONT              Next parameter
 I8GPR2   DS 0H                Char type passed in gpr2
          LLC 2,0(6,13)
          AHI 7,8              Advance to next word in arg area
+         AHI 6,1              Advance the first byte of parm value
          B CONT               Next parameter
 I8GPR3   DS 0H                Char type passed in gpr3
          LLC 3,0(6,13)   
          AHI 7,8              Advance to the next word in arg area
+         AHI 6,1              Advance the first byte of parm value
          B CONT
 I8ARGA   DS 0H                Char stored in arg area
          LLC 11,0(6,13)       Argument value
          LA 15,2176(,4)       Start of arg area
          STG 11,0(7,15)       Store in next word in arg area
          AHI 7,8              Bump to the next word in arg area
+         AHI 6,1              Advance the first byte of parm value
          B CONT               Next parameter
 
 UI16     DS 0H                ffi_type_uint16
@@ -138,20 +175,24 @@ J3       DS 0H
 U16GPR1  DS 0H                u_short type passed in gpr1
          LLH 1,0(6,13)
          AHI 7,8              Advance to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 U16GPR2  DS 0H                u_short type passed in gpr2
          LLH 2,0(6,13)
          AHI 7,8              Advance to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 U16GPR3  DS 0H                u_short passed in gpr3
          LLH 3,0(6,13)
          AHI 7,8              Advance to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 U16ARGA  DS 0H                u_short in arg area
          LLH 11,0(6,13)       Argument value
          LA 15,2176(,4)       Start of arg area
          STG 11,0(7,15)       Store in next word in arg area
          AHI 7,8              Bump to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 
 SI16     DS 0H                ffi_type_sint16
@@ -171,20 +212,24 @@ J4       DS 0H
 S16GPR1  DS 0H                s_SHORT type passsed in gpr1
          LH 1,0(6,13)   
          AHI 7,8              Advance to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 S16GPR2  DS 0H                s_SHORT type passed in gpr2
          LH 2,0(6,13)
          AHI 7,8              Advance to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 S16GPR3  DS 0H                s_SHORT type passed in gpr3
          LH 3,0(6,13)
          AHI 7,8              Advance to next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 S16ARGA  DS 0H                s_SHORT in arg area
          LH 11,0(6,13)        Argument value
          LA 15,2176(,4)       Start of arg area
          STG 11,0(7,15)       Store in next word in arg area
          AHI 7,8              Bump to the next word in arg area
+         AHI 6,2              Advance the first 2 bytes of parm value
          B CONT               Next parameter
 
 SI64     DS 0H                ffi_type_sint64
@@ -204,24 +249,24 @@ J5       DS 0H
 S64GPR1 DS 0H                INT64 type passed in gpr1
          LG  1,0(6,13)
          AHI 7,8              Advance next two words in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 S64GPR2 DS 0H                INT64 type passed in gpr2,gpr3
          LG  2,0(6,13)
          AHI 7,8              Advance next two words in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 S64GPR3  DS 0H                INT64 type passed in gpr2
          LG  3,0(6,13)
          AHI 7,8              Advance next two words in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 S64ARGA  DS 0H                INT64 in arg area
          LG  11,0(6,13)        Argument value
          LA 15,2176(,4)       Start of arg area
          STG  11,0(7,15)       Store in next word in arg area
          AHI 7,8              Bump one word in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 
 PTR      DS 0H                ffi_type_pointer
@@ -241,24 +286,24 @@ J6       DS 0H
 PTRG1    DS 0H                PTR type passed in gpr1
          LG 1,0(6,13)
          AHI 7,8              Advance to the next word in arg area
-         AHI 6,4
+         AHI 6,8
          B CONT               Next parameter
 PTRG2    DS 0H                PTR type passed in gpr2
          LG 2,0(6,13)
          AHI 7,8              Advance to the next word in arg area
-         AHI 6,4
+         AHI 6,8
          B CONT               Next parameter
 PTRG3    DS 0H                PTR type passed in gpr3
          LG 3,0(6,13)
          AHI 7,8              Advance to the next word in arg area
-         AHI 6,4
+         AHI 6,8
          B CONT               Next parameter
 PTRARG   DS 0H                PTR in arg area
          LG 11,0(6,13)         Argument value
          LA 15,2176(,4)       Start of arg area
-         STG 11,0(7,15)        Store in next word in arg area
+         STG 11,0(7,15)       Store in next word in arg area
          AHI 7,8              Bump to the next word in arg area
-         AHI 6,4
+         AHI 6,8
          B CONT               Next parameter
 
 UI64     DS 0H                ffi_type_uint64
@@ -278,24 +323,24 @@ J64      DS 0H
 U64GP1   DS 0H                u_INT64 passed in gpr1, gpr2
          LG 1,0(6,13)  
          AHI 7,8              Advance two slots in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 U64GP2   DS 0H                u_INT64 passed in gpr2, gpr3
          LG 2,0(6,13) 
          AHI 7,8              Advance two slots in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 U64GP3   DS 0H                u_INT64 passed in gpr2
          LG 3,0(6,13)
          AHI 7,8              Advance next word in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
 U64ARGA  DS 0H                u_INT64 in arg area
          LG 11,0(6,13)      Argument value
          LA 15,2176(,4)       Start of arg area
          STG  11,0(7,15)       Store in next two words in arg area
          AHI 7,8              Bump one word in arg area
-         AHI 6,4              Advance the first 4 bytes of parm value
+         AHI 6,8              Advance the first 8 bytes of parm value
          B CONT               Next parameter
    
 FLT      DS 0H                ffi_type_float
@@ -340,6 +385,7 @@ FLTSTR   DS 0H
          LA 15,2176(,4)       Start of arg area
          STE 11,0(7,15)       Store in next word in arg area
          AHI 7,8              Bump to the next word in arg area
+         AHI 6,4              Advance the first 4 bytes of parm value
          B CONT               Next parameter
 
 D        DS 0H                ffi_type_double
@@ -358,7 +404,7 @@ INC7     DS 0H
          LA 0,3
          B J7
 INCGD    DS 0H
-         AHI 0,2
+         AHI 0,1
 J7       DS 0H
          BR 15 
 
@@ -384,7 +430,7 @@ DBLSTR   DS 0H
          LA 15,2176(,4)       Start of arg area
          STD 11,0(7,15)       Store in next two words in arg area
          AHI 7,8              Bump to the next two words in arg area
-         AHI 6,4              Bump stack twice, cuz double is 8byte
+         AHI 6,8              Bump stack twice, cuz double is 8byte
          B CONT               Next parameter
 
 LD       DS 0H                ffi_type_longdouble
@@ -432,51 +478,355 @@ DARGF    DS 0H                l_DOUBLE in arg area
          LA  14,4             We reached max fprs
          B CONT               Next parameter
 
-*If we have spare gprs, pass up to 12bytes
-*in GPRs. 
-*TODO: Store left over struct to argument area
-*
+*If we have spare gprs, pass up to 24 bytes in GPRs. 
 STRCT    DS 0H
-         L   WRKREG,(2176+(((DSASZ+31)/32)*32)+24)(,4)
-         CFI WRKREG,12
-         BL STRCT2
-         B CONT
-STRCT2   DS 0H
-         LA 15,STRCTS
-         LR WRKREG,GPX
-         SLL WRKREG,2
-         L  15,0(WRKREG,15)
-         BR 15
+         LG  11,0(10,5)
+         LG 15,0(,11)          type->size
+*todo NULL check?
 
-BYTE4    DS 0H
-         L 1,0(6,STKREG)
-         AHI GPX,1
-         B CONT
+*check if first element is float or double 
+STFPCHK  DS 0H  
+         LG 15,16(,11)         type->elements
+         LG 15,0(,15)           type->elements[0]
+         LH 15,10(,15)          type->elements[0]->type
+         CFI 15,11               is it a float?
+         BE STFPCHKF
+         CFI 15,1               is it a double?
+         BE STFPCHKD
+         B STRCT2
 
-BYTE8    DS 0H
-         L 1,0(6,STKREG)
-         L 2,4(6,STKREG)
-         AHI GPX,2
-         AHI 6,4
-         B CONT
+*1st element is a float; test the 2nd (TODO: no NULL check on it)
+STFPCHKF DS 0H
+         LG 15,16(,11)          type->elements
+         LA 15,8(,15)           &type->elements[1]
+         LG 15,0(,15)           type->elements[1]
+         LH 15,10(,15)          type->elements[1]->type
+         CFI 15,11               is it a float?
+         BE STFPCHKF2           yes: candidate float pair
+         B STRCT2               no: pass the struct by size
+
+*first arg is a double
+STFPCHKD DS 0H
+         LG 15,16(,11)          type->elements
+         LA 15,8(,15)           type->elements[1]
+         LG 15,0(,15)
+         LH 15,10(,15)          type->elements[1]->type
+         CFI 15,1               is it a double?
+         BE STFPCHKD2
+         B STRCT2
+
+
+*Both leading elements are floats. Check for a third element;
+*if there is none we have a special case
+*(presumably this is to handle complex types):
+*the struct is passed in the next 2 free FPRs.
+*r14 counts the FPRs already used (4 = all used), so it
+*selects which FPR pair receives the two floats.
+STFPCHKF2 DS 0H
+         LG 15,16(,11)          type->elements
+         LA 15,16(,15)           &type->elements[2]
+         LG 15,0(,15)           type->elements[2]
+         CFI 15,0               NULL marks the end of elements
+         BNE STRCT2             third element: not special case
+
+         CFI 14,4               all 4 FPRs already used?
+         BNL STRCT2             if yes, not special case
+
+         CFI 14,3               only 1 FPR left?
+         BNL STRCT2             if yes, struct won't fit
+
+         CFI 14,2               2 FPRs used
+         BE STFPCHKF246         pass in FPR4/FPR6
+
+         CFI 14,1               1 FPR used
+         BE STFPCHKF224         pass in FPR2/FPR4
+
+         CFI 14,0               no FPR used
+         BE STFPCHKF202         pass in FPR0/FPR2
+
+STFPCHKF202 DS 0H
+         LE 0,0(6,13)           first float
+         LE 2,4(6,13)           second float
+         AFI 0,2                one less GPR available
+         AFI 14,2               two less FPRs available
+
+         AFI 6,8
+         AFI 15,-16          If struct is <24 bytes arg area
+         AFI 7,8            Needs padding to 24 bytes
+
+         B STRCTLP
+
+STFPCHKF224 DS 0H
+         LE 2,0(6,13)           first float
+         LE 4,4(6,13)           second float
+         AFI 0,2                one less GPR available
+         AFI 14,2               two less FPRs available
+
+         AFI 6,8
+         AFI 15,-8          If struct is <24 bytes arg area
+         AFI 7,8            Needs padding to 24 bytes
+
+         B STRCTLP
+
+
+STFPCHKF246 DS 0H
+         LE 4,0(6,13)           first float
+         LE 6,4(6,13)           second float
+         AFI 0,2                one less GPR available
+         AFI 14,2               two less FPRs available
+
+         AFI 6,8
+         AFI 15,-8              If struct is <24 bytes arg area
+         AFI 7,8                Needs padding to 24 bytes
+
+         B STRCTLP
+
+
+
+*check if there are more elements
+*if not we have a special case
+*I think this is to handle complex types
+*in this case we pass the struct
+*in the first 2 free FPRs, just check this
+*against r14 the free FPR counter
+STFPCHKD2 DS 0H
+         LG 15,16(,11)          type->elements
+         LA 15,16(,15)           type->elements[0]
+         LG 15,0(,15)
+         CFI 15,0
+         BNE STRCT2             if no, not special case
+
+         CFI 14,4               no FPRs are available
+         BNL STRCT2             if yes, not special case
+
+         CFI 14,3               1 FPR is available                
+         BNL STRCT2             if yes, struct won't fit
+
+         CFI 14,2
+         BE STFPCHKD246
+
+         CFI 14,1
+         BE STFPCHKD224
+
+         CFI 14,0
+         BE STFPCHKD202
+
+
+STFPCHKD202 DS 0H
+         LD 0,0(6,13)           first float
+         LD 2,8(6,13)           second float
+         AFI 0,2                one less GPR available
+         AFI 14,2               two less FPRs available
+
+         AFI 6,16
+         AFI 15,-16          If struct is <24 bytes arg area
+         AFI 7,16            Needs padding to 24 bytes
+
+         B STRCTLP
+
+STFPCHKD224 DS 0H
+         LD 2,0(6,13)           first float
+         LD 4,8(6,13)           second float
+         AFI 0,2                one less GPR available
+         AFI 14,2               two less FPRs available
+
+         AFI 6,16
+         AFI 15,-16          If struct is <24 bytes arg area
+         AFI 7,16            Needs padding to 24 bytes
+
+         B STRCTLP
+
+
+STFPCHKD246 DS 0H
+         LD 4,0(6,13)           first float
+         LD 6,8(6,13)           second float
+         AFI 0,2                one less GPR available
+         AFI 14,2               two less FPRs available
+
+         AFI 6,16
+         AFI 15,-16          If struct is <24 bytes arg area
+         AFI 7,16            Needs padding to 24 bytes
+
+         B STRCTLP
+
+
+*Determine how to pass the struct based on its size.
+*At this point we have weeded out all the float/double
+*pair structs that get passed purely in FPRs.
+STRCT2  DS 0H
+         LG 15,0(,11)          r15 = type->size
+         CFI 15,8              Struct <= 8 bytes?
+         BNH  BYTE8            yes
+         CFI 15,16             Struct <= 16 bytes?
+         BNH   BYTE16          yes
+         CFI 15,24             result unused by the branch below
+         B BYTE24              every larger size, even > 24 bytes
+
+BYTE8    DS 0H               Struct <= 8 bytes
+
+*A struct handled here may be smaller than 8 bytes, so it must be
+*padded to 8 bytes in the arg area: every register-passed value
+*is given a full 8-byte slot there. Even a 1-byte struct therefore
+*occupies 8 bytes of the arg area.
+*The same holds whenever the struct is smaller than the register
+*storage used to pass it:
+*  - structs < 8 bytes passed in one register pad to 8 bytes,
+*  - structs < 16 bytes passed in two registers pad to 16 bytes,
+*  - structs < 24 bytes passed in three registers pad to
+*    24 bytes.
+*Presumably this is because the first 24 bytes of the arg area
+*are the save area for the first 3 argument registers.
+BYTE8R1  DS 0H               Struct goes in R1
+         CFI 0,1             is R1 available?
+         BNL BYTE8R2
+
+         LG 1,0(6,13)
+
+         AGR 6,15            Advance to next byte in arg area
+         AFI 15,-8           If struct is <8 bytes arg area
+         AFI 7,8             Needs padding to 8 bytes
+
+         AFI 0,1
+         B STRCTLP
+
+BYTE8R2  DS 0H               Struct goes in R2
+         CFI 0,2             r0 = number of GPRs already used
+         BNL BYTE8R3         r0 >= 2: R2 is taken, try R3
+*R2 is free: load the (up to) 8 struct bytes into it
+         LG 2,0(6,13)
+*Consume the struct from the parm values, reserve an 8-byte slot
+         AGR 6,15            Skip the struct in the parm values
+         AFI 15,-8           Bytes left for STRCTLP (<= 0 here)
+         AFI 7,8             Struct slot is padded to 8 bytes
+
+         AFI 0,1             One more GPR in use
+         B STRCTLP
+
+
+BYTE8R3  DS 0H               Struct goes in R3
+         CFI 0,3             is R3 available?
+         BNL STRCTLP
+
+         LG 3,0(6,13)
+
+         AGR 6,15           Advance to next byte in arg area
+         AFI 15,-8          If struct is <8 bytes arg area
+         AFI 7,8            Needs padding to 8 bytes
+
+         AFI 0,1
+         B STRCTLP
+
+BYTE8F0  DS 0H               Struct goes in FPR0
+BYTE8F2  DS 0H               Struct goes in FPR2
+BYTE8F4  DS 0H               Struct goes in FPR4
+BYTE8F6  DS 0H               Struct goes in FPR6
+
+BYTE16   DS 0H               Struct <= 16 bytes
+
+BYTE16R1 DS 0H               Struct goes in R1-R2
+         CFI 0,1             are R1+R2 available?
+         BNL BYTE16R2
+        
+         LG 1,0(6,13)
+         LG 2,8(6,13)
+
+         AGR 6,15            Advance to next byte in arg area
+         AFI 15,-16          If struct is <16 bytes arg area
+         AFI 7,16            Needs padding to 16 bytes
+
+         AFI 0,2
+         B STRCTLP
+
+
+BYTE16R2 DS 0H               Struct goes in R2-R3
+         CFI 0,2             are R2+R3 available?
+         BNL BYTE16R3
+         
+         LG 2,0(6,13)
+         LG 3,8(6,13)
+
+         AGR 6,15            Advance to next byte in arg area
+         AFI 15,-16          If struct is <16 bytes arg area
+         AFI 7,16            Needs padding to 16 bytes
+
+         AFI 0,2
+         B STRCTLP
+         
+
+BYTE16R3 DS 0H               Struct goes in R3+Memory
+         CFI 0,3             are R3 available?
+         BNL STRCTLP
+         LG 3,0(6,13)
+
+         AFI 0,2
+         B STRCTLP
+
+BYTE16F0 DS 0H               Struct goes in FPR0-FPR2
+BYTE16F2 DS 0H               Struct goes in FPR2-FPR4
+BYTE16F4 DS 0H               Struct goes in FPR4-FPR6
+
+
+BYTE24   DS 0H               Struct <= 24 bytes
+
+
+BYTE24R1 DS 0H               Struct goes in R1-R3
+         CFI 0,1             are R1+R2+R3 available?
+         BNL BYTE24R2 
+         LG 1,0(6,13)
+         LG 2,8(6,13)
+         LG 3,16(6,13)
+
+         AGR 6,15            Advance to next byte in arg area
+         AFI 15,-24          If struct is <24 bytes arg area
+         AFI 7,24            Needs padding to 24 bytes
+
+         AFI 0,3
+         B STRCTLP
+
+BYTE24R2 DS 0H               Struct goes in R2,R3+(potentially)Memory
+         CFI 0,2             are R1+R2+R3 available?
+         BNL BYTE24R3 
+         LG 2,0(6,13)
+         LG 3,8(6,13)
+        
+         AFI 0,3
+         B STRCTLP
+
+BYTE24R3 DS 0H               Struct goes in R3+Memory
+         CFI 0,3             are R3 available?
+         BNL STRCTLP 
+         LG 3,0(6,13)
+
+         AFI 0,3
+         B STRCTLP
+
+BYTE24F0 DS 0H               Struct goes in FPR0-FPR4+Memory
+BYTE24F2 DS 0H               Struct goes in FPR2-FPR6+Memory
+BYTE24F4 DS 0H               Struct goes in FPR4-FPR6+Memory
+BYTE24F6 DS 0H               Struct goes in FPR6+Memory
+ 
+STRCTLP  DS 0H               Rest of struct goes in Memory
+         CFI 15,0            Size remaining > 0?
+         BNH CONT            No (<= 0): struct fully handled
+*Copy one byte from the parm values to the arg area
+         LB 12,0(6,13)       Load the byte of the struct
+         STC 12,2176(7,4)    Store in next byte in arg area
+*Step both offsets and count the byte as done
+         AFI  6,1            Move to next byte in struct
+         AFI  7,1            Move to next byte in arg area
+         AFI 15,-1           Decrement size of struct
+         B  STRCTLP
 
-BYTE12   DS 0H
-         L 1,0(6,STKREG)
-         L 2,4(6,STKREG)
-         L 3,8(6,STKREG)
-         LA GPX,3
-         AHI 6,8
-         B CONT
 
 CONT     DS 0H                End of processing curr_param
          AHI 10,8             Next parameter type
-         AHI 6,4              Next parameter value stored 
+*        AHI 6,4              Next parameter value stored 
          BCT 9,ARGLOOP        
   
 *Get function address, first argument passed,
 *and return type, third argument passed from caller's
 *argument area.
-
+CALL     DS 0H
 *         SR 11,11
          LA 11,0
          LG 6,(2176+(((DSASZ+31)/32)*32))(,4)
@@ -527,9 +877,163 @@ RLL      DS 0H
          B RET
 
 RV       DS 0H
-RS       DS 0H
          B RET
 
+RS       DS 0H
+         LG 7,(2176+(((DSASZ+31)/32)*32)+24)(,4)
+         LG 14,(2176+(((DSASZ+31)/32)*32)+8)(,4)
+         LG 14,0(,14)           ecif->cif
+         LG 9,16(,14)           cif->rtype
+         LG 14,0(,9)            rtype->size
+         CGIBH 14,24,RSP
+
+* Now we need to determine how to return the struct.
+* The code at this label checks whether the struct is <= 24 bytes;
+* if it is, the value comes back in GPRs/FPRs.
+* We then loop over rtype->elements
+* to determine whether the struct is all floating point
+* or whether it contains integral types.
+* If it has only floating point types, use FPRs as appropriate,
+* but only if they are all the same type of float:
+* a struct mixing float, double and long double will use GPRs.
+* (The reason for this rule is not known to the author.)
+* Also, with more than 2 floating point elements we use GPRs.
+* With at least one integral type, we use GPRs based on size.
+* If the struct is > 24 bytes, we have already passed the return
+* pointer as the first argument to the callee "fn",
+* so that work was outsourced to the compiler.
+* Summarised as simple cases for clarity:
+* if struct <= 24 bytes
+*   if elements contains >=1 integral types -> GPRs
+*   if elements contains only one type of float
+*     if num elements <= 2 -> FPRs
+*     else -> GPRs
+         
+
+* currently we have 
+* r9 = cif->rtype
+* r7 = ecif->rvalue
+RSREG    DS 0H
+         LG 11,16(,9)         rtype->elements
+         LA 15,0              have we seen a float yet?
+         LA 0,0               have we seen a double yet?
+RSLOOP   DS 0H
+         LG 12,0(,11)         load the current element, we update
+*                             this register in NEXT so it's technically
+*                             actually rtype->elements + i
+         CGIJE 12,0,RSFPR     if we have NULL, we are done
+*                             this means we have all float types
+         LA 13,10(,12)        address of rtype->elements[i]->type
+         LLH 10,0(,13)        load the type (halfword)
+         LGHR 10,10           ensure upper half is clear
+         CFI 10,11            if type == FFI_TYPE_FLOAT
+         BE NEXTF             check next element
+         CFI 10,1             if type == FFI_TYPE_DOUBLE
+         BE NEXTD             check next element
+*        CFI 10,1             TODO if type == FFI_TYPE_LONGDOUBLE
+*        BE NEXT              check next element
+
+         B RSGPR              we have an integral type, use GPRs
+
+NEXTF    DS 0H
+         LA 11,8(,11)         increment the pointer to the next element
+         LA 15,1              we have seen a float
+         CGIJE 0,1,RSGPR      we have a mix of float/double use GPRs
+         B RSLOOP             loop back to the top
+
+NEXTD    DS 0H
+         LA 11,8(,11)         increment the pointer to the next element
+         LA 0,1               we have seen a double
+         CGIJE 15,1,RSGPR     we have a mix of float/double use GPRs
+         B RSLOOP             loop back to the top
+
+
+* label if we're using FPRs
+RSFPR    DS 0H
+         LA 5,0
+         LA 15,SSTOR
+         LG 11,16(,9)
+         LG 12,0(,11)
+         CGIJE 12,0,RSCPY
+         LA 13,10(,12)        address of rtype->elements[i]->type
+         LLH 10,0(,13)        load the type (halfword)
+         LGHR 10,10           ensure upper half is clear
+         CFI 10,11            if type == FFI_TYPE_FLOAT
+         BE STOREF1           store the float in the return area
+         CFI 10,1             if type == FFI_TYPE_DOUBLE
+         BE STORED1
+
+STOREF1  DS 0H
+         STE 0,0(5,15)
+         AGFI 5,4
+         B STORFPR2
+
+STORED1  DS 0H
+         STD 0,0(5,15)
+         AGFI 5,8
+         B STORFPR2
+
+STORFPR2 DS 0H
+         LA 11,8(,11)
+         LG 12,0(,11)
+         CGIJE 12,0,RSCPY
+         LA 13,10(,12)        address of rtype->elements[i]->type
+         LLH 10,0(,13)        load the type (halfword)
+         LGHR 10,10           ensure upper half is clear
+         CFI 10,11            if type == FFI_TYPE_FLOAT
+         BE STOREF2           store the float in the return area
+         CFI 10,1             if type == FFI_TYPE_DOUBLE
+         BE STORED2
+
+STOREF2  DS 0H
+         STE 2,0(5,15)
+         AGFI 5,4
+         B STORFPR3
+
+STORED2  DS 0H
+         STD 2,0(5,15)
+         AGFI 5,8
+         B STORFPR3
+
+STORFPR3 DS 0H
+
+         LA 11,8(,11)
+         LG 12,0(,11)
+         CGIJE 12,0,RSCPY    if we have NULL, time to copy
+         BE RSGPR            if we still have elements we use GPRs
+
+* label if we're using GPRs
+RSGPR    DS 0H
+* so to save us from having to figure out
+* how many regs to save, we'll just save them all
+* into some local storage, then copy the right amount
+         LA 15,SSTOR      
+         STG  1,0(,15)
+         STG  2,8(,15)
+         STG  3,16(,15)
+         B RSCPY
+
+* Copy the struct to the return area (r7 = ecif->rvalue),
+* one byte at a time from the staging area at r15,
+* until the byte count in r14 (rtype->size) is used up.
+
+RSCPY    DS 0H                r14 = bytes still to copy
+         CGIJNH 14,0,RET      none left (<= 0): done
+         LB 1,0(,15)          next byte from staging area
+* STC stores the low-order byte of r1
+         STC 1,0(,7)          write it to the return area
+         LA 15,1(,15)         advance source
+         LA 7,1(,7)           advance destination
+         AFI 14,-1            one byte fewer to copy
+         B RSCPY
+ 
+* return struct in pointer passed as dummy first argument
+* this should be the same pointer passed to ffi_call
+* and should have been set up in  prep_args
+* only using an explicit label for clarity
+RSP      DS 0H                 
+         B RET                  
+  
 RET      DS 0H 
          CELQEPLG
 
@@ -546,7 +1050,7 @@ ATABLE DC A(I)                Labels for parm types
  DC A(UI64)
  DC A(FLT)
  DC A(STRCT)
- DC A(0)
+ DC A(VOID)
  DC A(I)
  DC A(D)
 I32 DC A(IGPR1)               Labels for passing INT in gpr
@@ -590,9 +1094,6 @@ FLTS DC A(FLTR0)              Labels to store FLOAT in fpr
   DC A(FLTR4)
   DC A(FLTR6)
   DC A(ARGAFT)                Label to store FLOAT in arg area
-STRCTS DC A(BYTE4)
-  DC A(BYTE8)
-  DC A(BYTE12)
 RTABLE DC A(RV)
  DC A(RS)
  DC A(RF)
@@ -616,5 +1117,9 @@ FPX      EQU  14
 CEEDSAHP CEEDSA SECTYPE=XPLINK
 ARGSL DS  XL800
 LSTOR DS  XL800
+* storage space for the returns regs maximum 4*8 fpr
+* TODO check long doubles, might be 8*8
+SSTOR DS  XL32
+SSIZE DS  XL8
 DSASZ    EQU (*-CEEDSAHP_FIXED)
  END FFISYS