Skip to content

Commit

Permalink
Implement recursive gap computation
Browse files Browse the repository at this point in the history
Improved benchmark recovery rate by 7% (77% -> 84%)
  • Loading branch information
cjgriscom committed Nov 6, 2019
1 parent ce09944 commit 0d69e41
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 62 deletions.
135 changes: 74 additions & 61 deletions src/main/java/io/chandler/rece232/RECE232.java
Expand Up @@ -26,6 +26,8 @@
*/
package io.chandler.rece232;

import java.util.Arrays;

/**
* RECE-232 is a data encoding scheme that encodes longwords/floats to ASCII while maximizing error detection and correctability.
* It's intended for use in ASCII RS-232 streams where bitflips and dropped characters may be common.
Expand Down Expand Up @@ -222,6 +224,68 @@ public RECE232Decoder setConvertTabs(boolean convertTabs) {
return this;
}

private static final int INCOMPLETE = Integer.MAX_VALUE; // Magic number to signify length mismatch
/**
 * Recursively aligns the characters of {@code src} with the reconstruction slots in
 * {@code recon}, deciding for every unexpected character whether it is a corrupted
 * character (consume it) or evidence of a dropped character (leave it for the next slot).
 *
 * <p>Even slots expect a 5-bit character (values 32..63) and odd slots expect a 6-bit
 * character (values 64..127). Each group of 8 slots (one longword) may contain at most one
 * unknown slot; its slot index is recorded in {@code gaps} and its {@code recon} entry is
 * zeroed.
 *
 * <p>On a successful return {@code gaps} and {@code recon} describe the chosen alignment.
 * On {@code INCOMPLETE} their contents are unspecified.
 *
 * @param src      raw received bytes
 * @param i        index of the next byte of {@code src} to consume
 * @param r        index of the next slot of {@code recon} to fill
 * @param n        total number of slots that must be filled
 * @param gaps     per-longword index of the unknown slot, or -1 if the longword has none;
 *                 every entry for a longword after the current one must be -1 on entry
 * @param gapCount number of dropped-character guesses made so far on this path
 * @return the number of dropped-character guesses on the chosen path, or
 *         {@code INCOMPLETE} if no alignment consistent with the length exists
 */
private int calculateGaps(byte[] src, int i, int r, int n, int[] gaps, int gapCount) {
    int longwordIndex = r / 8;
    boolean exp5Bit = r % 2 == 0;

    if (i >= src.length - 2) { // src.length - 2 is the first fletcher character
        // Ran through end
        if (DEBUG) System.out.println("Finished calculateGaps0: " + i + "," + r + ": " + gapCount);
        return r == n ? gapCount : INCOMPLETE; // Have we finished
    }
    if (r == n) {
        if (DEBUG) System.out.println("Finished calculateGaps1: " + i + "," + r + ": " + gapCount);
        return i == src.length - 3 ? gapCount : INCOMPLETE; // Made it to end
    }

    // Allow pushing into the first fletcher char, in case there's a gap before there
    int byt = src[i] & 0xff;
    if (convertTabs && byt == (byte)'\t') byt = 127;

    if (byt < 32 || byt >= 128) {
        if (DEBUG) System.out.println(i + "," + r + " !");
        // Out of ascii range; consider this a corrupt character
        if (gaps[longwordIndex] != -1) return INCOMPLETE; // Already counted a gap in this longword
        gaps[longwordIndex] = r;
        recon[r] = 0;
        return calculateGaps(src, i+1, r+1, n, gaps, gapCount);
    }
    if (exp5Bit && byt < 64) {
        if (DEBUG) System.out.println(i + "," + r + " 5");
        // Is expected 5-bit
        recon[r] = byt - 32;
        return calculateGaps(src, i+1, r+1, n, gaps, gapCount);
    }
    if (!exp5Bit && byt >= 64) {
        if (DEBUG) System.out.println(i + "," + r + " 6");
        // Is expected 6-bit
        recon[r] = byt - 64;
        return calculateGaps(src, i+1, r+1, n, gaps, gapCount);
    }

    if (DEBUG) System.out.println(i + "," + r + " G");

    // It's not in the expected range, could be a gap or a corrupt character
    if (gaps[longwordIndex] != -1) return INCOMPLETE; // Already counted a gap in this longword

    recon[r] = 0;
    gaps[longwordIndex] = r;

    // Try corrupt case: the character occupies this slot but its value is unusable
    int corruptCase = calculateGaps(src, i+1, r+1, n, gaps, gapCount);

    // The gap case starts from gapCount + 1 and the count never decreases, so a corrupt
    // case that added no gaps at all cannot be beaten; gaps/recon already hold its result.
    if (corruptCase == gapCount) return corruptCase;

    // Remember the corrupt-case result so it can be restored without re-running the whole
    // subtree. Control flow below depends only on src and gaps, so this snapshot is exactly
    // the state a recomputation would produce.
    int[] corruptGaps = null;
    int[] corruptRecon = null;
    if (corruptCase != INCOMPLETE) {
        corruptGaps = gaps.clone();
        corruptRecon = recon.clone();
    }

    // Try gap case: a character was dropped here, so the same byte is offered to the next slot
    for (int l = longwordIndex + 1; l < gaps.length; l++) gaps[l] = -1; // Reset gaps
    int gapCase = calculateGaps(src, i, r+1, n, gaps, gapCount+1);

    if (corruptCase < gapCase) { // Prefer corrupt
        // corruptCase < gapCase implies corruptCase != INCOMPLETE, so the snapshots exist
        System.arraycopy(corruptGaps, 0, gaps, 0, gaps.length);
        System.arraycopy(corruptRecon, 0, recon, 0, recon.length);
        return corruptCase;
    }
    // Prefer gap; gaps/recon were last written by the gap-case recursion
    return gapCase;
}

private static final int GOOD_MASK = 0b11111_111111_11111;
public boolean load(byte[] src) {

Expand Down Expand Up @@ -301,65 +365,13 @@ public boolean load(byte[] src) {

this.recon = new int[nLongwords * 8];
int[] gaps = new int[nLongwords];
Arrays.fill(gaps, -1);
boolean[] badChks = new boolean[nLongwords];

int i = 0;
if (calculateGaps(src, 0, 0, nLongwords * 8, gaps, 0) == INCOMPLETE) return false; // Recursive gaps calculation

// Process checksums or fill gaps
for (int n = 0; n < nLongwords; n++) {
int nRead = 0;
int nGap = 0;
gaps[n] = -1;
// Loop through and find gaps, attempt reconstruction
for (; ; i++) {
if (i < src.length - 2) { // Allow pushing into the first fletcher char, in case there's a gap before there
int byt = src[i] & 0xff;
if (convertTabs && byt == (byte)'\t') byt = 127;

if (byt < 32 || byt >= 128) {
if (DEBUG) System.out.print("!");
// Out of ascii range; consider this a corrupt character
gaps[n] = nRead;
nGap++;
if (nGap >= 2) {
if (DEBUG) System.out.println();
return false; // Too many errors
}
recon[n * 8 + nRead] = 0;
nRead++;
} else if (nRead % 2 == 0 && byt < 64) {
if (DEBUG) System.out.print("5");
// Is expected 5-bit
recon[n * 8 + nRead] = byt - 32;
nRead++;
} else if (nRead % 2 != 0 && byt >= 64) {
if (DEBUG) System.out.print("6");
// Is expected 6-bit
recon[n * 8 + nRead] = byt - 64;
nRead++;
} else {
if (DEBUG) System.out.print("G");
// It's not in the expected range, must be a gap
gaps[n] = nRead;
nGap++;
if (nGap >= 2) {
if (DEBUG) System.out.println();
return false; // Too many errors
}
recon[n * 8 + nRead] = 0;
nRead++;
i--; // Backtrack for next

}
} else {
if (DEBUG) System.out.println();
return false; // Ran through end w/o a complete 8-charset
}
if (nRead == 8) {i++; break;}
}
if (DEBUG) System.out.println("");

if (nRead != 8) return false; // Ran out of bytes to src

// Fill gaps using checksum
int gapIdx = gaps[n];
if (gapIdx == -1) {
// No gaps; just verify checksum
Expand All @@ -369,17 +381,18 @@ public boolean load(byte[] src) {
}
} else {
int chk = 0;
for (int b = 0; b < 8; b++) {
for (int b = n*8; b < n*8 + 8; b++) {
if (b == gapIdx) continue;
if (DEBUG) System.out.println("Fill gap chk" + b);
chk ^= recon[b + n*8];
chk ^= recon[b];
}
recon[gapIdx + n*8] = chk ^ 0b111111;
recon[gapIdx] = chk ^ 0b111111;
}

}

// Attempt to correct wrong checksums
// Recursively attempt to correct wrong checksums
// TODO can improve statistical accuracy by keeping an n-bitflips score and returning the best one
// TODO implement a configurable limit to recursive calls
return correctChecksums(badChks, false, 0, fletF, fletFMask);
}

Expand Down
6 changes: 5 additions & 1 deletion src/test/java/io/chandler/rece232/RECE232Tests.java
Expand Up @@ -44,7 +44,10 @@
public class RECE232Tests {
@Test public void testC() {
RECE232Decoder dec = RECE232.getDecoder().setConvertTabs(true);
System.out.println(dec.load("? ? ? ?@? ?r? ?M;T4".getBytes(StandardCharsets.US_ASCII)));
// TODO "? ? ? ?@? ?r? ?M;T4" -> "? ? ? s@? ?r? ?M;T4" fails
// The gap detection needs to be improved; it detects 2 gaps instead of one bit error
// Could perform gap detection minimizing num errors, or length matching
System.out.println(dec.load("? ? ? s@? ?r? ?M;T4".getBytes(StandardCharsets.US_ASCII)));
System.out.println(dec.getHeader6Bit());
System.out.println(dec.getLongword(0));
System.out.println(dec.getLongword(1));
Expand Down Expand Up @@ -121,6 +124,7 @@ public void testErrors() {
"Pass, Flip0 1, Flip0 8, Flip0 17",
"Pass, Flip3 1, Flip3 8, Flip3 17",
"Pass, Flip2 9, Flip3 9, Flip4 9",
"Pass, Flip6 6", // This case should only work with the recursive gap computation
};

for (int[] dataset : datasets) { for (String corr : corruptionTests) {
Expand Down

0 comments on commit 0d69e41

Please sign in to comment.