141 changes: 141 additions & 0 deletions src/msgAggregatorWorker.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
Copyright (C) 2022 Arduino SA
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

class WorkerStub {
listener = (_: { data: { command: string } }) => {};
addEventListener(_: string, listenerCallback: (event: object) => void) {
this.listener = listenerCallback;
}
}
let worker = new WorkerStub();
(global as any).self = worker;

const messageAggregator = require("./msgAggregatorWorker");

beforeEach(() => {
worker.listener({ data: { command: "cleanup" } });
});

describe("Parsing data", () => {
describe.each([
["space", " "],
["tab", "\t"],
["comma", ","],
])("%s field delimiter", (_, fieldDelimiter) => {
describe.each([
["trailing", fieldDelimiter],
["no trailing", ""],
])("%s", (_, trailingFieldDelimiter) => {
describe.each([
["LF", "\n"],
["CRLF", "\r\n"],
])("%s record delimiter", (_, recordDelimiter) => {
test("single field", () => {
const messages = [
`0${trailingFieldDelimiter}${recordDelimiter}`,
`1${trailingFieldDelimiter}${recordDelimiter}`,
`2${trailingFieldDelimiter}${recordDelimiter}`,
];

const assertion = {
datasetNames: ["value 1"],
parsedLines: [{ "value 1": 1 }, { "value 1": 2 }],
};

expect(messageAggregator.parseSerialMessages(messages)).toEqual(
assertion
);
});

test("multi-field", () => {
const messages = [
`0${trailingFieldDelimiter}${recordDelimiter}`,
`1${fieldDelimiter}2${trailingFieldDelimiter}${recordDelimiter}`,
`3${fieldDelimiter}4${trailingFieldDelimiter}${recordDelimiter}`,
];

const assertion = {
datasetNames: ["value 1", "value 2"],
parsedLines: [
{ "value 1": 1, "value 2": 2 },
{ "value 1": 3, "value 2": 4 },
],
};

expect(messageAggregator.parseSerialMessages(messages)).toEqual(
assertion
);
});

test("labeled", () => {
const messages = [
`0${trailingFieldDelimiter}${recordDelimiter}`,
`label_1:1${fieldDelimiter}label_2:2${trailingFieldDelimiter}${recordDelimiter}`,
`label_1:3${fieldDelimiter}label_2:4${trailingFieldDelimiter}${recordDelimiter}`,
];

const assertion = {
datasetNames: ["label_1", "label_2"],
parsedLines: [
{ label_1: 1, label_2: 2 },
{ label_1: 3, label_2: 4 },
],
};

expect(messageAggregator.parseSerialMessages(messages)).toEqual(
assertion
);
});

test("buffering", () => {
// Incomplete record
let messages = [
`0${trailingFieldDelimiter}${recordDelimiter}`,
`1${fieldDelimiter}`,
];

// Incomplete message is buffered
let assertion: {
datasetNames: string[];
parsedLines: { [key: string]: number }[];
} = {
datasetNames: [],
parsedLines: [],
};

expect(messageAggregator.parseSerialMessages(messages)).toEqual(
assertion
);

// Second part of the record
messages = [`2${trailingFieldDelimiter}${recordDelimiter}`];

assertion = {
datasetNames: ["value 1", "value 2"],
parsedLines: [{ "value 1": 1, "value 2": 2 }],
};

expect(messageAggregator.parseSerialMessages(messages)).toEqual(
assertion
);
});
});
});
});
});

export {};
34 changes: 20 additions & 14 deletions src/msgAggregatorWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ ctx.addEventListener("message", (event) => {

let buffer = "";
let discardFirstLine = true;
const separator = "\n";
var re = new RegExp(`(${separator})`, "g");
const separator = "\r?\n";
const delimiter = "[, \t]+"; // Serial Plotter protocol supports Comma, Space & Tab characters as delimiters
var separatorRegex = new RegExp(`(${separator})`, "g");
var delimiterRegex = new RegExp(delimiter, "g");

export const parseSerialMessages = (
messages: string[]
Expand All @@ -31,10 +33,11 @@ export const parseSerialMessages = (
// so we need to discard it and start aggregating from the first encountered separator
let joinMessages = messages.join("");
if (discardFirstLine) {
const firstSeparatorIndex = joinMessages.indexOf(separator);
if (firstSeparatorIndex > -1) {
separatorRegex.lastIndex = 0; // Reset lastIndex to ensure match happens from beginning of string
const separatorMatch = separatorRegex.exec(joinMessages);
if (separatorMatch && separatorMatch.index > -1) {
joinMessages = joinMessages.substring(
firstSeparatorIndex + separator.length
separatorMatch.index + separatorMatch[0].length
);
discardFirstLine = false;
} else {
Expand All @@ -47,13 +50,14 @@ export const parseSerialMessages = (

//add any leftover from the buffer to the first line
const messagesAndBuffer = ((buffer || "") + joinMessages)
.split(re)
.split(separatorRegex)
.filter((message) => message.length > 0);

// remove the previous buffer
buffer = "";
separatorRegex.lastIndex = 0;
// check if the last message contains the delimiter, if not, it's an incomplete string that needs to be added to the buffer
if (messagesAndBuffer[messagesAndBuffer.length - 1] !== separator) {
if (!separatorRegex.test(messagesAndBuffer[messagesAndBuffer.length - 1])) {
buffer = messagesAndBuffer[messagesAndBuffer.length - 1];
messagesAndBuffer.splice(-1);
}
Expand All @@ -62,19 +66,21 @@ export const parseSerialMessages = (
const parsedLines: { [key: string]: number }[] = [];

// for each line, explode variables
separatorRegex.lastIndex = 0;
messagesAndBuffer
.filter((message) => message !== separator)
.filter((message) => !separatorRegex.test(message))
.forEach((message) => {
const parsedLine: { [key: string]: number } = {};

//there are two supported formats:
// format1: <value1> <value2> <value3>
// format2: name1:<value1>,name2:<value2>,name3:<value3>
// Part Separator symbols i.e. Space, Tab & Comma are fully supported
// SerialPlotter protocol specifies 3 message formats. The following 2 formats are supported
// Value only format: <value1> <value2> <value3>
// Label-Value format: name1:<value1>,name2:<value2>,name3:<value3>

// if we find a colon, we assume the latter is being used
let tokens: string[] = [];
if (message.indexOf(":") > 0) {
message.split(",").forEach((keyValue: string) => {
message.split(delimiterRegex).forEach((keyValue: string) => {
let [key, value] = keyValue.split(":");
key = key && key.trim();
value = value && value.trim();
Expand All @@ -83,8 +89,8 @@ export const parseSerialMessages = (
}
});
} else {
// otherwise they are spaces
const values = message.split(/\s/);
// otherwise they are unlabelled
const values = message.split(delimiterRegex);
Copy link
Contributor

@per1234 per1234 Sep 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a regression here for a use case that is not explicitly supported by the specification, but is supported by the Arduino IDE 1.x Serial Plotter and previous implementation of this project. That use case is the combination of a trailing delimiter and \r\n separator.

For example:

void setup() {
  Serial.begin(9600);
}
void loop() {
  for (byte value = 0; value < 4; value++) {
    for (byte variableOffset = 0; variableOffset < 3; variableOffset++) {
      Serial.print(value + variableOffset);
      Serial.print('\t');
    }
    Serial.println();
  }
  delay(100);
}

You can see how code that generates data via a for loop favors a trailing delimiter, and of course Serial.println(); is a convenient way to produce a separator.

Previously, that sketch produced 3 variables as expected:

Arduino IDE 1.8.19:

image

arduino-serial-plotter-webapp@eac6d39 / arduino-serial-plotter-webapp@0.1.0 / Arduino IDE 2.0.0-rc9.3:

image

After the change made here, it produces 4 variables:

image

The problem is the \r in the \r\n separator produced by Serial.println(); is being treated as a separate value. Previously, that \r was consumed by message.split(/\s/); (because it matches /\s/).

Copy link
Contributor Author

@nmzaheer nmzaheer Sep 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the separator regex is modified to include the carriage return character \r? Would that solve the problem or am I still missing something ?

const separator = "\r?\n";

Is there a particular reason why Serial.println outputs a \r\n and not \n ? We cannot have two different conventions for EOL character. Either we amend the SerialPlotter protocol or drop support for \r\n entirely.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const separator = "\r?\n";

That general approach should work. However, separator is used in the code directly as a string in addition to its use in the definition of separatorRegex, so the implementation is not quite so simple.

It will need to be something like this (not tested):

diff --git a/src/msgAggregatorWorker.ts b/src/msgAggregatorWorker.ts
index b05dddf..913915b 100644
--- a/src/msgAggregatorWorker.ts
+++ b/src/msgAggregatorWorker.ts
@@ -20,7 +20,7 @@ let buffer = "";
 let discardFirstLine = true;
 const separator = "\n";
 const delimiter = "[, \t]+"; // Serial Plotter protocol supports Comma, Space & Tab characters as delimiters
-var separatorRegex = new RegExp(`(${separator})`, "g");
+var separatorRegex = new RegExp(/(\r?\n)/, "g");
 var delimiterRegex = new RegExp(`(${delimiter})`, "g");
 
 export const parseSerialMessages = (
@@ -55,7 +55,7 @@ export const parseSerialMessages = (
   // remove the previous buffer
   buffer = "";
   // check if the last message contains the delimiter, if not, it's an incomplete string that needs to be added to the buffer
-  if (messagesAndBuffer[messagesAndBuffer.length - 1] !== separator) {
+  if (!separatorRegex.test(messagesAndBuffer[messagesAndBuffer.length - 1])) {
     buffer = messagesAndBuffer[messagesAndBuffer.length - 1];
     messagesAndBuffer.splice(-1);
   }
@@ -65,7 +65,7 @@ export const parseSerialMessages = (
 
   // for each line, explode variables
   messagesAndBuffer
-    .filter((message) => message !== separator)
+    .filter((message) => !separatorRegex.test(message))
     .forEach((message) => {
       const parsedLine: { [key: string]: number } = {};

We cannot have two different conventions for EOL character.

Why not? Inconsistency in EOL is a common situation. Not so long ago, Windows, Linux, and macOS each used a different EOL.

Is there a particular reason why Serial.println outputs a \r\n and not \n ?

No idea. It has been that way since 2006, and in the proud tradition of Arduino the person who made the change didn't bother to explain the reason:

arduino/Arduino@650c275

amend the SerialPlotter protocol

I think it is a good idea. The protocol already does specify that \r\n is supported by using Serial.println() in the all the example programs, but that is too vague. It should be stated explicitly that both are supported.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drop support for \r\n entirely.

It would be too disruptive. I'm certain the code can be made to support either without any real difficulty.

The only real challenge here is that there is no test coverage in this project, despite the fact that this code is very easy to write unit tests for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to contribute by writing the tests but I still haven't managed to setup my dev environment right.

Ideal case, I would like to debug the modifications before committing it to the repo. However, I have not been able to interface my Arduino with the standalone web-app or write a websocket script to feed values to the web-app.

Currently, I've been modifying the randomValueGenerator code manually to feed various types of messages and check its results. If you could guide me in setting this up, I would be able to contribute a lot more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const separator = "\r?\n";

That general approach should work.

Do we have to support \r as an EOL character? In that case, the regex would be

const separator = "\r?\n?";

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have to support \r as an EOL character?

No. It has never been supported and I haven't seen any complaints about the lack of support (I think it is not a very common separator in Arduino sketches despite being supported in the line ending menu of Serial Plotter and Serial Monitor)

For example, this sketch:

void setup() {
  Serial.begin(9600);
}
void loop() {
  for (byte value = 0; value < 4; value++) {
    for (byte variableOffset = 0; variableOffset < 3; variableOffset++) {
      Serial.print(value + variableOffset);
      Serial.print('\t');
    }
    Serial.print('\r');
  }
  delay(100);
}

Produces no output in the plotter:

arduino-serial-plotter-webapp@0.1.0

image

Arduino IDE 1.8.19

image

values.forEach((value, i) => {
if (value.length) {
tokens.push(...[`value ${i + 1}`, value]);
Expand Down