Skip to content

Commit

Permalink
Special handling of the UTF8 nbsp sequence
Browse files Browse the repository at this point in the history
The UTF-8 nbsp sequence 0xc2 0xa0 is not recognized as whitespace and is therefore not handled properly.
This can lead to:
```
warning: found </c> tag without matching <c>
```
when the input contains, for example, ```e.g. `linux`<br>``` where the space between the `.` and the backtick is actually the UTF-8 nbsp sequence.
The fix replaces the UTF-8 nbsp sequence with the `&nbsp;` entity.
  • Loading branch information
albert-github committed May 11, 2019
1 parent ad9af44 commit 6d20ed0
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions src/markdown.cpp
Expand Up @@ -2453,19 +2453,32 @@ static QCString detab(const QCString &s,int &refIndent)
col++;
break;
default: // non-whitespace => update minIndent
out.addChar(c);
if (c<0 && i<size) // multibyte sequence
{
out.addChar(data[i++]); // >= 2 bytes
if (((uchar)c&0xE0)==0xE0 && i<size)
// special handling of the UTF-8 nbsp character 0xc2 0xa0
if (c == '\xc2' && data[i] == '\xa0')
{
out.addChar(data[i++]); // 3 bytes
out.addStr("&nbsp;");
i++;
}
if (((uchar)c&0xF0)==0xF0 && i<size)
else
{
out.addChar(data[i++]); // 4 byres
out.addChar(c);
out.addChar(data[i++]); // >= 2 bytes
if (((uchar)c&0xE0)==0xE0 && i<size)
{
out.addChar(data[i++]); // 3 bytes
}
if (((uchar)c&0xF0)==0xF0 && i<size)
{
out.addChar(data[i++]); // 4 byres
}
}
}
else
{
out.addChar(c);
}
if (col<minIndent) minIndent=col;
col++;
}
Expand Down

0 comments on commit 6d20ed0

Please sign in to comment.