From 54c5a42d0925cc825b9386d9dc5b564178722b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20Kn=C3=B6schke?= Date: Thu, 6 Sep 2018 15:27:18 +0200 Subject: [PATCH 1/3] Integrate ConvertUTF (see #61) - Add linenoise-ng as 3rd party project - Implement UTF conversions using ConvertUTF - Update license - Add tests for conversions --- 3rdparty/CMakeLists.txt | 4 + 3rdparty/linenoise-ng/CMakeLists.txt | 142 + 3rdparty/linenoise-ng/LICENSE | 66 + 3rdparty/linenoise-ng/README.md | 156 + .../include/linenoise-ng/ConvertUTF.h | 162 + .../include/linenoise-ng/linenoise.h | 73 + 3rdparty/linenoise-ng/source/ConvertUTF.cpp | 546 +++ 3rdparty/linenoise-ng/source/linenoise.cpp | 3461 +++++++++++++++++ 3rdparty/linenoise-ng/source/wcwidth.cpp | 315 ++ CMakeLists.txt | 1 + LICENSE | 69 +- source/cppassist/CMakeLists.txt | 1 + .../include/cppassist/string/conversion.h | 17 +- source/cppassist/source/string/conversion.cpp | 254 +- .../tests/cppassist-test/conversion_test.cpp | 79 + 15 files changed, 5269 insertions(+), 77 deletions(-) create mode 100644 3rdparty/CMakeLists.txt create mode 100644 3rdparty/linenoise-ng/CMakeLists.txt create mode 100644 3rdparty/linenoise-ng/LICENSE create mode 100644 3rdparty/linenoise-ng/README.md create mode 100644 3rdparty/linenoise-ng/include/linenoise-ng/ConvertUTF.h create mode 100644 3rdparty/linenoise-ng/include/linenoise-ng/linenoise.h create mode 100644 3rdparty/linenoise-ng/source/ConvertUTF.cpp create mode 100644 3rdparty/linenoise-ng/source/linenoise.cpp create mode 100644 3rdparty/linenoise-ng/source/wcwidth.cpp diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt new file mode 100644 index 0000000..07bcbdd --- /dev/null +++ b/3rdparty/CMakeLists.txt @@ -0,0 +1,4 @@ + +# External libraries +set(IDE_FOLDER "3rdparty") +add_subdirectory(linenoise-ng) diff --git a/3rdparty/linenoise-ng/CMakeLists.txt b/3rdparty/linenoise-ng/CMakeLists.txt new file mode 100644 index 0000000..3f5d1bd --- /dev/null +++ 
b/3rdparty/linenoise-ng/CMakeLists.txt @@ -0,0 +1,142 @@ + +# +# External dependencies +# + + +# +# Library name and options +# + +# Target name +set(target linenoise-ng) + +# Exit here if required dependencies are not met +message(STATUS "External ${target}") + + +# +# Sources +# + +set(include_path "${CMAKE_CURRENT_SOURCE_DIR}/include/${target}") +set(source_path "${CMAKE_CURRENT_SOURCE_DIR}/source") + +set(headers + ${include_path}/linenoise.h + ${include_path}/ConvertUTF.h +) + +set(sources + ${source_path}/linenoise.cpp + ${source_path}/ConvertUTF.cpp + ${source_path}/wcwidth.cpp +) + + +# +# Create library +# + +# Build library +add_library(${target} STATIC + ${sources} + ${headers} +) + +# Create namespaced alias +add_library(externals::${target} ALIAS ${target}) + + +# +# Project options +# + +set_target_properties(${target} + PROPERTIES + ${DEFAULT_PROJECT_OPTIONS} + FOLDER "${IDE_FOLDER}" +) + + +# +# Include directories +# + +target_include_directories(${target} + PRIVATE + ${DEFAULT_INCLUDE_DIRECTORIES} + ${PROJECT_BINARY_DIR}/source/include + + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/include + + INTERFACE +) + + +# +# Libraries +# + +target_link_libraries(${target} + PRIVATE + + PUBLIC + ${DEFAULT_LIBRARIES} + + INTERFACE +) + + +# +# Compile definitions +# + +target_compile_definitions(${target} + PRIVATE + + PUBLIC + $<$<NOT:$<BOOL:${BUILD_SHARED_LIBS}>>:${target_upper}_STATIC_DEFINE> + ${DEFAULT_COMPILE_DEFINITIONS} + + INTERFACE +) + + +# +# Compile options +# + +target_compile_options(${target} + PRIVATE + + PUBLIC + ${DEFAULT_COMPILE_OPTIONS} + + INTERFACE +) + + +# +# Linker options +# + +target_link_libraries(${target} + PRIVATE + + PUBLIC + ${DEFAULT_LINKER_OPTIONS} + + INTERFACE +) + + +# +# Target Health +# + +perform_health_checks( + ${target} + ${sources} +) diff --git a/3rdparty/linenoise-ng/LICENSE b/3rdparty/linenoise-ng/LICENSE new file mode 100644 index 0000000..b7c58c4 --- /dev/null +++ b/3rdparty/linenoise-ng/LICENSE @@ -0,0 +1,66 @@ +linenoise.cpp +============= + 
+Copyright (c) 2010, Salvatore Sanfilippo +Copyright (c) 2010, Pieter Noordhuis + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Redis nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +wcwidth.cpp +=========== + +Markus Kuhn -- 2007-05-26 (Unicode 5.0) + +Permission to use, copy, modify, and distribute this software +for any purpose and without fee is hereby granted. The author +disclaims all warranties with regard to this software. + + + +ConvertUTF.cpp +============== + +Copyright 2001-2004 Unicode, Inc. + +Disclaimer + +This source code is provided as is by Unicode, Inc. 
No claims are +made as to fitness for any particular purpose. No warranties of any +kind are expressed or implied. The recipient agrees to determine +applicability of information provided. If this file has been +purchased on magnetic or optical media from Unicode, Inc., the +sole remedy for any claim will be exchange of defective media +within 90 days of receipt. + +Limitations on Rights to Redistribute This Code + +Unicode, Inc. hereby grants the right to freely use the information +supplied in this file in the creation of products supporting the +Unicode Standard, and to make copies of this file in any form +for internal or external distribution as long as this notice +remains attached. diff --git a/3rdparty/linenoise-ng/README.md b/3rdparty/linenoise-ng/README.md new file mode 100644 index 0000000..81898db --- /dev/null +++ b/3rdparty/linenoise-ng/README.md @@ -0,0 +1,156 @@ +# Linenoise Next Generation + +A small, portable GNU readline replacement for Linux, Windows and +MacOS which is capable of handling UTF-8 characters. Unlike GNU +readline, which is GPL, this library uses a BSD license and can be +used in any kind of program. + +## Origin + +This linenoise implementation is based on the work by +[Salvatore Sanfilippo](https://github.com/antirez/linenoise) and +10gen Inc. The goal is to create a zero-config, BSD +licensed, readline replacement usable in Apache2 or BSD licensed +programs. + +## Features + +* single-line and multi-line editing mode with the usual key bindings implemented +* history handling +* completion +* BSD license source code +* Only uses a subset of VT100 escapes (ANSI.SYS compatible) +* UTF8 aware +* support for Linux, MacOS and Windows + +It deviates from Salvatore's original goal to have a minimal readline +replacement for the sake of supporting UTF8 and Windows. It deviates +from 10gen Inc.'s goal to create a C++ interface to linenoise. 
This +library uses C++ internally, but to the user it provides a pure C +interface that is compatible with the original linenoise API. +C interface. + +## Requirements + +To build this library, you will need a C++11-enabled compiler and +some recent version of CMake. + +## Build instructions + +To build this library on Linux, first create a build directory + +```bash +mkdir -p build +``` + +and then build the library: + +```bash +(cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make) +``` + +To build and install the library at the default target location, use + +```bash +(cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make && sudo make install) +``` + +The default installation location can be adjusted by setting the `DESTDIR` +variable when invoking `make install`: + +```bash +(cd build && make DESTDIR=/tmp install) +``` + +To build the library on Windows, use these commands in an MS-DOS command +prompt: + +``` +md build +cd build +``` + +After that, invoke the appropriate command to create the files for your +target environment: + +* 32 bit: `cmake -G "Visual Studio 12 2013" -DCMAKE_BUILD_TYPE=Release ..` +* 64 bit: `cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_BUILD_TYPE=Release ..` + +After that, open the generated file `linenoise.sln` from the `build` +subdirectory with Visual Studio. + + +*note: the following sections of the README.md are from the original +linenoise repository and are partly outdated* + +## Can a line editing library be 20k lines of code? + +Line editing with some support for history is a really important +feature for command line utilities. Instead of retyping almost the +same stuff again and again it's just much better to hit the up arrow +and edit on syntax errors, or in order to try a slightly different +command. But apparently code dealing with terminals is some sort of +Black Magic: readline is 30k lines of code, libedit 20k. 
Is it +reasonable to link small utilities to huge libraries just to get a +minimal support for line editing? + +So what usually happens is either: + + * Large programs with configure scripts disabling line editing if + readline is not present in the system, or not supporting it at all + since readline is GPL licensed and libedit (the BSD clone) is not + as known and available as readline is (Real world example of this + problem: Tclsh). + + * Smaller programs not using a configure script not supporting line + editing at all (A problem we had with Redis-cli for instance). + +The result is a pollution of binaries without line editing support. + +So Salvatore spent more or less two hours doing a reality check +resulting in this little library: is it *really* needed for a line +editing library to be 20k lines of code? Apparently not, it is possible +to get a very small, zero configuration, trivial to embed library, +that solves the problem. Smaller programs will just include this, +supporting line editing out of the box. Larger programs may use this +little library or just check with configure if readline/libedit is +available and resort to linenoise if not. + +## Terminals, in 2010. + +Apparently almost every terminal you can happen to use today has some +kind of support for basic VT100 escape sequences. So Salvatore tried +to write a lib using just very basic VT100 features. The resulting +library appears to work everywhere Salvatore tried to use it, and now +can work even on ANSI.SYS compatible terminals, since no VT220 +specific sequences are used anymore. + +The original library has currently about 1100 lines of code. In order +to use it in your project just look at the *example.c* file in the +source distribution, it is trivial. Linenoise is BSD code, so you can +use it both in free software and commercial software. + +## Tested with... 
+ + * Linux text only console ($TERM = linux) + * Linux KDE terminal application ($TERM = xterm) + * Linux xterm ($TERM = xterm) + * Linux Buildroot ($TERM = vt100) + * Mac OS X iTerm ($TERM = xterm) + * Mac OS X default Terminal.app ($TERM = xterm) + * OpenBSD 4.5 through an OSX Terminal.app ($TERM = screen) + * IBM AIX 6.1 + * FreeBSD xterm ($TERM = xterm) + * ANSI.SYS + * Emacs comint mode ($TERM = dumb) + * Windows + +Please test it everywhere you can and report back! + +## Let's push this forward! + +Patches should respect the linenoise sensibility for +small and easy to understand code, as well as the license +restrictions. Extensions must be submitted under a BSD-style license. +A contributor license is required for contributions. + diff --git a/3rdparty/linenoise-ng/include/linenoise-ng/ConvertUTF.h b/3rdparty/linenoise-ng/include/linenoise-ng/ConvertUTF.h new file mode 100644 index 0000000..8a29623 --- /dev/null +++ b/3rdparty/linenoise-ng/include/linenoise-ng/ConvertUTF.h @@ -0,0 +1,162 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. 
+ */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Header file. + + Several functions are included here, forming a complete set of + conversions between the three formats. UTF-7 is not included + here, but is handled in a separate source file. + + Each of these routines takes pointers to input buffers and output + buffers. The input buffers are const. + + Each routine converts the text between *sourceStart and sourceEnd, + putting the result into the buffer between *targetStart and + targetEnd. Note: the end pointers are *after* the last item: e.g. + *(sourceEnd - 1) is the last item. + + The return result indicates whether the conversion was successful, + and if not, whether the problem was in the source or target buffers. + (Only the first encountered problem is indicated.) + + After the conversion, *sourceStart and *targetStart are both + updated to point to the end of last text successfully converted in + the respective buffers. + + Input parameters: + sourceStart - pointer to a pointer to the source buffer. + The contents of this are modified on return so that + it points at the next thing to be converted. + targetStart - similarly, pointer to pointer to the target buffer. + sourceEnd, targetEnd - respectively pointers to the ends of the + two buffers, for overflow checking only. + + These conversion functions take a ConversionFlags argument. When this + flag is set to strict, both irregular sequences and isolated surrogates + will cause an error. When the flag is set to lenient, both irregular + sequences and isolated surrogates are converted. + + Whether the flag is strict or lenient, all illegal sequences will cause + an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, + or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code + must check for illegal sequences. 
+ + When the flag is set to lenient, characters over 0x10FFFF are converted + to the replacement character; otherwise (when the flag is set to strict) + they constitute an error. + + Output parameters: + The value "sourceIllegal" is returned from some routines if the input + sequence is malformed. When "sourceIllegal" is returned, the source + value will point to the illegal value that caused the problem. E.g., + in UTF-8 when a sequence is malformed, it points to the start of the + malformed sequence. + + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Fixes & updates, Sept 2001. + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- + The following 4 definitions are compiler-specific. + The C standard does not guarantee that wchar_t has at least + 16 bits, so wchar_t is no less portable than unsigned short! + All should be unsigned values to avoid sign extension during + bit mask & shift operations. +------------------------------------------------------------------------ */ + +#if 0 +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ +#endif + +#include +#include + +namespace linenoise_ng { + +typedef uint32_t UTF32; +typedef uint16_t UTF16; +typedef uint8_t UTF8; +typedef unsigned char Boolean; /* 0 or 1 */ + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +typedef enum { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. 
room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +// /* This is for C++ and does no harm in C */ +// #ifdef __cplusplus +// extern "C" { +// #endif + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + char16_t** targetStart, char16_t* targetEnd, ConversionFlags flags); + +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); + +// #ifdef __cplusplus +// } +// #endif + +} + +/* --------------------------------------------------------------------- */ diff --git a/3rdparty/linenoise-ng/include/linenoise-ng/linenoise.h b/3rdparty/linenoise-ng/include/linenoise-ng/linenoise.h new file mode 100644 index 0000000..3a8eb9f --- /dev/null +++ b/3rdparty/linenoise-ng/include/linenoise-ng/linenoise.h @@ -0,0 +1,73 @@ +/* linenoise.h -- guerrilla line editing library against the idea that a + * line editing lib needs to be 20,000 lines of C code. + * + * See linenoise.c for more information. 
+ * + * Copyright (c) 2010, Salvatore Sanfilippo + * Copyright (c) 2010, Pieter Noordhuis + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __LINENOISE_H +#define __LINENOISE_H + +#define LINENOISE_VERSION "1.0.0" +#define LINENOISE_VERSION_MAJOR 1 +#define LINENOISE_VERSION_MINOR 1 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct linenoiseCompletions linenoiseCompletions; + +typedef void(linenoiseCompletionCallback)(const char*, linenoiseCompletions*); +void linenoiseSetCompletionCallback(linenoiseCompletionCallback* fn); +void linenoiseAddCompletion(linenoiseCompletions* lc, const char* str); + +char* linenoise(const char* prompt); +void linenoisePreloadBuffer(const char* preloadText); +int linenoiseHistoryAdd(const char* line); +int linenoiseHistorySetMaxLen(int len); +char* linenoiseHistoryLine(int index); +int linenoiseHistorySave(const char* filename); +int linenoiseHistoryLoad(const char* filename); +void linenoiseHistoryFree(void); +void linenoiseClearScreen(void); +void linenoiseSetMultiLine(int ml); +void linenoisePrintKeyCodes(void); +/* the following are extensions to the original linenoise API */ +int linenoiseInstallWindowChangeHandler(void); +/* returns type of key pressed: 1 = CTRL-C, 2 = CTRL-D, 0 = other */ +int linenoiseKeyType(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __LINENOISE_H */ diff --git a/3rdparty/linenoise-ng/source/ConvertUTF.cpp b/3rdparty/linenoise-ng/source/ConvertUTF.cpp new file mode 100644 index 0000000..a9bf1b7 --- /dev/null +++ b/3rdparty/linenoise-ng/source/ConvertUTF.cpp @@ -0,0 +1,546 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. 
+ * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + + See the header file "ConvertUTF.h" for complete documentation. 
+ +------------------------------------------------------------------------ */ + +#include "linenoise-ng/ConvertUTF.h" +#ifdef CVTUTF_DEBUG +#include +#endif + +namespace linenoise_ng { + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define false 0 +#define true 1 + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + char16_t** targetStart, char16_t* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + char16_t* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG +if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); +} +#endif + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. 
+ */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. + * Constants have been gathered. Loops & conditionals have been removed as + * much as possible for efficiency, in favor of drop-through switches. + * (See "Note A" at the bottom of the file for equivalent code.) + * If your compiler supports it, the "isLegalUTF8" call can be turned + * into an inline function. + */ + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. 
*/ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + default: break; + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. 
+ */ + +static Boolean isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (source+length > sourceEnd) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. 
+ */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + default: break; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. 
*/ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + default: break; + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + default: break; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up the source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. 
+ */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +} + +/* --------------------------------------------------------------------- + + Note A. + The fall-through switches in UTF-8 reading code save a + temp variable, some decrements & conditionals. The switches + are equivalent to the following loop: + { + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); + } + In UTF-8 writing code, the switches on "bytesToWrite" are + similarly unrolled loops. + + --------------------------------------------------------------------- */ diff --git a/3rdparty/linenoise-ng/source/linenoise.cpp b/3rdparty/linenoise-ng/source/linenoise.cpp new file mode 100644 index 0000000..351e6dc --- /dev/null +++ b/3rdparty/linenoise-ng/source/linenoise.cpp @@ -0,0 +1,3461 @@ +/* linenoise.c -- guerrilla line editing library against the idea that a + * line editing lib needs to be 20,000 lines of C code. + * + * Copyright (c) 2010, Salvatore Sanfilippo + * Copyright (c) 2010, Pieter Noordhuis + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * line editing lib needs to be 20,000 lines of C code. + * + * You can find the latest source code at: + * + * http://github.com/antirez/linenoise + * + * Does a number of crazy assumptions that happen to be true in 99.9999% of + * the 2010 UNIX computers around. + * + * References: + * - http://invisible-island.net/xterm/ctlseqs/ctlseqs.html + * - http://www.3waylabs.com/nw/WWW/products/wizcon/vt220.html + * + * Todo list: + * - Switch to gets() if $TERM is something we can't support. + * - Filter bogus Ctrl+ combinations. + * - Win32 support + * + * Bloat: + * - Completion? + * - History search like Ctrl+r in readline? + * + * List of escape sequences used by this program, we do everything just + * with three sequences. 
In order to be so cheap we may have some + * flickering effect with some slow terminal, but the lesser sequences + * the more compatible. + * + * CHA (Cursor Horizontal Absolute) + * Sequence: ESC [ n G + * Effect: moves cursor to column n (1 based) + * + * EL (Erase Line) + * Sequence: ESC [ n K + * Effect: if n is 0 or missing, clear from cursor to end of line + * Effect: if n is 1, clear from beginning of line to cursor + * Effect: if n is 2, clear entire line + * + * CUF (Cursor Forward) + * Sequence: ESC [ n C + * Effect: moves cursor forward of n chars + * + * The following are used to clear the screen: ESC [ H ESC [ 2 J + * This is actually composed of two sequences: + * + * cursorhome + * Sequence: ESC [ H + * Effect: moves the cursor to upper left corner + * + * ED2 (Clear entire screen) + * Sequence: ESC [ 2 J + * Effect: clear the whole screen + * + */ + +#ifdef _WIN32 + +#include +#include +#include + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define snprintf _snprintf // Microsoft headers use underscores in some names +#endif + +#if !defined GNUC +#define strcasecmp _stricmp +#endif + +#define strdup _strdup +#define isatty _isatty +#define write _write +#define STDIN_FILENO 0 + +#else /* _WIN32 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* _WIN32 */ + +#include +#include +#include + +#include "linenoise-ng/linenoise.h" +#include "linenoise-ng/ConvertUTF.h" + +#include +#include +#include + +using std::string; +using std::vector; +using std::unique_ptr; +using namespace linenoise_ng; + +typedef unsigned char char8_t; + +static ConversionResult copyString8to32(char32_t* dst, size_t dstSize, + size_t& dstCount, const char* src) { + const UTF8* sourceStart = reinterpret_cast(src); + const UTF8* sourceEnd = sourceStart + strlen(src); + UTF32* targetStart = reinterpret_cast(dst); + UTF32* targetEnd = targetStart + dstSize; + + ConversionResult res = ConvertUTF8toUTF32( + &sourceStart, sourceEnd, 
&targetStart, targetEnd, lenientConversion); + + if (res == conversionOK) { + dstCount = targetStart - reinterpret_cast(dst); + + if (dstCount < dstSize) { + *targetStart = 0; + } + } + + return res; +} + +static ConversionResult copyString8to32(char32_t* dst, size_t dstSize, + size_t& dstCount, const char8_t* src) { + return copyString8to32(dst, dstSize, dstCount, + reinterpret_cast(src)); +} + +static size_t strlen32(const char32_t* str) { + const char32_t* ptr = str; + + while (*ptr) { + ++ptr; + } + + return ptr - str; +} + +static size_t strlen8(const char8_t* str) { + return strlen(reinterpret_cast(str)); +} + +static char8_t* strdup8(const char* src) { + return reinterpret_cast(strdup(src)); +} + +#ifdef _WIN32 +static const int FOREGROUND_WHITE = + FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE; +static const int BACKGROUND_WHITE = + BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE; +static const int INTENSITY = FOREGROUND_INTENSITY | BACKGROUND_INTENSITY; + +class WinAttributes { + public: + WinAttributes() { + CONSOLE_SCREEN_BUFFER_INFO info; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &info); + _defaultAttribute = info.wAttributes & INTENSITY; + _defaultColor = info.wAttributes & FOREGROUND_WHITE; + _defaultBackground = info.wAttributes & BACKGROUND_WHITE; + + _consoleAttribute = _defaultAttribute; + _consoleColor = _defaultColor | _defaultBackground; + } + + public: + int _defaultAttribute; + int _defaultColor; + int _defaultBackground; + + int _consoleAttribute; + int _consoleColor; +}; + +static WinAttributes WIN_ATTR; + +static void copyString32to16(char16_t* dst, size_t dstSize, size_t* dstCount, + const char32_t* src, size_t srcSize) { + const UTF32* sourceStart = reinterpret_cast(src); + const UTF32* sourceEnd = sourceStart + srcSize; + char16_t* targetStart = reinterpret_cast(dst); + char16_t* targetEnd = targetStart + dstSize; + + ConversionResult res = ConvertUTF32toUTF16( + &sourceStart, sourceEnd, &targetStart, 
targetEnd, lenientConversion); + + if (res == conversionOK) { + *dstCount = targetStart - reinterpret_cast(dst); + + if (*dstCount < dstSize) { + *targetStart = 0; + } + } +} +#endif + +static void copyString32to8(char* dst, size_t dstSize, size_t* dstCount, + const char32_t* src, size_t srcSize) { + const UTF32* sourceStart = reinterpret_cast(src); + const UTF32* sourceEnd = sourceStart + srcSize; + UTF8* targetStart = reinterpret_cast(dst); + UTF8* targetEnd = targetStart + dstSize; + + ConversionResult res = ConvertUTF32toUTF8( + &sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion); + + if (res == conversionOK) { + *dstCount = targetStart - reinterpret_cast(dst); + + if (*dstCount < dstSize) { + *targetStart = 0; + } + } +} + +static void copyString32to8(char* dst, size_t dstLen, const char32_t* src) { + size_t dstCount = 0; + copyString32to8(dst, dstLen, &dstCount, src, strlen32(src)); +} + +static void copyString32(char32_t* dst, const char32_t* src, size_t len) { + while (0 < len && *src) { + *dst++ = *src++; + --len; + } + + *dst = 0; +} + +static int strncmp32(const char32_t* left, const char32_t* right, size_t len) { + while (0 < len && *left) { + if (*left != *right) { + return *left - *right; + } + + ++left; + ++right; + --len; + } + + return 0; +} + +#ifdef _WIN32 +#include + +static size_t OutputWin(char16_t* text16, char32_t* text32, size_t len32) { + size_t count16 = 0; + + copyString32to16(text16, len32, &count16, text32, len32); + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), text16, + static_cast(count16), nullptr, nullptr); + + return count16; +} + +static char32_t* HandleEsc(char32_t* p, char32_t* end) { + if (*p == '[') { + int code = 0; + + for (++p; p < end; ++p) { + char32_t c = *p; + + if ('0' <= c && c <= '9') { + code = code * 10 + (c - '0'); + } else if (c == 'm' || c == ';') { + switch (code) { + case 0: + WIN_ATTR._consoleAttribute = WIN_ATTR._defaultAttribute; + WIN_ATTR._consoleColor = + WIN_ATTR._defaultColor | 
WIN_ATTR._defaultBackground; + break; + + case 1: // BOLD + case 5: // BLINK + WIN_ATTR._consoleAttribute = + (WIN_ATTR._defaultAttribute ^ FOREGROUND_INTENSITY) & INTENSITY; + break; + + case 30: + WIN_ATTR._consoleColor = BACKGROUND_WHITE; + break; + + case 31: + WIN_ATTR._consoleColor = + FOREGROUND_RED | WIN_ATTR._defaultBackground; + break; + + case 32: + WIN_ATTR._consoleColor = + FOREGROUND_GREEN | WIN_ATTR._defaultBackground; + break; + + case 33: + WIN_ATTR._consoleColor = + FOREGROUND_RED | FOREGROUND_GREEN | WIN_ATTR._defaultBackground; + break; + + case 34: + WIN_ATTR._consoleColor = + FOREGROUND_BLUE | WIN_ATTR._defaultBackground; + break; + + case 35: + WIN_ATTR._consoleColor = + FOREGROUND_BLUE | FOREGROUND_RED | WIN_ATTR._defaultBackground; + break; + + case 36: + WIN_ATTR._consoleColor = FOREGROUND_BLUE | FOREGROUND_GREEN | + WIN_ATTR._defaultBackground; + break; + + case 37: + WIN_ATTR._consoleColor = FOREGROUND_GREEN | FOREGROUND_RED | + FOREGROUND_BLUE | + WIN_ATTR._defaultBackground; + break; + } + + code = 0; + } + + if (*p == 'm') { + ++p; + break; + } + } + } else { + ++p; + } + + auto handle = GetStdHandle(STD_OUTPUT_HANDLE); + SetConsoleTextAttribute(handle, + WIN_ATTR._consoleAttribute | WIN_ATTR._consoleColor); + + return p; +} + +static size_t WinWrite32(char16_t* text16, char32_t* text32, size_t len32) { + char32_t* p = text32; + char32_t* q = p; + char32_t* e = text32 + len32; + size_t count16 = 0; + + while (p < e) { + if (*p == 27) { + if (q < p) { + count16 += OutputWin(text16, q, p - q); + } + + q = p = HandleEsc(p + 1, e); + } else { + ++p; + } + } + + if (q < p) { + count16 += OutputWin(text16, q, p - q); + } + + return count16; +} +#endif + +static int write32(int fd, char32_t* text32, int len32) { +#ifdef _WIN32 + if (isatty(fd)) { + size_t len16 = 2 * len32 + 1; + unique_ptr text16(new char16_t[len16]); + size_t count16 = WinWrite32(text16.get(), text32, len32); + + return static_cast(count16); + } else { + size_t len8 = 4 * 
len32 + 1; + unique_ptr text8(new char[len8]); + size_t count8 = 0; + + copyString32to8(text8.get(), len8, &count8, text32, len32); + + return write(fd, text8.get(), static_cast(count8)); + } +#else + size_t len8 = 4 * len32 + 1; + unique_ptr text8(new char[len8]); + size_t count8 = 0; + + copyString32to8(text8.get(), len8, &count8, text32, len32); + + return write(fd, text8.get(), count8); +#endif +} + +class Utf32String { + public: + Utf32String() : _length(0), _data(nullptr) { + // note: parens intentional, _data must be properly initialized + _data = new char32_t[1](); + } + + explicit Utf32String(const char* src) : _length(0), _data(nullptr) { + size_t len = strlen(src); + // note: parens intentional, _data must be properly initialized + _data = new char32_t[len + 1](); + copyString8to32(_data, len + 1, _length, src); + } + + explicit Utf32String(const char8_t* src) : _length(0), _data(nullptr) { + size_t len = strlen(reinterpret_cast(src)); + // note: parens intentional, _data must be properly initialized + _data = new char32_t[len + 1](); + copyString8to32(_data, len + 1, _length, src); + } + + explicit Utf32String(const char32_t* src) : _length(0), _data(nullptr) { + for (_length = 0; src[_length] != 0; ++_length) { + } + + // note: parens intentional, _data must be properly initialized + _data = new char32_t[_length + 1](); + memcpy(_data, src, _length * sizeof(char32_t)); + } + + explicit Utf32String(const char32_t* src, int len) : _length(len), _data(nullptr) { + // note: parens intentional, _data must be properly initialized + _data = new char32_t[len + 1](); + memcpy(_data, src, len * sizeof(char32_t)); + } + + explicit Utf32String(int len) : _length(0), _data(nullptr) { + // note: parens intentional, _data must be properly initialized + _data = new char32_t[len](); + } + + explicit Utf32String(const Utf32String& that) : _length(that._length), _data(nullptr) { + // note: parens intentional, _data must be properly initialized + _data = new 
char32_t[_length + 1](); + memcpy(_data, that._data, sizeof(char32_t) * _length); + } + + Utf32String& operator=(const Utf32String& that) { + if (this != &that) { + delete[] _data; + _data = new char32_t[that._length](); + _length = that._length; + memcpy(_data, that._data, sizeof(char32_t) * _length); + } + + return *this; + } + + ~Utf32String() { delete[] _data; } + + public: + char32_t* get() const { return _data; } + + size_t length() const { return _length; } + + size_t chars() const { return _length; } + + void initFromBuffer() { + for (_length = 0; _data[_length] != 0; ++_length) { + } + } + + const char32_t& operator[](size_t pos) const { return _data[pos]; } + + char32_t& operator[](size_t pos) { return _data[pos]; } + + private: + size_t _length; + char32_t* _data; +}; + +class Utf8String { + Utf8String(const Utf8String&) = delete; + Utf8String& operator=(const Utf8String&) = delete; + + public: + explicit Utf8String(const Utf32String& src) { + size_t len = src.length() * 4 + 1; + _data = new char[len]; + copyString32to8(_data, len, src.get()); + } + + ~Utf8String() { delete[] _data; } + + public: + char* get() const { return _data; } + + private: + char* _data; +}; + +struct linenoiseCompletions { + vector completionStrings; +}; + +#define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 +#define LINENOISE_MAX_LINE 4096 + +// make control-characters more readable +#define ctrlChar(upperCaseASCII) (upperCaseASCII - 0x40) + +/** + * Recompute widths of all characters in a char32_t buffer + * @param text input buffer of Unicode characters + * @param widths output buffer of character widths + * @param charCount number of characters in buffer + */ +namespace linenoise_ng { +int mk_wcwidth(char32_t ucs); +} + +static void recomputeCharacterWidths(const char32_t* text, char* widths, + int charCount) { + for (int i = 0; i < charCount; ++i) { + widths[i] = mk_wcwidth(text[i]); + } +} + +/** + * Calculate a new screen position given a starting position, screen width and + * 
character count + * @param x initial x position (zero-based) + * @param y initial y position (zero-based) + * @param screenColumns screen column count + * @param charCount character positions to advance + * @param xOut returned x position (zero-based) + * @param yOut returned y position (zero-based) + */ +static void calculateScreenPosition(int x, int y, int screenColumns, + int charCount, int& xOut, int& yOut) { + xOut = x; + yOut = y; + int charsRemaining = charCount; + while (charsRemaining > 0) { + int charsThisRow = (x + charsRemaining < screenColumns) ? charsRemaining + : screenColumns - x; + xOut = x + charsThisRow; + yOut = y; + charsRemaining -= charsThisRow; + x = 0; + ++y; + } + if (xOut == screenColumns) { // we have to special-case line wrap + xOut = 0; + ++yOut; + } +} + +/** + * Calculate a column width using mk_wcswidth() + * @param buf32 text to calculate + * @param len length of text to calculate + */ +namespace linenoise_ng { +int mk_wcswidth(const char32_t* pwcs, size_t n); +} + +static int calculateColumnPosition(char32_t* buf32, int len) { + int width = mk_wcswidth(reinterpret_cast(buf32), len); + if (width == -1) + return len; + else + return width; +} + +static bool isControlChar(char32_t testChar) { + return (testChar < ' ') || // C0 controls + (testChar >= 0x7F && testChar <= 0x9F); // DEL and C1 controls +} + +struct PromptBase { // a convenience struct for grouping prompt info + Utf32String promptText; // our copy of the prompt text, edited + char* promptCharWidths; // character widths from mk_wcwidth() + int promptChars; // chars in promptText + int promptBytes; // bytes in promptText + int promptExtraLines; // extra lines (beyond 1) occupied by prompt + int promptIndentation; // column offset to end of prompt + int promptLastLinePosition; // index into promptText where last line begins + int promptPreviousInputLen; // promptChars of previous input line, for + // clearing + int promptCursorRowOffset; // where the cursor is relative to 
the start of + // the prompt + int promptScreenColumns; // width of screen in columns + int promptPreviousLen; // help erasing + int promptErrorCode; // error code (invalid UTF-8) or zero + + PromptBase() : promptPreviousInputLen(0) {} + + bool write() { + if (write32(1, promptText.get(), promptBytes) == -1) return false; + + return true; + } +}; + +struct PromptInfo : public PromptBase { + PromptInfo(const char* textPtr, int columns) { + promptExtraLines = 0; + promptLastLinePosition = 0; + promptPreviousLen = 0; + promptScreenColumns = columns; + Utf32String tempUnicode(textPtr); + + // strip control characters from the prompt -- we do allow newline + char32_t* pIn = tempUnicode.get(); + char32_t* pOut = pIn; + + int len = 0; + int x = 0; + + bool const strip = (isatty(1) == 0); + + while (*pIn) { + char32_t c = *pIn; + if ('\n' == c || !isControlChar(c)) { + *pOut = c; + ++pOut; + ++pIn; + ++len; + if ('\n' == c || ++x >= promptScreenColumns) { + x = 0; + ++promptExtraLines; + promptLastLinePosition = len; + } + } else if (c == '\x1b') { + if (strip) { + // jump over control chars + ++pIn; + if (*pIn == '[') { + ++pIn; + while (*pIn && ((*pIn == ';') || ((*pIn >= '0' && *pIn <= '9')))) { + ++pIn; + } + if (*pIn == 'm') { + ++pIn; + } + } + } else { + // copy control chars + *pOut = *pIn; + ++pOut; + ++pIn; + if (*pIn == '[') { + *pOut = *pIn; + ++pOut; + ++pIn; + while (*pIn && ((*pIn == ';') || ((*pIn >= '0' && *pIn <= '9')))) { + *pOut = *pIn; + ++pOut; + ++pIn; + } + if (*pIn == 'm') { + *pOut = *pIn; + ++pOut; + ++pIn; + } + } + } + } else { + ++pIn; + } + } + *pOut = 0; + promptChars = len; + promptBytes = static_cast(pOut - tempUnicode.get()); + promptText = tempUnicode; + + promptIndentation = len - promptLastLinePosition; + promptCursorRowOffset = promptExtraLines; + } +}; + +// Used with DynamicPrompt (history search) +// +static const Utf32String forwardSearchBasePrompt("(i-search)`"); +static const Utf32String 
reverseSearchBasePrompt("(reverse-i-search)`"); +static const Utf32String endSearchBasePrompt("': "); +static Utf32String + previousSearchText; // remembered across invocations of linenoise() + +// changing prompt for "(reverse-i-search)`text':" etc. +// +struct DynamicPrompt : public PromptBase { + Utf32String searchText; // text we are searching for + char* searchCharWidths; // character widths from mk_wcwidth() + int searchTextLen; // chars in searchText + int direction; // current search direction, 1=forward, -1=reverse + + DynamicPrompt(PromptBase& pi, int initialDirection) + : searchTextLen(0), direction(initialDirection) { + promptScreenColumns = pi.promptScreenColumns; + promptCursorRowOffset = 0; + Utf32String emptyString(1); + searchText = emptyString; + const Utf32String* basePrompt = + (direction > 0) ? &forwardSearchBasePrompt : &reverseSearchBasePrompt; + size_t promptStartLength = basePrompt->length(); + promptChars = + static_cast(promptStartLength + endSearchBasePrompt.length()); + promptBytes = promptChars; + promptLastLinePosition = promptChars; // TODO fix this, we are asssuming + // that the history prompt won't wrap + // (!) + promptPreviousLen = promptChars; + Utf32String tempUnicode(promptChars + 1); + memcpy(tempUnicode.get(), basePrompt->get(), + sizeof(char32_t) * promptStartLength); + memcpy(&tempUnicode[promptStartLength], endSearchBasePrompt.get(), + sizeof(char32_t) * (endSearchBasePrompt.length() + 1)); + tempUnicode.initFromBuffer(); + promptText = tempUnicode; + calculateScreenPosition(0, 0, pi.promptScreenColumns, promptChars, + promptIndentation, promptExtraLines); + } + + void updateSearchPrompt(void) { + const Utf32String* basePrompt = + (direction > 0) ? 
&forwardSearchBasePrompt : &reverseSearchBasePrompt; + size_t promptStartLength = basePrompt->length(); + promptChars = static_cast(promptStartLength + searchTextLen + + endSearchBasePrompt.length()); + promptBytes = promptChars; + Utf32String tempUnicode(promptChars + 1); + memcpy(tempUnicode.get(), basePrompt->get(), + sizeof(char32_t) * promptStartLength); + memcpy(&tempUnicode[promptStartLength], searchText.get(), + sizeof(char32_t) * searchTextLen); + size_t endIndex = promptStartLength + searchTextLen; + memcpy(&tempUnicode[endIndex], endSearchBasePrompt.get(), + sizeof(char32_t) * (endSearchBasePrompt.length() + 1)); + tempUnicode.initFromBuffer(); + promptText = tempUnicode; + } + + void updateSearchText(const char32_t* textPtr) { + Utf32String tempUnicode(textPtr); + searchTextLen = static_cast(tempUnicode.chars()); + searchText = tempUnicode; + updateSearchPrompt(); + } +}; + +class KillRing { + static const int capacity = 10; + int size; + int index; + char indexToSlot[10]; + vector theRing; + + public: + enum action { actionOther, actionKill, actionYank }; + action lastAction; + size_t lastYankSize; + + KillRing() : size(0), index(0), lastAction(actionOther) { + theRing.reserve(capacity); + } + + void kill(const char32_t* text, int textLen, bool forward) { + if (textLen == 0) { + return; + } + Utf32String killedText(text, textLen); + if (lastAction == actionKill && size > 0) { + int slot = indexToSlot[0]; + int currentLen = static_cast(theRing[slot].length()); + int resultLen = currentLen + textLen; + Utf32String temp(resultLen + 1); + if (forward) { + memcpy(temp.get(), theRing[slot].get(), currentLen * sizeof(char32_t)); + memcpy(&temp[currentLen], killedText.get(), textLen * sizeof(char32_t)); + } else { + memcpy(temp.get(), killedText.get(), textLen * sizeof(char32_t)); + memcpy(&temp[textLen], theRing[slot].get(), + currentLen * sizeof(char32_t)); + } + temp[resultLen] = 0; + temp.initFromBuffer(); + theRing[slot] = temp; + } else { + if (size < 
capacity) { + if (size > 0) { + memmove(&indexToSlot[1], &indexToSlot[0], size); + } + indexToSlot[0] = size; + size++; + theRing.push_back(killedText); + } else { + int slot = indexToSlot[capacity - 1]; + theRing[slot] = killedText; + memmove(&indexToSlot[1], &indexToSlot[0], capacity - 1); + indexToSlot[0] = slot; + } + index = 0; + } + } + + Utf32String* yank() { return (size > 0) ? &theRing[indexToSlot[index]] : 0; } + + Utf32String* yankPop() { + if (size == 0) { + return 0; + } + ++index; + if (index == size) { + index = 0; + } + return &theRing[indexToSlot[index]]; + } +}; + +class InputBuffer { + char32_t* buf32; // input buffer + char* charWidths; // character widths from mk_wcwidth() + int buflen; // buffer size in characters + int len; // length of text in input buffer + int pos; // character position in buffer ( 0 <= pos <= len ) + + void clearScreen(PromptBase& pi); + int incrementalHistorySearch(PromptBase& pi, int startChar); + int completeLine(PromptBase& pi); + void refreshLine(PromptBase& pi); + + public: + InputBuffer(char32_t* buffer, char* widthArray, int bufferLen) + : buf32(buffer), + charWidths(widthArray), + buflen(bufferLen - 1), + len(0), + pos(0) { + buf32[0] = 0; + } + void preloadBuffer(const char* preloadText) { + size_t ucharCount = 0; + copyString8to32(buf32, buflen + 1, ucharCount, preloadText); + recomputeCharacterWidths(buf32, charWidths, static_cast(ucharCount)); + len = static_cast(ucharCount); + pos = static_cast(ucharCount); + } + int getInputLine(PromptBase& pi); + int length(void) const { return len; } +}; + +// Special codes for keyboard input: +// +// Between Windows and the various Linux "terminal" programs, there is some +// pretty diverse behavior in the "scan codes" and escape sequences we are +// presented with. So ... we'll translate them all into our own pidgin +// pseudocode, trying to stay out of the way of UTF-8 and international +// characters. Here's the general plan. 
+// +// "User input keystrokes" (key chords, whatever) will be encoded as a single +// value. +// The low 21 bits are reserved for Unicode characters. Popular function-type +// keys +// get their own codes in the range 0x10200000 to (if needed) 0x1FE00000, +// currently +// just arrow keys, Home, End and Delete. Keypresses with Ctrl get ORed with +// 0x20000000, with Alt get ORed with 0x40000000. So, Ctrl+Alt+Home is encoded +// as 0x20000000 + 0x40000000 + 0x10A00000 == 0x70A00000. To keep things +// complicated, +// the Alt key is equivalent to prefixing the keystroke with ESC, so ESC +// followed by +// D is treated the same as Alt + D ... we'll just use Emacs terminology and +// call +// this "Meta". So, we will encode both ESC followed by D and Alt held down +// while D +// is pressed the same, as Meta-D, encoded as 0x40000064. +// +// Here are the definitions of our component constants: +// +// Maximum unsigned 32-bit value = 0xFFFFFFFF; // For reference, max 32-bit +// value +// Highest allocated Unicode char = 0x001FFFFF; // For reference, max +// Unicode value +static const int META = 0x40000000; // Meta key combination +static const int CTRL = 0x20000000; // Ctrl key combination +// static const int SPECIAL_KEY = 0x10000000; // Common bit for all special +// keys +static const int UP_ARROW_KEY = 0x10200000; // Special keys +static const int DOWN_ARROW_KEY = 0x10400000; +static const int RIGHT_ARROW_KEY = 0x10600000; +static const int LEFT_ARROW_KEY = 0x10800000; +static const int HOME_KEY = 0x10A00000; +static const int END_KEY = 0x10C00000; +static const int DELETE_KEY = 0x10E00000; +static const int PAGE_UP_KEY = 0x11000000; +static const int PAGE_DOWN_KEY = 0x11200000; + +static const char* unsupported_term[] = {"dumb", "cons25", "emacs", NULL}; +static linenoiseCompletionCallback* completionCallback = NULL; + +#ifdef _WIN32 +static HANDLE console_in, console_out; +static DWORD oldMode; +static WORD oldDisplayAttribute; +#else +static struct termios 
orig_termios; /* in order to restore at exit */ +#endif + +static KillRing killRing; + +static int rawmode = 0; /* for atexit() function to check if restore is needed*/ +static int atexit_registered = 0; /* register atexit just 1 time */ +static int historyMaxLen = LINENOISE_DEFAULT_HISTORY_MAX_LEN; +static int historyLen = 0; +static int historyIndex = 0; +static char8_t** history = NULL; + +// used to emulate Windows command prompt on down-arrow after a recall +// we use -2 as our "not set" value because we add 1 to the previous index on +// down-arrow, +// and zero is a valid index (so -1 is a valid "previous index") +static int historyPreviousIndex = -2; +static bool historyRecallMostRecent = false; + +static void linenoiseAtExit(void); + +static bool isUnsupportedTerm(void) { + char* term = getenv("TERM"); + if (term == NULL) return false; + for (int j = 0; unsupported_term[j]; ++j) + if (!strcasecmp(term, unsupported_term[j])) { + return true; + } + return false; +} + +static void beep() { + fprintf(stderr, "\x7"); // ctrl-G == bell/beep + fflush(stderr); +} + +void linenoiseHistoryFree(void) { + if (history) { + for (int j = 0; j < historyLen; ++j) free(history[j]); + historyLen = 0; + free(history); + history = 0; + } +} + +static int enableRawMode(void) { +#ifdef _WIN32 + if (!console_in) { + console_in = GetStdHandle(STD_INPUT_HANDLE); + console_out = GetStdHandle(STD_OUTPUT_HANDLE); + + GetConsoleMode(console_in, &oldMode); + SetConsoleMode(console_in, oldMode & + ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT | + ENABLE_PROCESSED_INPUT)); + } + return 0; +#else + struct termios raw; + + if (!isatty(STDIN_FILENO)) goto fatal; + if (!atexit_registered) { + atexit(linenoiseAtExit); + atexit_registered = 1; + } + if (tcgetattr(0, &orig_termios) == -1) goto fatal; + + raw = orig_termios; /* modify the original mode */ + /* input modes: no break, no CR to NL, no parity check, no strip char, + * no start/stop output control. 
*/ + raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); + /* output modes - disable post processing */ + // this is wrong, we don't want raw output, it turns newlines into straight + // linefeeds + // raw.c_oflag &= ~(OPOST); + /* control modes - set 8 bit chars */ + raw.c_cflag |= (CS8); + /* local modes - echoing off, canonical off, no extended functions, + * no signal chars (^Z,^C) */ + raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); + /* control chars - set return condition: min number of bytes and timer. + * We want read to return every single byte, without timeout. */ + raw.c_cc[VMIN] = 1; + raw.c_cc[VTIME] = 0; /* 1 byte, no timer */ + + /* put terminal in raw mode after flushing */ + if (tcsetattr(0, TCSADRAIN, &raw) < 0) goto fatal; + rawmode = 1; + return 0; + +fatal: + errno = ENOTTY; + return -1; +#endif +} + +static void disableRawMode(void) { +#ifdef _WIN32 + SetConsoleMode(console_in, oldMode); + console_in = 0; + console_out = 0; +#else + if (rawmode && tcsetattr(0, TCSADRAIN, &orig_termios) != -1) rawmode = 0; +#endif +} + +// At exit we'll try to fix the terminal to the initial conditions +static void linenoiseAtExit(void) { disableRawMode(); } + +static int getScreenColumns(void) { + int cols; +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO inf; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &inf); + cols = inf.dwSize.X; +#else + struct winsize ws; + cols = (ioctl(1, TIOCGWINSZ, &ws) == -1) ? 80 : ws.ws_col; +#endif + // cols is 0 in certain circumstances like inside debugger, which creates + // further issues + return (cols > 0) ? cols : 80; +} + +static int getScreenRows(void) { + int rows; +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO inf; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &inf); + rows = 1 + inf.srWindow.Bottom - inf.srWindow.Top; +#else + struct winsize ws; + rows = (ioctl(1, TIOCGWINSZ, &ws) == -1) ? 24 : ws.ws_row; +#endif + return (rows > 0) ? 
rows : 24; +} + +static void setDisplayAttribute(bool enhancedDisplay, bool error) { +#ifdef _WIN32 + if (enhancedDisplay) { + CONSOLE_SCREEN_BUFFER_INFO inf; + GetConsoleScreenBufferInfo(console_out, &inf); + oldDisplayAttribute = inf.wAttributes; + BYTE oldLowByte = oldDisplayAttribute & 0xFF; + BYTE newLowByte; + switch (oldLowByte) { + case 0x07: + // newLowByte = FOREGROUND_BLUE | FOREGROUND_INTENSITY; // too dim + // newLowByte = FOREGROUND_BLUE; // even dimmer + newLowByte = FOREGROUND_BLUE | + FOREGROUND_GREEN; // most similar to xterm appearance + break; + case 0x70: + newLowByte = BACKGROUND_BLUE | BACKGROUND_INTENSITY; + break; + default: + newLowByte = oldLowByte ^ 0xFF; // default to inverse video + break; + } + inf.wAttributes = (inf.wAttributes & 0xFF00) | newLowByte; + SetConsoleTextAttribute(console_out, inf.wAttributes); + } else { + SetConsoleTextAttribute(console_out, oldDisplayAttribute); + } +#else + if (enhancedDisplay) { + char const* p = (error ? "\x1b[1;31m" : "\x1b[1;34m"); + if (write(1, p, 7) == -1) + return; /* bright blue (visible with both B&W bg) */ + } else { + if (write(1, "\x1b[0m", 4) == -1) return; /* reset */ + } +#endif +} + +/** + * Display the dynamic incremental search prompt and the current user input + * line. 
+ * @param pi PromptBase struct holding information about the prompt and our + * screen position + * @param buf32 input buffer to be displayed + * @param len count of characters in the buffer + * @param pos current cursor position within the buffer (0 <= pos <= len) + */ +static void dynamicRefresh(PromptBase& pi, char32_t* buf32, int len, int pos) { + // calculate the position of the end of the prompt + int xEndOfPrompt, yEndOfPrompt; + calculateScreenPosition(0, 0, pi.promptScreenColumns, pi.promptChars, + xEndOfPrompt, yEndOfPrompt); + pi.promptIndentation = xEndOfPrompt; + + // calculate the position of the end of the input line + int xEndOfInput, yEndOfInput; + calculateScreenPosition(xEndOfPrompt, yEndOfPrompt, pi.promptScreenColumns, + calculateColumnPosition(buf32, len), xEndOfInput, + yEndOfInput); + + // calculate the desired position of the cursor + int xCursorPos, yCursorPos; + calculateScreenPosition(xEndOfPrompt, yEndOfPrompt, pi.promptScreenColumns, + calculateColumnPosition(buf32, pos), xCursorPos, + yCursorPos); + +#ifdef _WIN32 + // position at the start of the prompt, clear to end of previous input + CONSOLE_SCREEN_BUFFER_INFO inf; + GetConsoleScreenBufferInfo(console_out, &inf); + inf.dwCursorPosition.X = 0; + inf.dwCursorPosition.Y -= pi.promptCursorRowOffset /*- pi.promptExtraLines*/; + SetConsoleCursorPosition(console_out, inf.dwCursorPosition); + DWORD count; + FillConsoleOutputCharacterA(console_out, ' ', + pi.promptPreviousLen + pi.promptPreviousInputLen, + inf.dwCursorPosition, &count); + pi.promptPreviousLen = pi.promptIndentation; + pi.promptPreviousInputLen = len; + + // display the prompt + if (!pi.write()) return; + + // display the input line + if (write32(1, buf32, len) == -1) return; + + // position the cursor + GetConsoleScreenBufferInfo(console_out, &inf); + inf.dwCursorPosition.X = xCursorPos; // 0-based on Win32 + inf.dwCursorPosition.Y -= yEndOfInput - yCursorPos; + SetConsoleCursorPosition(console_out, inf.dwCursorPosition); 
+#else // _WIN32 + char seq[64]; + int cursorRowMovement = pi.promptCursorRowOffset - pi.promptExtraLines; + if (cursorRowMovement > 0) { // move the cursor up as required + snprintf(seq, sizeof seq, "\x1b[%dA", cursorRowMovement); + if (write(1, seq, strlen(seq)) == -1) return; + } + // position at the start of the prompt, clear to end of screen + snprintf(seq, sizeof seq, "\x1b[1G\x1b[J"); // 1-based on VT100 + if (write(1, seq, strlen(seq)) == -1) return; + + // display the prompt + if (!pi.write()) return; + + // display the input line + if (write32(1, buf32, len) == -1) return; + + // we have to generate our own newline on line wrap + if (xEndOfInput == 0 && yEndOfInput > 0) + if (write(1, "\n", 1) == -1) return; + + // position the cursor + cursorRowMovement = yEndOfInput - yCursorPos; + if (cursorRowMovement > 0) { // move the cursor up as required + snprintf(seq, sizeof seq, "\x1b[%dA", cursorRowMovement); + if (write(1, seq, strlen(seq)) == -1) return; + } + // position the cursor within the line + snprintf(seq, sizeof seq, "\x1b[%dG", xCursorPos + 1); // 1-based on VT100 + if (write(1, seq, strlen(seq)) == -1) return; +#endif + + pi.promptCursorRowOffset = + pi.promptExtraLines + yCursorPos; // remember row for next pass +} + +/** + * Refresh the user's input line: the prompt is already onscreen and is not + * redrawn here + * @param pi PromptBase struct holding information about the prompt and our + * screen position + */ +void InputBuffer::refreshLine(PromptBase& pi) { + // check for a matching brace/bracket/paren, remember its position if found + int highlight = -1; + bool indicateError = false; + if (pos < len) { + /* this scans for a brace matching buf32[pos] to highlight */ + unsigned char part1, part2; + int scanDirection = 0; + if (strchr("}])", buf32[pos])) { + scanDirection = -1; /* backwards */ + if (buf32[pos] == '}') { + part1 = '}'; part2 = '{'; + } else if (buf32[pos] == ']') { + part1 = ']'; part2 = '['; + } else { + part1 = ')'; part2 = 
'('; + } + } + else if (strchr("{[(", buf32[pos])) { + scanDirection = 1; /* forwards */ + if (buf32[pos] == '{') { + //part1 = '{'; part2 = '}'; + part1 = '}'; part2 = '{'; + } else if (buf32[pos] == '[') { + //part1 = '['; part2 = ']'; + part1 = ']'; part2 = '['; + } else { + //part1 = '('; part2 = ')'; + part1 = ')'; part2 = '('; + } + } + + if (scanDirection) { + int unmatched = scanDirection; + int unmatchedOther = 0; + for (int i = pos + scanDirection; i >= 0 && i < len; i += scanDirection) { + /* TODO: the right thing when inside a string */ + if (strchr("}])", buf32[i])) { + if (buf32[i] == part1) { + --unmatched; + } else { + --unmatchedOther; + } + } else if (strchr("{[(", buf32[i])) { + if (buf32[i] == part2) { + ++unmatched; + } else { + ++unmatchedOther; + } + } +/* + if (strchr("}])", buf32[i])) + --unmatched; + else if (strchr("{[(", buf32[i])) + ++unmatched; +*/ + if (unmatched == 0) { + highlight = i; + indicateError = (unmatchedOther != 0); + break; + } + } + } + } + + // calculate the position of the end of the input line + int xEndOfInput, yEndOfInput; + calculateScreenPosition(pi.promptIndentation, 0, pi.promptScreenColumns, + calculateColumnPosition(buf32, len), xEndOfInput, + yEndOfInput); + + // calculate the desired position of the cursor + int xCursorPos, yCursorPos; + calculateScreenPosition(pi.promptIndentation, 0, pi.promptScreenColumns, + calculateColumnPosition(buf32, pos), xCursorPos, + yCursorPos); + +#ifdef _WIN32 + // position at the end of the prompt, clear to end of previous input + CONSOLE_SCREEN_BUFFER_INFO inf; + GetConsoleScreenBufferInfo(console_out, &inf); + inf.dwCursorPosition.X = pi.promptIndentation; // 0-based on Win32 + inf.dwCursorPosition.Y -= pi.promptCursorRowOffset - pi.promptExtraLines; + SetConsoleCursorPosition(console_out, inf.dwCursorPosition); + DWORD count; + if (len < pi.promptPreviousInputLen) + FillConsoleOutputCharacterA(console_out, ' ', pi.promptPreviousInputLen, + inf.dwCursorPosition, &count); + 
pi.promptPreviousInputLen = len; + + // display the input line + if (highlight == -1) { + if (write32(1, buf32, len) == -1) return; + } else { + if (write32(1, buf32, highlight) == -1) return; + setDisplayAttribute(true, indicateError); /* bright blue (visible with both B&W bg) */ + if (write32(1, &buf32[highlight], 1) == -1) return; + setDisplayAttribute(false, indicateError); + if (write32(1, buf32 + highlight + 1, len - highlight - 1) == -1) return; + } + + // position the cursor + GetConsoleScreenBufferInfo(console_out, &inf); + inf.dwCursorPosition.X = xCursorPos; // 0-based on Win32 + inf.dwCursorPosition.Y -= yEndOfInput - yCursorPos; + SetConsoleCursorPosition(console_out, inf.dwCursorPosition); +#else // _WIN32 + char seq[64]; + int cursorRowMovement = pi.promptCursorRowOffset - pi.promptExtraLines; + if (cursorRowMovement > 0) { // move the cursor up as required + snprintf(seq, sizeof seq, "\x1b[%dA", cursorRowMovement); + if (write(1, seq, strlen(seq)) == -1) return; + } + // position at the end of the prompt, clear to end of screen + snprintf(seq, sizeof seq, "\x1b[%dG\x1b[J", + pi.promptIndentation + 1); // 1-based on VT100 + if (write(1, seq, strlen(seq)) == -1) return; + + if (highlight == -1) { // write unhighlighted text + if (write32(1, buf32, len) == -1) return; + } else { // highlight the matching brace/bracket/parenthesis + if (write32(1, buf32, highlight) == -1) return; + setDisplayAttribute(true, indicateError); + if (write32(1, &buf32[highlight], 1) == -1) return; + setDisplayAttribute(false, indicateError); + if (write32(1, buf32 + highlight + 1, len - highlight - 1) == -1) return; + } + + // we have to generate our own newline on line wrap + if (xEndOfInput == 0 && yEndOfInput > 0) + if (write(1, "\n", 1) == -1) return; + + // position the cursor + cursorRowMovement = yEndOfInput - yCursorPos; + if (cursorRowMovement > 0) { // move the cursor up as required + snprintf(seq, sizeof seq, "\x1b[%dA", cursorRowMovement); + if (write(1, seq, 
strlen(seq)) == -1) return; + } + // position the cursor within the line + snprintf(seq, sizeof seq, "\x1b[%dG", xCursorPos + 1); // 1-based on VT100 + if (write(1, seq, strlen(seq)) == -1) return; +#endif + + pi.promptCursorRowOffset = + pi.promptExtraLines + yCursorPos; // remember row for next pass +} + +#ifndef _WIN32 + +/** + * Read a UTF-8 sequence from the non-Windows keyboard and return the Unicode + * (char32_t) character it + * encodes + * + * @return char32_t Unicode character + */ +static char32_t readUnicodeCharacter(void) { + static char8_t utf8String[5]; + static size_t utf8Count = 0; + while (true) { + char8_t c; + + /* Continue reading if interrupted by signal. */ + ssize_t nread; + do { + nread = read(0, &c, 1); + } while ((nread == -1) && (errno == EINTR)); + + if (nread <= 0) return 0; + if (c <= 0x7F) { // short circuit ASCII + utf8Count = 0; + return c; + } else if (utf8Count < sizeof(utf8String) - 1) { + utf8String[utf8Count++] = c; + utf8String[utf8Count] = 0; + char32_t unicodeChar[2]; + size_t ucharCount; + ConversionResult res = + copyString8to32(unicodeChar, 2, ucharCount, utf8String); + if (res == conversionOK && ucharCount) { + utf8Count = 0; + return unicodeChar[0]; + } + } else { + utf8Count = + 0; // this shouldn't happen: got four bytes but no UTF-8 character + } + } +} + +namespace EscapeSequenceProcessing { // move these out of global namespace + +// This chunk of code does parsing of the escape sequences sent by various Linux +// terminals. +// +// It handles arrow keys, Home, End and Delete keys by interpreting the +// sequences sent by +// gnome terminal, xterm, rxvt, konsole, aterm and yakuake including the Alt and +// Ctrl key +// combinations that are understood by linenoise. +// +// The parsing uses tables, a bunch of intermediate dispatch routines and a +// doDispatch +// loop that reads the tables and sends control to "deeper" routines to continue +// the +// parsing. 
The starting call to doDispatch( c, initialDispatch ) will +// eventually return +// either a character (with optional CTRL and META bits set), or -1 if parsing +// fails, or +// zero if an attempt to read from the keyboard fails. +// +// This is rather sloppy escape sequence processing, since we're not paying +// attention to what the +// actual TERM is set to and are processing all key sequences for all terminals, +// but it works with +// the most common keystrokes on the most common terminals. It's intricate, but +// the nested 'if' +// statements required to do it directly would be worse. This way has the +// advantage of allowing +// changes and extensions without having to touch a lot of code. + +// This is a typedef for the routine called by doDispatch(). It takes the +// current character +// as input, does any required processing including reading more characters and +// calling other +// dispatch routines, then eventually returns the final (possibly extended or +// special) character. +// +typedef char32_t (*CharacterDispatchRoutine)(char32_t); + +// This structure is used by doDispatch() to hold a list of characters to test +// for and +// a list of routines to call if the character matches. The dispatch routine +// list is one +// longer than the character list; the final entry is used if no character +// matches. +// +struct CharacterDispatch { + unsigned int len; // length of the chars list + const char* chars; // chars to test + CharacterDispatchRoutine* dispatch; // array of routines to call +}; + +// This dispatch routine is given a dispatch table and then farms work out to +// routines +// listed in the table based on the character it is called with. The dispatch +// routines can +// read more input characters to decide what should eventually be returned. +// Eventually, +// a called routine returns either a character or -1 to indicate parsing +// failure. 
+//
+static char32_t doDispatch(char32_t c, CharacterDispatch& dispatchTable) {
+  for (unsigned int i = 0; i < dispatchTable.len; ++i) {
+    // compare as unsigned so a table character is never sign-extended
+    if (static_cast<unsigned char>(dispatchTable.chars[i]) == c) {
+      return dispatchTable.dispatch[i](c);
+    }
+  }
+  // no character matched: the extra final entry is the fallback routine
+  return dispatchTable.dispatch[dispatchTable.len](c);
+}
+
+static char32_t thisKeyMetaCtrl =
+    0;  // holds pre-set Meta and/or Ctrl modifiers
+
+// Final dispatch routines -- return something
+//
+// Each one combines the modifiers collected in thisKeyMetaCtrl with the key
+// it stands for.
+//
+static char32_t normalKeyRoutine(char32_t c) { return thisKeyMetaCtrl | c; }
+static char32_t upArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | UP_ARROW_KEY;
+}
+static char32_t downArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | DOWN_ARROW_KEY;
+}
+static char32_t rightArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | RIGHT_ARROW_KEY;
+}
+static char32_t leftArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | LEFT_ARROW_KEY;
+}
+static char32_t homeKeyRoutine(char32_t) { return thisKeyMetaCtrl | HOME_KEY; }
+static char32_t endKeyRoutine(char32_t) { return thisKeyMetaCtrl | END_KEY; }
+static char32_t pageUpKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | PAGE_UP_KEY;
+}
+static char32_t pageDownKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | PAGE_DOWN_KEY;
+}
+static char32_t deleteCharRoutine(char32_t) {
+  return thisKeyMetaCtrl | ctrlChar('H');
+}  // key labeled Backspace
+static char32_t deleteKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | DELETE_KEY;
+}  // key labeled Delete
+static char32_t ctrlUpArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | CTRL | UP_ARROW_KEY;
+}
+static char32_t ctrlDownArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | CTRL | DOWN_ARROW_KEY;
+}
+static char32_t ctrlRightArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | CTRL | RIGHT_ARROW_KEY;
+}
+static char32_t ctrlLeftArrowKeyRoutine(char32_t) {
+  return thisKeyMetaCtrl | CTRL | LEFT_ARROW_KEY;
+}
+// Fallback for unrecognized sequences: beep and report parsing failure.
+static char32_t escFailureRoutine(char32_t) {
+  beep();
+  return static_cast<char32_t>(-1);  // the "-1" failure value, made explicit
+}
+
+// Handle ESC [ 1 ; 3 (or 5) escape sequences
+//
+static CharacterDispatchRoutine escLeftBracket1Semicolon3or5Routines[] = {
+    upArrowKeyRoutine, downArrowKeyRoutine, rightArrowKeyRoutine,
+    leftArrowKeyRoutine, escFailureRoutine};
+static CharacterDispatch escLeftBracket1Semicolon3or5Dispatch = {
+    4, "ABCD", escLeftBracket1Semicolon3or5Routines};
+
+// Handle ESC [ 1 ; escape sequences
+//
+// "3" adds the META modifier and "5" adds the CTRL modifier before the final
+// character is dispatched.
+//
+static char32_t escLeftBracket1Semicolon3Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  thisKeyMetaCtrl |= META;
+  return doDispatch(c, escLeftBracket1Semicolon3or5Dispatch);
+}
+static char32_t escLeftBracket1Semicolon5Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  thisKeyMetaCtrl |= CTRL;
+  return doDispatch(c, escLeftBracket1Semicolon3or5Dispatch);
+}
+static CharacterDispatchRoutine escLeftBracket1SemicolonRoutines[] = {
+    escLeftBracket1Semicolon3Routine, escLeftBracket1Semicolon5Routine,
+    escFailureRoutine};
+static CharacterDispatch escLeftBracket1SemicolonDispatch = {
+    2, "35", escLeftBracket1SemicolonRoutines};
+
+// Handle ESC [ 1 escape sequences
+//
+static char32_t escLeftBracket1SemicolonRoutine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket1SemicolonDispatch);
+}
+static CharacterDispatchRoutine escLeftBracket1Routines[] = {
+    homeKeyRoutine, escLeftBracket1SemicolonRoutine, escFailureRoutine};
+static CharacterDispatch escLeftBracket1Dispatch = {2, "~;",
+                                                    escLeftBracket1Routines};
+
+// Handle ESC [ 3 escape sequences
+//
+static CharacterDispatchRoutine escLeftBracket3Routines[] = {deleteKeyRoutine,
+                                                             escFailureRoutine};
+static CharacterDispatch escLeftBracket3Dispatch = {1, "~",
+                                                    escLeftBracket3Routines};
+
+// Handle ESC [ 4 escape sequences
+//
+static CharacterDispatchRoutine escLeftBracket4Routines[] = {endKeyRoutine,
+                                                             escFailureRoutine};
+static CharacterDispatch escLeftBracket4Dispatch = {1, "~",
+                                                    escLeftBracket4Routines};
+
+// Handle ESC [ 5 escape sequences
+//
+// (each table below lists the routine for every character in its string, in
+// the same order, followed by the routine used when nothing matches)
+//
+static CharacterDispatchRoutine escLeftBracket5Routines[] = {pageUpKeyRoutine,
+                                                             escFailureRoutine};
+static CharacterDispatch escLeftBracket5Dispatch = {1, "~",
+                                                    escLeftBracket5Routines};
+
+// Handle ESC [ 6 escape sequences
+//
+static CharacterDispatchRoutine escLeftBracket6Routines[] = {pageDownKeyRoutine,
+                                                             escFailureRoutine};
+static CharacterDispatch escLeftBracket6Dispatch = {1, "~",
+                                                    escLeftBracket6Routines};
+
+// Handle ESC [ 7 escape sequences
+//
+static CharacterDispatchRoutine escLeftBracket7Routines[] = {homeKeyRoutine,
+                                                             escFailureRoutine};
+static CharacterDispatch escLeftBracket7Dispatch = {1, "~",
+                                                    escLeftBracket7Routines};
+
+// Handle ESC [ 8 escape sequences
+//
+static CharacterDispatchRoutine escLeftBracket8Routines[] = {endKeyRoutine,
+                                                             escFailureRoutine};
+static CharacterDispatch escLeftBracket8Dispatch = {1, "~",
+                                                    escLeftBracket8Routines};
+
+// Handle ESC [ <digit> escape sequences
+//
+// One routine per digit. Digits with a dispatch table read the next
+// character (returning 0 if that read fails) and dispatch it through the
+// table; digits 0, 2 and 9 have no table and report failure.
+//
+static char32_t escLeftBracket0Routine(char32_t c) {
+  return escFailureRoutine(c);
+}
+static char32_t escLeftBracket1Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket1Dispatch);
+}
+static char32_t escLeftBracket2Routine(char32_t c) {
+  return escFailureRoutine(c);  // Insert key, unused
+}
+static char32_t escLeftBracket3Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket3Dispatch);
+}
+static char32_t escLeftBracket4Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket4Dispatch);
+}
+static char32_t escLeftBracket5Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket5Dispatch);
+}
+static char32_t escLeftBracket6Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket6Dispatch);
+}
+static char32_t escLeftBracket7Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket7Dispatch);
+}
+static char32_t escLeftBracket8Routine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracket8Dispatch);
+}
+static char32_t escLeftBracket9Routine(char32_t c) {
+  return escFailureRoutine(c);
+}
+
+// Handle ESC [ escape sequences
+//
+// The routines are listed in the order of the characters "ABCDHF0123456789".
+//
+static CharacterDispatchRoutine escLeftBracketRoutines[] = {
+    upArrowKeyRoutine,      downArrowKeyRoutine,    rightArrowKeyRoutine,
+    leftArrowKeyRoutine,    homeKeyRoutine,         endKeyRoutine,
+    escLeftBracket0Routine, escLeftBracket1Routine, escLeftBracket2Routine,
+    escLeftBracket3Routine, escLeftBracket4Routine, escLeftBracket5Routine,
+    escLeftBracket6Routine, escLeftBracket7Routine, escLeftBracket8Routine,
+    escLeftBracket9Routine, escFailureRoutine};
+static CharacterDispatch escLeftBracketDispatch = {16, "ABCDHF0123456789",
+                                                   escLeftBracketRoutines};
+
+// Handle ESC O escape sequences
+//
+// The routines are listed in the order of the characters "ABCDHFabcd"; the
+// lower-case letters map to the Ctrl+arrow routines.
+//
+static CharacterDispatchRoutine escORoutines[] = {
+    upArrowKeyRoutine,       downArrowKeyRoutine,     rightArrowKeyRoutine,
+    leftArrowKeyRoutine,     homeKeyRoutine,          endKeyRoutine,
+    ctrlUpArrowKeyRoutine,   ctrlDownArrowKeyRoutine, ctrlRightArrowKeyRoutine,
+    ctrlLeftArrowKeyRoutine, escFailureRoutine};
+static CharacterDispatch escODispatch = {10, "ABCDHFabcd", escORoutines};
+
+// Initial ESC dispatch -- could be a Meta prefix or the start of an escape
+// sequence
+//
+static char32_t escLeftBracketRoutine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escLeftBracketDispatch);
+}
+static char32_t escORoutine(char32_t c) {
+  c = readUnicodeCharacter();
+  if (c == 0) return 0;
+  return doDispatch(c, escODispatch);
+}
+static char32_t setMetaRoutine(char32_t c);  // need forward reference
+static CharacterDispatchRoutine escRoutines[] = {escLeftBracketRoutine,
+                                                 escORoutine, setMetaRoutine};
+static CharacterDispatch escDispatch = {2, "[O", escRoutines};
+
+// Initial
dispatch -- we are not in the middle of anything yet +// +static char32_t escRoutine(char32_t c) { + c = readUnicodeCharacter(); + if (c == 0) return 0; + return doDispatch(c, escDispatch); +} +static CharacterDispatchRoutine initialRoutines[] = { + escRoutine, deleteCharRoutine, normalKeyRoutine}; +static CharacterDispatch initialDispatch = {2, "\x1B\x7F", initialRoutines}; + +// Special handling for the ESC key because it does double duty +// +static char32_t setMetaRoutine(char32_t c) { + thisKeyMetaCtrl = META; + if (c == 0x1B) { // another ESC, stay in ESC processing mode + c = readUnicodeCharacter(); + if (c == 0) return 0; + return doDispatch(c, escDispatch); + } + return doDispatch(c, initialDispatch); +} + +} // namespace EscapeSequenceProcessing // move these out of global namespace + +#endif // #ifndef _WIN32 + +// linenoiseReadChar -- read a keystroke or keychord from the keyboard, and +// translate it +// into an encoded "keystroke". When convenient, extended keys are translated +// into their +// simpler Emacs keystrokes, so an unmodified "left arrow" becomes Ctrl-B. +// +// A return value of zero means "no input available", and a return value of -1 +// means "invalid key". +// +static char32_t linenoiseReadChar(void) { +#ifdef _WIN32 + + INPUT_RECORD rec; + DWORD count; + int modifierKeys = 0; + bool escSeen = false; + while (true) { + ReadConsoleInputW(console_in, &rec, 1, &count); +#if 0 // helper for debugging keystrokes, display info in the debug "Output" + // window in the debugger + { + if ( rec.EventType == KEY_EVENT ) { + //if ( rec.Event.KeyEvent.uChar.UnicodeChar ) { + char buf[1024]; + sprintf( + buf, + "Unicode character 0x%04X, repeat count %d, virtual keycode 0x%04X, " + "virtual scancode 0x%04X, key %s%s%s%s%s\n", + rec.Event.KeyEvent.uChar.UnicodeChar, + rec.Event.KeyEvent.wRepeatCount, + rec.Event.KeyEvent.wVirtualKeyCode, + rec.Event.KeyEvent.wVirtualScanCode, + rec.Event.KeyEvent.bKeyDown ? 
"down" : "up", + (rec.Event.KeyEvent.dwControlKeyState & LEFT_CTRL_PRESSED) ? + " L-Ctrl" : "", + (rec.Event.KeyEvent.dwControlKeyState & RIGHT_CTRL_PRESSED) ? + " R-Ctrl" : "", + (rec.Event.KeyEvent.dwControlKeyState & LEFT_ALT_PRESSED) ? + " L-Alt" : "", + (rec.Event.KeyEvent.dwControlKeyState & RIGHT_ALT_PRESSED) ? + " R-Alt" : "" + ); + OutputDebugStringA( buf ); + //} + } + } +#endif + if (rec.EventType != KEY_EVENT) { + continue; + } + // Windows provides for entry of characters that are not on your keyboard by + // sending the + // Unicode characters as a "key up" with virtual keycode 0x12 (VK_MENU == + // Alt key) ... + // accept these characters, otherwise only process characters on "key down" + if (!rec.Event.KeyEvent.bKeyDown && + rec.Event.KeyEvent.wVirtualKeyCode != VK_MENU) { + continue; + } + modifierKeys = 0; + // AltGr is encoded as ( LEFT_CTRL_PRESSED | RIGHT_ALT_PRESSED ), so don't + // treat this + // combination as either CTRL or META we just turn off those two bits, so it + // is still + // possible to combine CTRL and/or META with an AltGr key by using + // right-Ctrl and/or + // left-Alt + if ((rec.Event.KeyEvent.dwControlKeyState & + (LEFT_CTRL_PRESSED | RIGHT_ALT_PRESSED)) == + (LEFT_CTRL_PRESSED | RIGHT_ALT_PRESSED)) { + rec.Event.KeyEvent.dwControlKeyState &= + ~(LEFT_CTRL_PRESSED | RIGHT_ALT_PRESSED); + } + if (rec.Event.KeyEvent.dwControlKeyState & + (RIGHT_CTRL_PRESSED | LEFT_CTRL_PRESSED)) { + modifierKeys |= CTRL; + } + if (rec.Event.KeyEvent.dwControlKeyState & + (RIGHT_ALT_PRESSED | LEFT_ALT_PRESSED)) { + modifierKeys |= META; + } + if (escSeen) { + modifierKeys |= META; + } + if (rec.Event.KeyEvent.uChar.UnicodeChar == 0) { + switch (rec.Event.KeyEvent.wVirtualKeyCode) { + case VK_LEFT: + return modifierKeys | LEFT_ARROW_KEY; + case VK_RIGHT: + return modifierKeys | RIGHT_ARROW_KEY; + case VK_UP: + return modifierKeys | UP_ARROW_KEY; + case VK_DOWN: + return modifierKeys | DOWN_ARROW_KEY; + case VK_DELETE: + return modifierKeys | 
DELETE_KEY; + case VK_HOME: + return modifierKeys | HOME_KEY; + case VK_END: + return modifierKeys | END_KEY; + case VK_PRIOR: + return modifierKeys | PAGE_UP_KEY; + case VK_NEXT: + return modifierKeys | PAGE_DOWN_KEY; + default: + continue; // in raw mode, ReadConsoleInput shows shift, ctrl ... + } // ... ignore them + } else if (rec.Event.KeyEvent.uChar.UnicodeChar == + ctrlChar('[')) { // ESC, set flag for later + escSeen = true; + continue; + } else { + // we got a real character, return it + return modifierKeys | rec.Event.KeyEvent.uChar.UnicodeChar; + } + } + +#else + char32_t c; + c = readUnicodeCharacter(); + if (c == 0) return 0; + +// If _DEBUG_LINUX_KEYBOARD is set, then ctrl-^ puts us into a keyboard +// debugging mode +// where we print out decimal and decoded values for whatever the "terminal" +// program +// gives us on different keystrokes. Hit ctrl-C to exit this mode. +// +#define _DEBUG_LINUX_KEYBOARD +#if defined(_DEBUG_LINUX_KEYBOARD) + if (c == ctrlChar('^')) { // ctrl-^, special debug mode, prints all keys hit, + // ctrl-C to get out + printf( + "\nEntering keyboard debugging mode (on ctrl-^), press ctrl-C to exit " + "this mode\n"); + while (true) { + unsigned char keys[10]; + int ret = read(0, keys, 10); + + if (ret <= 0) { + printf("\nret: %d\n", ret); + } + for (int i = 0; i < ret; ++i) { + char32_t key = static_cast(keys[i]); + char* friendlyTextPtr; + char friendlyTextBuf[10]; + const char* prefixText = (key < 0x80) ? "" : "0x80+"; + char32_t keyCopy = (key < 0x80) ? key : key - 0x80; + if (keyCopy >= '!' 
&& keyCopy <= '~') { // printable + friendlyTextBuf[0] = '\''; + friendlyTextBuf[1] = keyCopy; + friendlyTextBuf[2] = '\''; + friendlyTextBuf[3] = 0; + friendlyTextPtr = friendlyTextBuf; + } else if (keyCopy == ' ') { + friendlyTextPtr = const_cast("space"); + } else if (keyCopy == 27) { + friendlyTextPtr = const_cast("ESC"); + } else if (keyCopy == 0) { + friendlyTextPtr = const_cast("NUL"); + } else if (keyCopy == 127) { + friendlyTextPtr = const_cast("DEL"); + } else { + friendlyTextBuf[0] = '^'; + friendlyTextBuf[1] = keyCopy + 0x40; + friendlyTextBuf[2] = 0; + friendlyTextPtr = friendlyTextBuf; + } + printf("%d x%02X (%s%s) ", key, key, prefixText, friendlyTextPtr); + } + printf("\x1b[1G\n"); // go to first column of new line + + // drop out of this loop on ctrl-C + if (keys[0] == ctrlChar('C')) { + printf("Leaving keyboard debugging mode (on ctrl-C)\n"); + fflush(stdout); + return -2; + } + } + } +#endif // _DEBUG_LINUX_KEYBOARD + + EscapeSequenceProcessing::thisKeyMetaCtrl = + 0; // no modifiers yet at initialDispatch + return EscapeSequenceProcessing::doDispatch( + c, EscapeSequenceProcessing::initialDispatch); +#endif // #_WIN32 +} + +/** + * Free memory used in a recent command completion session + * + * @param lc pointer to a linenoiseCompletions struct + */ +static void freeCompletions(linenoiseCompletions* lc) { + lc->completionStrings.clear(); +} + +/** + * convert {CTRL + 'A'}, {CTRL + 'a'} and {CTRL + ctrlChar( 'A' )} into + * ctrlChar( 'A' ) + * leave META alone + * + * @param c character to clean up + * @return cleaned-up character + */ +static int cleanupCtrl(int c) { + if (c & CTRL) { + int d = c & 0x1FF; + if (d >= 'a' && d <= 'z') { + c = (c + ('a' - ctrlChar('A'))) & ~CTRL; + } + if (d >= 'A' && d <= 'Z') { + c = (c + ('A' - ctrlChar('A'))) & ~CTRL; + } + if (d >= ctrlChar('A') && d <= ctrlChar('Z')) { + c = c & ~CTRL; + } + } + return c; +} + +// break characters that may precede items to be completed +static const char breakChars[] = " 
=+-/\\*?\"'`&<>;|@{([])}"; + +// maximum number of completions to display without asking +static const size_t completionCountCutoff = 100; + +/** + * Handle command completion, using a completionCallback() routine to provide + * possible substitutions + * This routine handles the mechanics of updating the user's input buffer with + * possible replacement + * of text as the user selects a proposed completion string, or cancels the + * completion attempt. + * @param pi PromptBase struct holding information about the prompt and our + * screen position + */ +int InputBuffer::completeLine(PromptBase& pi) { + linenoiseCompletions lc; + char32_t c = 0; + + // completionCallback() expects a parsable entity, so find the previous break + // character and + // extract a copy to parse. we also handle the case where tab is hit while + // not at end-of-line. + int startIndex = pos; + while (--startIndex >= 0) { + if (strchr(breakChars, buf32[startIndex])) { + break; + } + } + ++startIndex; + int itemLength = pos - startIndex; + Utf32String unicodeCopy(&buf32[startIndex], itemLength); + Utf8String parseItem(unicodeCopy); + + // get a list of completions + completionCallback(parseItem.get(), &lc); + + // if no completions, we are done + if (lc.completionStrings.size() == 0) { + beep(); + freeCompletions(&lc); + return 0; + } + + // at least one completion + int longestCommonPrefix = 0; + int displayLength = 0; + if (lc.completionStrings.size() == 1) { + longestCommonPrefix = static_cast(lc.completionStrings[0].length()); + } else { + bool keepGoing = true; + while (keepGoing) { + for (size_t j = 0; j < lc.completionStrings.size() - 1; ++j) { + char32_t c1 = lc.completionStrings[j][longestCommonPrefix]; + char32_t c2 = lc.completionStrings[j + 1][longestCommonPrefix]; + if ((0 == c1) || (0 == c2) || (c1 != c2)) { + keepGoing = false; + break; + } + } + if (keepGoing) { + ++longestCommonPrefix; + } + } + } + if (lc.completionStrings.size() != 1) { // beep if ambiguous + beep(); + } 
+ + // if we can extend the item, extend it and return to main loop + if (longestCommonPrefix > itemLength) { + displayLength = len + longestCommonPrefix - itemLength; + if (displayLength > buflen) { + longestCommonPrefix -= displayLength - buflen; // don't overflow buffer + displayLength = buflen; // truncate the insertion + beep(); // and make a noise + } + Utf32String displayText(displayLength + 1); + memcpy(displayText.get(), buf32, sizeof(char32_t) * startIndex); + memcpy(&displayText[startIndex], &lc.completionStrings[0][0], + sizeof(char32_t) * longestCommonPrefix); + int tailIndex = startIndex + longestCommonPrefix; + memcpy(&displayText[tailIndex], &buf32[pos], + sizeof(char32_t) * (displayLength - tailIndex + 1)); + copyString32(buf32, displayText.get(), displayLength); + pos = startIndex + longestCommonPrefix; + len = displayLength; + refreshLine(pi); + return 0; + } + + // we can't complete any further, wait for second tab + do { + c = linenoiseReadChar(); + c = cleanupCtrl(c); + } while (c == static_cast(-1)); + + // if any character other than tab, pass it to the main loop + if (c != ctrlChar('I')) { + freeCompletions(&lc); + return c; + } + + // we got a second tab, maybe show list of possible completions + bool showCompletions = true; + bool onNewLine = false; + if (lc.completionStrings.size() > completionCountCutoff) { + int savePos = + pos; // move cursor to EOL to avoid overwriting the command line + pos = len; + refreshLine(pi); + pos = savePos; + printf("\nDisplay all %u possibilities? 
(y or n)", + static_cast(lc.completionStrings.size())); + fflush(stdout); + onNewLine = true; + while (c != 'y' && c != 'Y' && c != 'n' && c != 'N' && c != ctrlChar('C')) { + do { + c = linenoiseReadChar(); + c = cleanupCtrl(c); + } while (c == static_cast(-1)); + } + switch (c) { + case 'n': + case 'N': + showCompletions = false; + freeCompletions(&lc); + break; + case ctrlChar('C'): + showCompletions = false; + freeCompletions(&lc); + if (write(1, "^C", 2) == -1) return -1; // Display the ^C we got + c = 0; + break; + default: + break; + } + } + + // if showing the list, do it the way readline does it + bool stopList = false; + if (showCompletions) { + int longestCompletion = 0; + for (size_t j = 0; j < lc.completionStrings.size(); ++j) { + itemLength = static_cast(lc.completionStrings[j].length()); + if (itemLength > longestCompletion) { + longestCompletion = itemLength; + } + } + longestCompletion += 2; + int columnCount = pi.promptScreenColumns / longestCompletion; + if (columnCount < 1) { + columnCount = 1; + } + if (!onNewLine) { // skip this if we showed "Display all %d possibilities?" 
+ int savePos = + pos; // move cursor to EOL to avoid overwriting the command line + pos = len; + refreshLine(pi); + pos = savePos; + } + size_t pauseRow = getScreenRows() - 1; + size_t rowCount = + (lc.completionStrings.size() + columnCount - 1) / columnCount; + for (size_t row = 0; row < rowCount; ++row) { + if (row == pauseRow) { + printf("\n--More--"); + fflush(stdout); + c = 0; + bool doBeep = false; + while (c != ' ' && c != '\r' && c != '\n' && c != 'y' && c != 'Y' && + c != 'n' && c != 'N' && c != 'q' && c != 'Q' && + c != ctrlChar('C')) { + if (doBeep) { + beep(); + } + doBeep = true; + do { + c = linenoiseReadChar(); + c = cleanupCtrl(c); + } while (c == static_cast(-1)); + } + switch (c) { + case ' ': + case 'y': + case 'Y': + printf("\r \r"); + pauseRow += getScreenRows() - 1; + break; + case '\r': + case '\n': + printf("\r \r"); + ++pauseRow; + break; + case 'n': + case 'N': + case 'q': + case 'Q': + printf("\r \r"); + stopList = true; + break; + case ctrlChar('C'): + if (write(1, "^C", 2) == -1) return -1; // Display the ^C we got + stopList = true; + break; + default: + break; + } + } else { + printf("\n"); + } + if (stopList) { + break; + } + for (int column = 0; column < columnCount; ++column) { + size_t index = (column * rowCount) + row; + if (index < lc.completionStrings.size()) { + itemLength = static_cast(lc.completionStrings[index].length()); + fflush(stdout); + if (write32(1, lc.completionStrings[index].get(), itemLength) == -1) + return -1; + if (((column + 1) * rowCount) + row < lc.completionStrings.size()) { + for (int k = itemLength; k < longestCompletion; ++k) { + printf(" "); + } + } + } + } + } + fflush(stdout); + freeCompletions(&lc); + } + + // display the prompt on a new line, then redisplay the input buffer + if (!stopList || c == ctrlChar('C')) { + if (write(1, "\n", 1) == -1) return 0; + } + if (!pi.write()) return 0; +#ifndef _WIN32 + // we have to generate our own newline on line wrap on Linux + if (pi.promptIndentation == 0 && 
pi.promptExtraLines > 0) + if (write(1, "\n", 1) == -1) return 0; +#endif + pi.promptCursorRowOffset = pi.promptExtraLines; + refreshLine(pi); + return 0; +} + +/** + * Clear the screen ONLY (no redisplay of anything) + */ +void linenoiseClearScreen(void) { +#ifdef _WIN32 + COORD coord = {0, 0}; + CONSOLE_SCREEN_BUFFER_INFO inf; + HANDLE screenHandle = GetStdHandle(STD_OUTPUT_HANDLE); + GetConsoleScreenBufferInfo(screenHandle, &inf); + SetConsoleCursorPosition(screenHandle, coord); + DWORD count; + FillConsoleOutputCharacterA(screenHandle, ' ', inf.dwSize.X * inf.dwSize.Y, + coord, &count); +#else + if (write(1, "\x1b[H\x1b[2J", 7) <= 0) return; +#endif +} + +void InputBuffer::clearScreen(PromptBase& pi) { + linenoiseClearScreen(); + if (!pi.write()) return; +#ifndef _WIN32 + // we have to generate our own newline on line wrap on Linux + if (pi.promptIndentation == 0 && pi.promptExtraLines > 0) + if (write(1, "\n", 1) == -1) return; +#endif + pi.promptCursorRowOffset = pi.promptExtraLines; + refreshLine(pi); +} + +/** + * Incremental history search -- take over the prompt and keyboard as the user + * types a search + * string, deletes characters from it, changes direction, and either accepts the + * found line (for + * execution or editing) or cancels. 
+ * @param pi PromptBase struct holding information about the (old, + * static) prompt and our + * screen position + * @param startChar the character that began the search, used to set the initial + * direction + */ +int InputBuffer::incrementalHistorySearch(PromptBase& pi, int startChar) { + size_t bufferSize; + size_t ucharCount = 0; + + // if not already recalling, add the current line to the history list so we + // don't have to + // special case it + if (historyIndex == historyLen - 1) { + free(history[historyLen - 1]); + bufferSize = sizeof(char32_t) * len + 1; + unique_ptr tempBuffer(new char[bufferSize]); + copyString32to8(tempBuffer.get(), bufferSize, buf32); + history[historyLen - 1] = strdup8(tempBuffer.get()); + } + int historyLineLength = len; + int historyLinePosition = pos; + char32_t emptyBuffer[1]; + char emptyWidths[1]; + InputBuffer empty(emptyBuffer, emptyWidths, 1); + empty.refreshLine(pi); // erase the old input first + DynamicPrompt dp(pi, (startChar == ctrlChar('R')) ? 
-1 : 1); + + dp.promptPreviousLen = pi.promptPreviousLen; + dp.promptPreviousInputLen = pi.promptPreviousInputLen; + dynamicRefresh(dp, buf32, historyLineLength, + historyLinePosition); // draw user's text with our prompt + + // loop until we get an exit character + int c = 0; + bool keepLooping = true; + bool useSearchedLine = true; + bool searchAgain = false; + char32_t* activeHistoryLine = 0; + while (keepLooping) { + c = linenoiseReadChar(); + c = cleanupCtrl(c); // convert CTRL + <char> into normal ctrl + + switch (c) { + // these characters keep the selected text but do not execute it + case ctrlChar('A'): // ctrl-A, move cursor to start of line + case HOME_KEY: + case ctrlChar('B'): // ctrl-B, move cursor left by one character + case LEFT_ARROW_KEY: + case META + 'b': // meta-B, move cursor left by one word + case META + 'B': + case CTRL + LEFT_ARROW_KEY: + case META + LEFT_ARROW_KEY: // Emacs allows Meta, bash & readline don't + case ctrlChar('D'): + case META + 'd': // meta-D, kill word to right of cursor + case META + 'D': + case ctrlChar('E'): // ctrl-E, move cursor to end of line + case END_KEY: + case ctrlChar('F'): // ctrl-F, move cursor right by one character + case RIGHT_ARROW_KEY: + case META + 'f': // meta-F, move cursor right by one word + case META + 'F': + case CTRL + RIGHT_ARROW_KEY: + case META + RIGHT_ARROW_KEY: // Emacs allows Meta, bash & readline don't + case META + ctrlChar('H'): + case ctrlChar('J'): + case ctrlChar('K'): // ctrl-K, kill from cursor to end of line + case ctrlChar('M'): + case ctrlChar('N'): // ctrl-N, recall next line in history + case ctrlChar('P'): // ctrl-P, recall previous line in history + case DOWN_ARROW_KEY: + case UP_ARROW_KEY: + case ctrlChar('T'): // ctrl-T, transpose characters + case ctrlChar( + 'U'): // ctrl-U, kill all characters to the left of the cursor + case ctrlChar('W'): + case META + 'y': // meta-Y, "yank-pop", rotate popped text + case META + 'Y': + case 127: + case DELETE_KEY: + case META + '<': // start 
of history + case PAGE_UP_KEY: + case META + '>': // end of history + case PAGE_DOWN_KEY: + keepLooping = false; + break; + + // these characters revert the input line to its previous state + case ctrlChar('C'): // ctrl-C, abort this line + case ctrlChar('G'): + case ctrlChar('L'): // ctrl-L, clear screen and redisplay line + keepLooping = false; + useSearchedLine = false; + if (c != ctrlChar('L')) { + c = -1; // ctrl-C and ctrl-G just abort the search and do nothing + // else + } + break; + + // these characters stay in search mode and update the display + case ctrlChar('S'): + case ctrlChar('R'): + if (dp.searchTextLen == + 0) { // if no current search text, recall previous text + if (previousSearchText.length()) { + dp.updateSearchText(previousSearchText.get()); + } + } + if ((dp.direction == 1 && c == ctrlChar('R')) || + (dp.direction == -1 && c == ctrlChar('S'))) { + dp.direction = 0 - dp.direction; // reverse direction + dp.updateSearchPrompt(); // change the prompt + } else { + searchAgain = true; // same direction, search again + } + break; + +// job control is its own thing +#ifndef _WIN32 + case ctrlChar('Z'): // ctrl-Z, job control + disableRawMode(); // Returning to Linux (whatever) shell, leave raw + // mode + raise(SIGSTOP); // Break out in mid-line + enableRawMode(); // Back from Linux shell, re-enter raw mode + { + bufferSize = historyLineLength + 1; + unique_ptr tempUnicode(new char32_t[bufferSize]); + copyString8to32(tempUnicode.get(), bufferSize, ucharCount, + history[historyIndex]); + dynamicRefresh(dp, tempUnicode.get(), historyLineLength, + historyLinePosition); + } + continue; + break; +#endif + + // these characters update the search string, and hence the selected input + // line + case ctrlChar('H'): // backspace/ctrl-H, delete char to left of cursor + if (dp.searchTextLen > 0) { + unique_ptr tempUnicode(new char32_t[dp.searchTextLen]); + --dp.searchTextLen; + dp.searchText[dp.searchTextLen] = 0; + copyString32(tempUnicode.get(), 
dp.searchText.get(), + dp.searchTextLen); + dp.updateSearchText(tempUnicode.get()); + } else { + beep(); + } + break; + + case ctrlChar('Y'): // ctrl-Y, yank killed text + break; + + default: + if (!isControlChar(c) && c <= 0x0010FFFF) { // not an action character + unique_ptr tempUnicode( + new char32_t[dp.searchTextLen + 2]); + copyString32(tempUnicode.get(), dp.searchText.get(), + dp.searchTextLen); + tempUnicode[dp.searchTextLen] = c; + tempUnicode[dp.searchTextLen + 1] = 0; + dp.updateSearchText(tempUnicode.get()); + } else { + beep(); + } + } // switch + + // if we are staying in search mode, search now + if (keepLooping) { + bufferSize = historyLineLength + 1; + if (activeHistoryLine) { + delete[] activeHistoryLine; + activeHistoryLine = nullptr; + } + activeHistoryLine = new char32_t[bufferSize]; + copyString8to32(activeHistoryLine, bufferSize, ucharCount, + history[historyIndex]); + if (dp.searchTextLen > 0) { + bool found = false; + int historySearchIndex = historyIndex; + int lineLength = static_cast(ucharCount); + int lineSearchPos = historyLinePosition; + if (searchAgain) { + lineSearchPos += dp.direction; + } + searchAgain = false; + while (true) { + while ((dp.direction > 0) ? (lineSearchPos < lineLength) + : (lineSearchPos >= 0)) { + if (strncmp32(dp.searchText.get(), + &activeHistoryLine[lineSearchPos], + dp.searchTextLen) == 0) { + found = true; + break; + } + lineSearchPos += dp.direction; + } + if (found) { + historyIndex = historySearchIndex; + historyLineLength = lineLength; + historyLinePosition = lineSearchPos; + break; + } else if ((dp.direction > 0) ? 
(historySearchIndex < historyLen - 1) + : (historySearchIndex > 0)) { + historySearchIndex += dp.direction; + bufferSize = strlen8(history[historySearchIndex]) + 1; + delete[] activeHistoryLine; + activeHistoryLine = nullptr; + activeHistoryLine = new char32_t[bufferSize]; + copyString8to32(activeHistoryLine, bufferSize, ucharCount, + history[historySearchIndex]); + lineLength = static_cast(ucharCount); + lineSearchPos = + (dp.direction > 0) ? 0 : (lineLength - dp.searchTextLen); + } else { + beep(); + break; + } + }; // while + } + if (activeHistoryLine) { + delete[] activeHistoryLine; + activeHistoryLine = nullptr; + } + bufferSize = historyLineLength + 1; + activeHistoryLine = new char32_t[bufferSize]; + copyString8to32(activeHistoryLine, bufferSize, ucharCount, + history[historyIndex]); + dynamicRefresh(dp, activeHistoryLine, historyLineLength, + historyLinePosition); // draw user's text with our prompt + } + } // while + + // leaving history search, restore previous prompt, maybe make searched line + // current + PromptBase pb; + pb.promptChars = pi.promptIndentation; + pb.promptBytes = pi.promptBytes; + Utf32String tempUnicode(pb.promptBytes + 1); + + copyString32(tempUnicode.get(), &pi.promptText[pi.promptLastLinePosition], + pb.promptBytes - pi.promptLastLinePosition); + tempUnicode.initFromBuffer(); + pb.promptText = tempUnicode; + pb.promptExtraLines = 0; + pb.promptIndentation = pi.promptIndentation; + pb.promptLastLinePosition = 0; + pb.promptPreviousInputLen = historyLineLength; + pb.promptCursorRowOffset = dp.promptCursorRowOffset; + pb.promptScreenColumns = pi.promptScreenColumns; + pb.promptPreviousLen = dp.promptChars; + if (useSearchedLine && activeHistoryLine) { + historyRecallMostRecent = true; + copyString32(buf32, activeHistoryLine, buflen + 1); + len = historyLineLength; + pos = historyLinePosition; + } + if (activeHistoryLine) { + delete[] activeHistoryLine; + activeHistoryLine = nullptr; + } + dynamicRefresh(pb, buf32, len, + pos); // 
redraw the original prompt with current input + pi.promptPreviousInputLen = len; + pi.promptCursorRowOffset = pi.promptExtraLines + pb.promptCursorRowOffset; + previousSearchText = + dp.searchText; // save search text for possible reuse on ctrl-R ctrl-R + return c; // pass a character or -1 back to main loop +} + +static bool isCharacterAlphanumeric(char32_t testChar) { +#ifdef _WIN32 + return (iswalnum((wint_t)testChar) != 0 ? true : false); +#else + return (iswalnum(testChar) != 0 ? true : false); +#endif +} + +#ifndef _WIN32 +static bool gotResize = false; +#endif +static int keyType = 0; + +int InputBuffer::getInputLine(PromptBase& pi) { + keyType = 0; + + // The latest history entry is always our current buffer + if (len > 0) { + size_t bufferSize = sizeof(char32_t) * len + 1; + unique_ptr tempBuffer(new char[bufferSize]); + copyString32to8(tempBuffer.get(), bufferSize, buf32); + linenoiseHistoryAdd(tempBuffer.get()); + } else { + linenoiseHistoryAdd(""); + } + historyIndex = historyLen - 1; + historyRecallMostRecent = false; + + // display the prompt + if (!pi.write()) return -1; + +#ifndef _WIN32 + // we have to generate our own newline on line wrap on Linux + if (pi.promptIndentation == 0 && pi.promptExtraLines > 0) + if (write(1, "\n", 1) == -1) return -1; +#endif + + // the cursor starts out at the end of the prompt + pi.promptCursorRowOffset = pi.promptExtraLines; + + // kill and yank start in "other" mode + killRing.lastAction = KillRing::actionOther; + + // when history search returns control to us, we execute its terminating + // keystroke + int terminatingKeystroke = -1; + + // if there is already text in the buffer, display it first + if (len > 0) { + refreshLine(pi); + } + + // loop collecting characters, respond to line editing characters + while (true) { + int c; + if (terminatingKeystroke == -1) { + c = linenoiseReadChar(); // get a new keystroke + + keyType = 0; + if (c != 0) { + // set flag that we got some input + if (c == ctrlChar('C')) { + 
keyType = 1; + } else if (c == ctrlChar('D')) { + keyType = 2; + } + } + +#ifndef _WIN32 + if (c == 0 && gotResize) { + // caught a window resize event + // now redraw the prompt and line + gotResize = false; + pi.promptScreenColumns = getScreenColumns(); + dynamicRefresh(pi, buf32, len, + pos); // redraw the original prompt with current input + continue; + } +#endif + } else { + c = terminatingKeystroke; // use the terminating keystroke from search + terminatingKeystroke = -1; // clear it once we've used it + } + + c = cleanupCtrl(c); // convert CTRL + <char> into normal ctrl + + if (c == 0) { + return len; + } + + if (c == -1) { + refreshLine(pi); + continue; + } + + if (c == -2) { + if (!pi.write()) return -1; + refreshLine(pi); + continue; + } + + // ctrl-I/tab, command completion, needs to be before switch statement + if (c == ctrlChar('I') && completionCallback) { + if (pos == 0) // SERVER-4967 -- in earlier versions, you could paste + // previous output + continue; // back into the shell ... this output may have leading + // tabs. + // This hack (i.e. what the old code did) prevents command completion + // on an empty line but lets users paste text with leading tabs. 
+ + killRing.lastAction = KillRing::actionOther; + historyRecallMostRecent = false; + + // completeLine does the actual completion and replacement + c = completeLine(pi); + + if (c < 0) // return on error + return len; + + if (c == 0) // read next character when 0 + continue; + + // deliberate fall-through here, so we use the terminating character + } + + switch (c) { + case ctrlChar('A'): // ctrl-A, move cursor to start of line + case HOME_KEY: + killRing.lastAction = KillRing::actionOther; + pos = 0; + refreshLine(pi); + break; + + case ctrlChar('B'): // ctrl-B, move cursor left by one character + case LEFT_ARROW_KEY: + killRing.lastAction = KillRing::actionOther; + if (pos > 0) { + --pos; + refreshLine(pi); + } + break; + + case META + 'b': // meta-B, move cursor left by one word + case META + 'B': + case CTRL + LEFT_ARROW_KEY: + case META + LEFT_ARROW_KEY: // Emacs allows Meta, bash & readline don't + killRing.lastAction = KillRing::actionOther; + if (pos > 0) { + while (pos > 0 && !isCharacterAlphanumeric(buf32[pos - 1])) { + --pos; + } + while (pos > 0 && isCharacterAlphanumeric(buf32[pos - 1])) { + --pos; + } + refreshLine(pi); + } + break; + + case ctrlChar('C'): // ctrl-C, abort this line + killRing.lastAction = KillRing::actionOther; + historyRecallMostRecent = false; + errno = EAGAIN; + --historyLen; + free(history[historyLen]); + // we need one last refresh with the cursor at the end of the line + // so we don't display the next prompt over the previous input line + pos = len; // pass len as pos for EOL + refreshLine(pi); + if (write(1, "^C", 2) == -1) return -1; // Display the ^C we got + return -1; + + case META + 'c': // meta-C, give word initial Cap + case META + 'C': + killRing.lastAction = KillRing::actionOther; + historyRecallMostRecent = false; + if (pos < len) { + while (pos < len && !isCharacterAlphanumeric(buf32[pos])) { + ++pos; + } + if (pos < len && isCharacterAlphanumeric(buf32[pos])) { + if (buf32[pos] >= 'a' && buf32[pos] <= 'z') { + 
buf32[pos] += 'A' - 'a'; + } + ++pos; + } + while (pos < len && isCharacterAlphanumeric(buf32[pos])) { + if (buf32[pos] >= 'A' && buf32[pos] <= 'Z') { + buf32[pos] += 'a' - 'A'; + } + ++pos; + } + refreshLine(pi); + } + break; + + // ctrl-D, delete the character under the cursor + // on an empty line, exit the shell + case ctrlChar('D'): + killRing.lastAction = KillRing::actionOther; + if (len > 0 && pos < len) { + historyRecallMostRecent = false; + memmove(buf32 + pos, buf32 + pos + 1, sizeof(char32_t) * (len - pos)); + --len; + refreshLine(pi); + } else if (len == 0) { + --historyLen; + free(history[historyLen]); + return -1; + } + break; + + case META + 'd': // meta-D, kill word to right of cursor + case META + 'D': + if (pos < len) { + historyRecallMostRecent = false; + int endingPos = pos; + while (endingPos < len && + !isCharacterAlphanumeric(buf32[endingPos])) { + ++endingPos; + } + while (endingPos < len && isCharacterAlphanumeric(buf32[endingPos])) { + ++endingPos; + } + killRing.kill(&buf32[pos], endingPos - pos, true); + memmove(buf32 + pos, buf32 + endingPos, + sizeof(char32_t) * (len - endingPos + 1)); + len -= endingPos - pos; + refreshLine(pi); + } + killRing.lastAction = KillRing::actionKill; + break; + + case ctrlChar('E'): // ctrl-E, move cursor to end of line + case END_KEY: + killRing.lastAction = KillRing::actionOther; + pos = len; + refreshLine(pi); + break; + + case ctrlChar('F'): // ctrl-F, move cursor right by one character + case RIGHT_ARROW_KEY: + killRing.lastAction = KillRing::actionOther; + if (pos < len) { + ++pos; + refreshLine(pi); + } + break; + + case META + 'f': // meta-F, move cursor right by one word + case META + 'F': + case CTRL + RIGHT_ARROW_KEY: + case META + RIGHT_ARROW_KEY: // Emacs allows Meta, bash & readline don't + killRing.lastAction = KillRing::actionOther; + if (pos < len) { + while (pos < len && !isCharacterAlphanumeric(buf32[pos])) { + ++pos; + } + while (pos < len && isCharacterAlphanumeric(buf32[pos])) { + 
++pos; + } + refreshLine(pi); + } + break; + + case ctrlChar('H'): // backspace/ctrl-H, delete char to left of cursor + killRing.lastAction = KillRing::actionOther; + if (pos > 0) { + historyRecallMostRecent = false; + memmove(buf32 + pos - 1, buf32 + pos, + sizeof(char32_t) * (1 + len - pos)); + --pos; + --len; + refreshLine(pi); + } + break; + + // meta-Backspace, kill word to left of cursor + case META + ctrlChar('H'): + if (pos > 0) { + historyRecallMostRecent = false; + int startingPos = pos; + while (pos > 0 && !isCharacterAlphanumeric(buf32[pos - 1])) { + --pos; + } + while (pos > 0 && isCharacterAlphanumeric(buf32[pos - 1])) { + --pos; + } + killRing.kill(&buf32[pos], startingPos - pos, false); + memmove(buf32 + pos, buf32 + startingPos, + sizeof(char32_t) * (len - startingPos + 1)); + len -= startingPos - pos; + refreshLine(pi); + } + killRing.lastAction = KillRing::actionKill; + break; + + case ctrlChar('J'): // ctrl-J/linefeed/newline, accept line + case ctrlChar('M'): // ctrl-M/return/enter + killRing.lastAction = KillRing::actionOther; + // we need one last refresh with the cursor at the end of the line + // so we don't display the next prompt over the previous input line + pos = len; // pass len as pos for EOL + refreshLine(pi); + historyPreviousIndex = historyRecallMostRecent ? 
historyIndex : -2; + --historyLen; + free(history[historyLen]); + return len; + + case ctrlChar('K'): // ctrl-K, kill from cursor to end of line + killRing.kill(&buf32[pos], len - pos, true); + buf32[pos] = '\0'; + len = pos; + refreshLine(pi); + killRing.lastAction = KillRing::actionKill; + historyRecallMostRecent = false; + break; + + case ctrlChar('L'): // ctrl-L, clear screen and redisplay line + clearScreen(pi); + break; + + case META + 'l': // meta-L, lowercase word + case META + 'L': + killRing.lastAction = KillRing::actionOther; + if (pos < len) { + historyRecallMostRecent = false; + while (pos < len && !isCharacterAlphanumeric(buf32[pos])) { + ++pos; + } + while (pos < len && isCharacterAlphanumeric(buf32[pos])) { + if (buf32[pos] >= 'A' && buf32[pos] <= 'Z') { + buf32[pos] += 'a' - 'A'; + } + ++pos; + } + refreshLine(pi); + } + break; + + case ctrlChar('N'): // ctrl-N, recall next line in history + case ctrlChar('P'): // ctrl-P, recall previous line in history + case DOWN_ARROW_KEY: + case UP_ARROW_KEY: + killRing.lastAction = KillRing::actionOther; + // if not already recalling, add the current line to the history list so + // we don't + // have to special case it + if (historyIndex == historyLen - 1) { + free(history[historyLen - 1]); + size_t tempBufferSize = sizeof(char32_t) * len + 1; + unique_ptr tempBuffer(new char[tempBufferSize]); + copyString32to8(tempBuffer.get(), tempBufferSize, buf32); + history[historyLen - 1] = strdup8(tempBuffer.get()); + } + if (historyLen > 1) { + if (c == UP_ARROW_KEY) { + c = ctrlChar('P'); + } + if (historyPreviousIndex != -2 && c != ctrlChar('P')) { + historyIndex = + 1 + historyPreviousIndex; // emulate Windows down-arrow + } else { + historyIndex += (c == ctrlChar('P')) ? 
-1 : 1; + } + historyPreviousIndex = -2; + if (historyIndex < 0) { + historyIndex = 0; + break; + } else if (historyIndex >= historyLen) { + historyIndex = historyLen - 1; + break; + } + historyRecallMostRecent = true; + size_t ucharCount = 0; + copyString8to32(buf32, buflen, ucharCount, history[historyIndex]); + len = pos = static_cast(ucharCount); + refreshLine(pi); + } + break; + + case ctrlChar('R'): // ctrl-R, reverse history search + case ctrlChar('S'): // ctrl-S, forward history search + terminatingKeystroke = incrementalHistorySearch(pi, c); + break; + + case ctrlChar('T'): // ctrl-T, transpose characters + killRing.lastAction = KillRing::actionOther; + if (pos > 0 && len > 1) { + historyRecallMostRecent = false; + size_t leftCharPos = (pos == len) ? pos - 2 : pos - 1; + char32_t aux = buf32[leftCharPos]; + buf32[leftCharPos] = buf32[leftCharPos + 1]; + buf32[leftCharPos + 1] = aux; + if (pos != len) ++pos; + refreshLine(pi); + } + break; + + case ctrlChar( + 'U'): // ctrl-U, kill all characters to the left of the cursor + if (pos > 0) { + historyRecallMostRecent = false; + killRing.kill(&buf32[0], pos, false); + len -= pos; + memmove(buf32, buf32 + pos, sizeof(char32_t) * (len + 1)); + pos = 0; + refreshLine(pi); + } + killRing.lastAction = KillRing::actionKill; + break; + + case META + 'u': // meta-U, uppercase word + case META + 'U': + killRing.lastAction = KillRing::actionOther; + if (pos < len) { + historyRecallMostRecent = false; + while (pos < len && !isCharacterAlphanumeric(buf32[pos])) { + ++pos; + } + while (pos < len && isCharacterAlphanumeric(buf32[pos])) { + if (buf32[pos] >= 'a' && buf32[pos] <= 'z') { + buf32[pos] += 'A' - 'a'; + } + ++pos; + } + refreshLine(pi); + } + break; + + // ctrl-W, kill to whitespace (not word) to left of cursor + case ctrlChar('W'): + if (pos > 0) { + historyRecallMostRecent = false; + int startingPos = pos; + while (pos > 0 && buf32[pos - 1] == ' ') { + --pos; + } + while (pos > 0 && buf32[pos - 1] != ' ') { + 
--pos; + } + killRing.kill(&buf32[pos], startingPos - pos, false); + memmove(buf32 + pos, buf32 + startingPos, + sizeof(char32_t) * (len - startingPos + 1)); + len -= startingPos - pos; + refreshLine(pi); + } + killRing.lastAction = KillRing::actionKill; + break; + + case ctrlChar('Y'): // ctrl-Y, yank killed text + historyRecallMostRecent = false; + { + Utf32String* restoredText = killRing.yank(); + if (restoredText) { + bool truncated = false; + size_t ucharCount = restoredText->length(); + if (ucharCount > static_cast(buflen - len)) { + ucharCount = buflen - len; + truncated = true; + } + memmove(buf32 + pos + ucharCount, buf32 + pos, + sizeof(char32_t) * (len - pos + 1)); + memmove(buf32 + pos, restoredText->get(), + sizeof(char32_t) * ucharCount); + pos += static_cast(ucharCount); + len += static_cast(ucharCount); + refreshLine(pi); + killRing.lastAction = KillRing::actionYank; + killRing.lastYankSize = ucharCount; + if (truncated) { + beep(); + } + } else { + beep(); + } + } + break; + + case META + 'y': // meta-Y, "yank-pop", rotate popped text + case META + 'Y': + if (killRing.lastAction == KillRing::actionYank) { + historyRecallMostRecent = false; + Utf32String* restoredText = killRing.yankPop(); + if (restoredText) { + bool truncated = false; + size_t ucharCount = restoredText->length(); + if (ucharCount > + static_cast(killRing.lastYankSize + buflen - len)) { + ucharCount = killRing.lastYankSize + buflen - len; + truncated = true; + } + if (ucharCount > killRing.lastYankSize) { + memmove(buf32 + pos + ucharCount - killRing.lastYankSize, + buf32 + pos, sizeof(char32_t) * (len - pos + 1)); + memmove(buf32 + pos - killRing.lastYankSize, restoredText->get(), + sizeof(char32_t) * ucharCount); + } else { + memmove(buf32 + pos - killRing.lastYankSize, restoredText->get(), + sizeof(char32_t) * ucharCount); + memmove(buf32 + pos + ucharCount - killRing.lastYankSize, + buf32 + pos, sizeof(char32_t) * (len - pos + 1)); + } + pos += static_cast(ucharCount - 
killRing.lastYankSize); + len += static_cast(ucharCount - killRing.lastYankSize); + killRing.lastYankSize = ucharCount; + refreshLine(pi); + if (truncated) { + beep(); + } + break; + } + } + beep(); + break; + +#ifndef _WIN32 + case ctrlChar('Z'): // ctrl-Z, job control + disableRawMode(); // Returning to Linux (whatever) shell, leave raw + // mode + raise(SIGSTOP); // Break out in mid-line + enableRawMode(); // Back from Linux shell, re-enter raw mode + if (!pi.write()) break; // Redraw prompt + refreshLine(pi); // Refresh the line + break; +#endif + + // DEL, delete the character under the cursor + case 127: + case DELETE_KEY: + killRing.lastAction = KillRing::actionOther; + if (len > 0 && pos < len) { + historyRecallMostRecent = false; + memmove(buf32 + pos, buf32 + pos + 1, sizeof(char32_t) * (len - pos)); + --len; + refreshLine(pi); + } + break; + + case META + '<': // meta-<, beginning of history + case PAGE_UP_KEY: // Page Up, beginning of history + case META + '>': // meta->, end of history + case PAGE_DOWN_KEY: // Page Down, end of history + killRing.lastAction = KillRing::actionOther; + // if not already recalling, add the current line to the history list so + // we don't + // have to special case it + if (historyIndex == historyLen - 1) { + free(history[historyLen - 1]); + size_t tempBufferSize = sizeof(char32_t) * len + 1; + unique_ptr tempBuffer(new char[tempBufferSize]); + copyString32to8(tempBuffer.get(), tempBufferSize, buf32); + history[historyLen - 1] = strdup8(tempBuffer.get()); + } + if (historyLen > 1) { + historyIndex = + (c == META + '<' || c == PAGE_UP_KEY) ? 
0 : historyLen - 1; + historyPreviousIndex = -2; + historyRecallMostRecent = true; + size_t ucharCount = 0; + copyString8to32(buf32, buflen, ucharCount, history[historyIndex]); + len = pos = static_cast(ucharCount); + refreshLine(pi); + } + break; + + // not one of our special characters, maybe insert it in the buffer + default: + killRing.lastAction = KillRing::actionOther; + historyRecallMostRecent = false; + if (c & (META | CTRL)) { // beep on unknown Ctrl and/or Meta keys + beep(); + break; + } + if (len < buflen) { + if (isControlChar(c)) { // don't insert control characters + beep(); + break; + } + if (len == pos) { // at end of buffer + buf32[pos] = c; + ++pos; + ++len; + buf32[len] = '\0'; + int inputLen = calculateColumnPosition(buf32, len); + if (pi.promptIndentation + inputLen < pi.promptScreenColumns) { + if (inputLen > pi.promptPreviousInputLen) + pi.promptPreviousInputLen = inputLen; + /* Avoid a full update of the line in the + * trivial case. */ + if (write32(1, reinterpret_cast(&c), 1) == -1) + return -1; + } else { + refreshLine(pi); + } + } else { // not at end of buffer, have to move characters to our + // right + memmove(buf32 + pos + 1, buf32 + pos, + sizeof(char32_t) * (len - pos)); + buf32[pos] = c; + ++len; + ++pos; + buf32[len] = '\0'; + refreshLine(pi); + } + } else { + beep(); // buffer is full, beep on new characters + } + break; + } + } + return len; +} + +static string preloadedBufferContents; // used with linenoisePreloadBuffer +static string preloadErrorMessage; + +/** + * linenoisePreloadBuffer provides text to be inserted into the command buffer + * + * the provided text will be processed to be usable and will be used to preload + * the input buffer on the next call to linenoise() + * + * @param preloadText text to begin with on the next call to linenoise() + */ +void linenoisePreloadBuffer(const char* preloadText) { + if (!preloadText) { + return; + } + int bufferSize = static_cast(strlen(preloadText) + 1); + unique_ptr 
tempBuffer(new char[bufferSize]); + strncpy(&tempBuffer[0], preloadText, bufferSize); + + // remove characters that won't display correctly + char* pIn = &tempBuffer[0]; + char* pOut = pIn; + bool controlsStripped = false; + bool whitespaceSeen = false; + while (*pIn) { + unsigned char c = + *pIn++; // we need unsigned so chars 0x80 and above are allowed + if ('\r' == c) { // silently skip CR + continue; + } + if ('\n' == c || '\t' == c) { // note newline or tab + whitespaceSeen = true; + continue; + } + if (isControlChar( + c)) { // remove other control characters, flag for message + controlsStripped = true; + *pOut++ = ' '; + continue; + } + if (whitespaceSeen) { // convert whitespace to a single space + *pOut++ = ' '; + whitespaceSeen = false; + } + *pOut++ = c; + } + *pOut = 0; + int processedLength = static_cast(pOut - tempBuffer.get()); + bool lineTruncated = false; + if (processedLength > (LINENOISE_MAX_LINE - 1)) { + lineTruncated = true; + tempBuffer[LINENOISE_MAX_LINE - 1] = 0; + } + preloadedBufferContents = tempBuffer.get(); + if (controlsStripped) { + preloadErrorMessage += + " [Edited line: control characters were converted to spaces]\n"; + } + if (lineTruncated) { + preloadErrorMessage += " [Edited line: the line length was reduced from "; + char buf[128]; + snprintf(buf, sizeof(buf), "%d to %d]\n", processedLength, + (LINENOISE_MAX_LINE - 1)); + preloadErrorMessage += buf; + } +} + +/** + * linenoise is a readline replacement. 
+ * + * call it with a prompt to display and it will return a line of input from the + * user + * + * @param prompt text of prompt to display to the user + * @return the returned string belongs to the caller on return and must be + * freed to prevent + * memory leaks + */ +char* linenoise(const char* prompt) { +#ifndef _WIN32 + gotResize = false; +#endif + if (isatty(STDIN_FILENO)) { // input is from a terminal + char32_t buf32[LINENOISE_MAX_LINE]; + char charWidths[LINENOISE_MAX_LINE]; + if (!preloadErrorMessage.empty()) { + printf("%s", preloadErrorMessage.c_str()); + fflush(stdout); + preloadErrorMessage.clear(); + } + PromptInfo pi(prompt, getScreenColumns()); + if (isUnsupportedTerm()) { + if (!pi.write()) return 0; + fflush(stdout); + if (preloadedBufferContents.empty()) { + unique_ptr buf8(new char[LINENOISE_MAX_LINE]); + if (fgets(buf8.get(), LINENOISE_MAX_LINE, stdin) == NULL) { + return NULL; + } + size_t len = strlen(buf8.get()); + while (len && (buf8[len - 1] == '\n' || buf8[len - 1] == '\r')) { + --len; + buf8[len] = '\0'; + } + return strdup(buf8.get()); // caller must free buffer + } else { + char* buf8 = strdup(preloadedBufferContents.c_str()); + preloadedBufferContents.clear(); + return buf8; // caller must free buffer + } + } else { + if (enableRawMode() == -1) { + return NULL; + } + InputBuffer ib(buf32, charWidths, LINENOISE_MAX_LINE); + if (!preloadedBufferContents.empty()) { + ib.preloadBuffer(preloadedBufferContents.c_str()); + preloadedBufferContents.clear(); + } + int count = ib.getInputLine(pi); + disableRawMode(); + printf("\n"); + if (count == -1) { + return NULL; + } + size_t bufferSize = sizeof(char32_t) * ib.length() + 1; + unique_ptr buf8(new char[bufferSize]); + copyString32to8(buf8.get(), bufferSize, buf32); + return strdup(buf8.get()); // caller must free buffer + } + } else { // input not from a terminal, we should work with piped input, i.e. 
+ // redirected stdin + unique_ptr buf8(new char[LINENOISE_MAX_LINE]); + if (fgets(buf8.get(), LINENOISE_MAX_LINE, stdin) == NULL) { + return NULL; + } + + // if fgets() gave us the newline, remove it + int count = static_cast(strlen(buf8.get())); + if (count > 0 && buf8[count - 1] == '\n') { + --count; + buf8[count] = '\0'; + } + return strdup(buf8.get()); // caller must free buffer + } +} + +/* Register a callback function to be called for tab-completion. */ +void linenoiseSetCompletionCallback(linenoiseCompletionCallback* fn) { + completionCallback = fn; +} + +void linenoiseAddCompletion(linenoiseCompletions* lc, const char* str) { + lc->completionStrings.push_back(Utf32String(str)); +} + +int linenoiseHistoryAdd(const char* line) { + if (historyMaxLen == 0) { + return 0; + } + if (history == NULL) { + history = + reinterpret_cast(malloc(sizeof(char8_t*) * historyMaxLen)); + if (history == NULL) { + return 0; + } + memset(history, 0, (sizeof(char*) * historyMaxLen)); + } + char8_t* linecopy = strdup8(line); + if (!linecopy) { + return 0; + } + + // convert newlines in multi-line code to spaces before storing + char8_t* p = linecopy; + while (*p) { + if (*p == '\n') { + *p = ' '; + } + ++p; + } + + // prevent duplicate history entries + if (historyLen > 0 && history[historyLen - 1] != nullptr && + strcmp(reinterpret_cast(history[historyLen - 1]), + reinterpret_cast(linecopy)) == 0) { + free(linecopy); + return 0; + } + + if (historyLen == historyMaxLen) { + free(history[0]); + memmove(history, history + 1, sizeof(char*) * (historyMaxLen - 1)); + --historyLen; + if (--historyPreviousIndex < -1) { + historyPreviousIndex = -2; + } + } + + history[historyLen] = linecopy; + ++historyLen; + return 1; +} + +int linenoiseHistorySetMaxLen(int len) { + if (len < 1) { + return 0; + } + if (history) { + int tocopy = historyLen; + char8_t** newHistory = + reinterpret_cast(malloc(sizeof(char8_t*) * len)); + if (newHistory == NULL) { + return 0; + } + if (len < tocopy) { + 
tocopy = len; + } + memcpy(newHistory, history + historyMaxLen - tocopy, + sizeof(char8_t*) * tocopy); + free(history); + history = newHistory; + } + historyMaxLen = len; + if (historyLen > historyMaxLen) { + historyLen = historyMaxLen; + } + return 1; +} + +/* Fetch a line of the history by (zero-based) index. If the requested + * line does not exist, NULL is returned. The return value is a heap-allocated + * copy of the line, and the caller is responsible for de-allocating it. */ +char* linenoiseHistoryLine(int index) { + if (index < 0 || index >= historyLen) return NULL; + + return strdup(reinterpret_cast(history[index])); +} + +/* Save the history in the specified file. On success 0 is returned + * otherwise -1 is returned. */ +int linenoiseHistorySave(const char* filename) { +#if _WIN32 + FILE* fp = fopen(filename, "wt"); +#else + int fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR); + + if (fd < 0) { + return -1; + } + + FILE* fp = fdopen(fd, "wt"); +#endif + + if (fp == NULL) { + return -1; + } + + for (int j = 0; j < historyLen; ++j) { + if (history[j][0] != '\0') { + fprintf(fp, "%s\n", history[j]); + } + } + + fclose(fp); + + return 0; +} + +/* Load the history from the specified file. If the file does not exist + * zero is returned and no operation is performed. + * + * If the file exists and the operation succeeded 0 is returned, otherwise + * on error -1 is returned. */ +int linenoiseHistoryLoad(const char* filename) { + FILE* fp = fopen(filename, "rt"); + if (fp == NULL) { + return -1; + } + + char buf[LINENOISE_MAX_LINE]; + while (fgets(buf, LINENOISE_MAX_LINE, fp) != NULL) { + char* p = strchr(buf, '\r'); + if (!p) { + p = strchr(buf, '\n'); + } + if (p) { + *p = '\0'; + } + if (p != buf) { + linenoiseHistoryAdd(buf); + } + } + fclose(fp); + return 0; +} + +/* Set if to use or not the multi line mode. 
*/ +/* note that this is a stub only, as linenoise-ng always multi-line */ +void linenoiseSetMultiLine(int) {} + +/* This special mode is used by linenoise in order to print scan codes + * on screen for debugging / development purposes. It is implemented + * by the linenoise_example program using the --keycodes option. */ +void linenoisePrintKeyCodes(void) { + char quit[4]; + + printf( + "Linenoise key codes debugging mode.\n" + "Press keys to see scan codes. Type 'quit' at any time to exit.\n"); + if (enableRawMode() == -1) return; + memset(quit, ' ', 4); + while (1) { + char c; + int nread; + +#if _WIN32 + nread = _read(STDIN_FILENO, &c, 1); +#else + nread = read(STDIN_FILENO, &c, 1); +#endif + if (nread <= 0) continue; + memmove(quit, quit + 1, sizeof(quit) - 1); /* shift string to left. */ + quit[sizeof(quit) - 1] = c; /* Insert current char on the right. */ + if (memcmp(quit, "quit", sizeof(quit)) == 0) break; + + printf("'%c' %02x (%d) (type quit to exit)\n", isprint(c) ? c : '?', (int)c, + (int)c); + printf("\r"); /* Go left edge manually, we are in raw mode. */ + fflush(stdout); + } + disableRawMode(); +} + +#ifndef _WIN32 +static void WindowSizeChanged(int) { + // do nothing here but setting this flag + gotResize = true; +} +#endif + +int linenoiseInstallWindowChangeHandler(void) { +#ifndef _WIN32 + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = &WindowSizeChanged; + + if (sigaction(SIGWINCH, &sa, nullptr) == -1) { + return errno; + } +#endif + return 0; +} + +int linenoiseKeyType(void) { + return keyType; +} diff --git a/3rdparty/linenoise-ng/source/wcwidth.cpp b/3rdparty/linenoise-ng/source/wcwidth.cpp new file mode 100644 index 0000000..deec0ba --- /dev/null +++ b/3rdparty/linenoise-ng/source/wcwidth.cpp @@ -0,0 +1,315 @@ +/* + * This is an implementation of wcwidth() and wcswidth() (defined in + * IEEE Std 1002.1-2001) for Unicode. 
+ * + * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html + * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html + * + * In fixed-width output devices, Latin characters all occupy a single + * "cell" position of equal width, whereas ideographic CJK characters + * occupy two such cells. Interoperability between terminal-line + * applications and (teletype-style) character terminals using the + * UTF-8 encoding requires agreement on which character should advance + * the cursor by how many cell positions. No established formal + * standards exist at present on which Unicode character shall occupy + * how many cell positions on character terminals. These routines are + * a first attempt of defining such behavior based on simple rules + * applied to data provided by the Unicode Consortium. + * + * For some graphical characters, the Unicode standard explicitly + * defines a character-cell width via the definition of the East Asian + * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. + * In all these cases, there is no ambiguity about which width a + * terminal shall use. For characters in the East Asian Ambiguous (A) + * class, the width choice depends purely on a preference of backward + * compatibility with either historic CJK or Western practice. + * Choosing single-width for these characters is easy to justify as + * the appropriate long-term solution, as the CJK practice of + * displaying these characters as double-width comes from historic + * implementation simplicity (8-bit encoded characters were displayed + * single-width and 16-bit ones double-width, even for Greek, + * Cyrillic, etc.) and not any typographic considerations. + * + * Much less clear is the choice of width for the Not East Asian + * (Neutral) class. Existing practice does not dictate a width for any + * of these characters. 
It would nevertheless make sense + * typographically to allocate two character cells to characters such + * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be + * represented adequately with a single-width glyph. The following + * routines at present merely assign a single-cell width to all + * neutral characters, in the interest of simplicity. This is not + * entirely satisfactory and should be reconsidered before + * establishing a formal standard in this area. At the moment, the + * decision which Not East Asian (Neutral) characters should be + * represented by double-width glyphs cannot yet be answered by + * applying a simple rule from the Unicode database content. Setting + * up a proper standard for the behavior of UTF-8 character terminals + * will require a careful analysis not only of each Unicode character, + * but also of each presentation form, something the author of these + * routines has avoided to do so far. + * + * http://www.unicode.org/unicode/reports/tr11/ + * + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. 
+ * + * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +#include +#include +#include + +namespace linenoise_ng { + +struct interval { + char32_t first; + char32_t last; +}; + +/* auxiliary function for binary search in interval table */ +static int bisearch(char32_t ucs, const struct interval *table, int max) { + int min = 0; + int mid; + + if (ucs < table[0].first || ucs > table[max].last) + return 0; + while (max >= min) { + mid = (min + max) / 2; + if (ucs > table[mid].last) + min = mid + 1; + else if (ucs < table[mid].first) + max = mid - 1; + else + return 1; + } + + return 0; +} + + +/* The following two functions define the column width of an ISO 10646 + * character as follows: + * + * - The null character (U+0000) has a column width of 0. + * + * - Other C0/C1 control characters and DEL will lead to a return + * value of -1. + * + * - Non-spacing and enclosing combining characters (general + * category code Mn or Me in the Unicode database) have a + * column width of 0. + * + * - SOFT HYPHEN (U+00AD) has a column width of 1. + * + * - Other format characters (general category code Cf in the Unicode + * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. + * + * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) + * have a column width of 0. + * + * - Spacing characters in the East Asian Wide (W) or East Asian + * Full-width (F) category as defined in Unicode Technical + * Report #11 have a column width of 2. + * + * - All remaining characters (including all printable + * ISO 8859-1 and WGL4 characters, Unicode control characters, + * etc.) have a column width of 1. + * + * This implementation assumes that wchar_t characters are encoded + * in ISO 10646. 
+ */ + +int mk_wcwidth(char32_t ucs) +{ + /* sorted list of non-overlapping intervals of non-spacing characters */ + /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ + static const struct interval combining[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, + { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, + { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, + { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, + { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, + { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, + { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, + { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, + { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, + { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, + { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, + { 0x1032, 0x1032 }, 
{ 0x1036, 0x1037 }, { 0x1039, 0x1039 }, + { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, + { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, + { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, + { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, + { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, + { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, + { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, + { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 }, + { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, + { 0xE0100, 0xE01EF } + }; + + /* test for 8-bit control characters */ + if (ucs == 0) + return 0; + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) + return -1; + + /* binary search in table of non-spacing characters */ + if (bisearch(ucs, combining, + sizeof(combining) / sizeof(struct interval) - 1)) + return 0; + + /* if we arrive here, ucs is not a combining or C0/C1 control character */ + + return 1 + + (ucs >= 0x1100 && + (ucs <= 0x115f || /* Hangul Jamo init. consonants */ + ucs == 0x2329 || ucs == 0x232a || + (ucs >= 0x2e80 && ucs <= 0xa4cf && + ucs != 0x303f) || /* CJK ... 
Yi */ + (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ + (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ + (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */ + (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ + (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ + (ucs >= 0xffe0 && ucs <= 0xffe6) || + (ucs >= 0x20000 && ucs <= 0x2fffd) || + (ucs >= 0x30000 && ucs <= 0x3fffd))); +} + + +int mk_wcswidth(const char32_t* pwcs, size_t n) +{ + int w, width = 0; + + for (;*pwcs && n-- > 0; pwcs++) + if ((w = mk_wcwidth(*pwcs)) < 0) + return -1; + else + width += w; + + return width; +} + + +/* + * The following functions are the same as mk_wcwidth() and + * mk_wcswidth(), except that spacing characters in the East Asian + * Ambiguous (A) category as defined in Unicode Technical Report #11 + * have a column width of 2. This variant might be useful for users of + * CJK legacy encodings who want to migrate to UCS without changing + * the traditional terminal character-width behaviour. It is not + * otherwise recommended for general use. 
+ */ +int mk_wcwidth_cjk(wchar_t ucs) +{ + /* sorted list of non-overlapping intervals of East Asian Ambiguous + * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ + static const struct interval ambiguous[] = { + { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, + { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 }, + { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, + { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, + { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED }, + { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA }, + { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 }, + { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B }, + { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 }, + { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 }, + { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 }, + { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE }, + { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 }, + { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA }, + { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 }, + { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB }, + { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB }, + { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 }, + { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 }, + { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 }, + { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 }, + { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 }, + { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 }, + { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 }, + { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC }, + { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 }, + { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 }, + { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 }, + { 0x215B, 0x215E }, { 
0x2160, 0x216B }, { 0x2170, 0x2179 }, + { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 }, + { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 }, + { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B }, + { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 }, + { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 }, + { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E }, + { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 }, + { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 }, + { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F }, + { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 }, + { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF }, + { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B }, + { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 }, + { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 }, + { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 }, + { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 }, + { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 }, + { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 }, + { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 }, + { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 }, + { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F }, + { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, + { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } + }; + + /* binary search in table of non-spacing characters */ + if (bisearch(ucs, ambiguous, + sizeof(ambiguous) / sizeof(struct interval) - 1)) + return 2; + + return mk_wcwidth(ucs); +} + + +int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n) +{ + int w, width = 0; + + for (;*pwcs && n-- > 0; pwcs++) + if ((w = mk_wcwidth_cjk(*pwcs)) < 0) + return -1; + else + width += w; + + return width; +} + +} diff --git a/CMakeLists.txt b/CMakeLists.txt index e16b5a5..b7951c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,6 
+171,7 @@ endif() # Project modules # +add_subdirectory(3rdparty) add_subdirectory(source) add_subdirectory(docs) add_subdirectory(deploy) diff --git a/LICENSE b/LICENSE index 54b6182..eb0e4f0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,8 +1,75 @@ -Copyright (c) 2013-2016 Computer Graphics Systems Group at the Hasso-Plattner-Institute, Germany. +Copyright (c) 2013-2016 Computer Graphics Systems Group at the Hasso-Plattner-Institute, Germany. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +linenoise.cpp +============= + +Copyright (c) 2010, Salvatore Sanfilippo +Copyright (c) 2010, Pieter Noordhuis + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Redis nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +wcwidth.cpp +=========== + +Markus Kuhn -- 2007-05-26 (Unicode 5.0) + +Permission to use, copy, modify, and distribute this software +for any purpose and without fee is hereby granted. The author +disclaims all warranties with regard to this software. + + + +ConvertUTF.cpp +============== + +Copyright 2001-2004 Unicode, Inc. + +Disclaimer + +This source code is provided as is by Unicode, Inc. No claims are +made as to fitness for any particular purpose. No warranties of any +kind are expressed or implied. The recipient agrees to determine +applicability of information provided. If this file has been +purchased on magnetic or optical media from Unicode, Inc., the +sole remedy for any claim will be exchange of defective media +within 90 days of receipt. + +Limitations on Rights to Redistribute This Code + +Unicode, Inc. 
hereby grants the right to freely use the information +supplied in this file in the creation of products supporting the +Unicode Standard, and to make copies of this file in any form +for internal or external distribution as long as this notice +remains attached. diff --git a/source/cppassist/CMakeLists.txt b/source/cppassist/CMakeLists.txt index e9fe210..213c54d 100644 --- a/source/cppassist/CMakeLists.txt +++ b/source/cppassist/CMakeLists.txt @@ -223,6 +223,7 @@ target_include_directories(${target} target_link_libraries(${target} PRIVATE ${ADDITIONAL_LIBRARIES} + externals::linenoise-ng PUBLIC ${DEFAULT_LIBRARIES} diff --git a/source/cppassist/include/cppassist/string/conversion.h b/source/cppassist/include/cppassist/string/conversion.h index 50bade8..4719749 100644 --- a/source/cppassist/include/cppassist/string/conversion.h +++ b/source/cppassist/include/cppassist/string/conversion.h @@ -25,6 +25,7 @@ enum class Encoding : unsigned int ANSI , ASCII = Encoding::ANSI // for both ANSI and ASCII 1 byte in a std::string or char is used to encode it , UTF8 +, UTF16 }; //@} @@ -99,14 +100,14 @@ CPPASSIST_API std::string toString(const bool & value); * */ CPPASSIST_API std::u32string encode(const std::string & input, Encoding encoding); -//CPPASSIST_API std::u32string encode(const std::wstring & input, Encoding encoding); -//CPPASSIST_API std::u32string encode(const std::u16string & input, Encoding encoding); -//CPPASSIST_API std::u32string encode(const char * input, size_t size, Encoding encoding); - -//CPPASSIST_API void decode(const std::u32string & input, std::string & output, Encoding encoding); -//CPPASSIST_API void decode(const std::u32string & input, std::wstring & output, Encoding encoding); -//CPPASSIST_API void decode(const std::u32string & input, std::u16string & output, Encoding encoding); -//CPPASSIST_API void decode(const std::u32string & input, const char * & output, std::size_t size, Encoding encoding); +CPPASSIST_API std::u32string encode(const 
std::wstring & input, Encoding encoding); +CPPASSIST_API std::u32string encode(const std::u16string & input, Encoding encoding); +CPPASSIST_API std::u32string encode(const char * input, const size_t size, Encoding encoding); + +CPPASSIST_API void decode(const std::u32string & input, std::string & output, Encoding encoding); +CPPASSIST_API void decode(const std::u32string & input, std::wstring & output, Encoding encoding); +CPPASSIST_API void decode(const std::u32string & input, std::u16string & output, Encoding encoding); +CPPASSIST_API void decode(const std::u32string & input, char * & output, std::size_t & size, Encoding encoding); //@} diff --git a/source/cppassist/source/string/conversion.cpp b/source/cppassist/source/string/conversion.cpp index df3037f..3c3dbc1 100644 --- a/source/cppassist/source/string/conversion.cpp +++ b/source/cppassist/source/string/conversion.cpp @@ -5,48 +5,84 @@ #include #include -#ifndef __has_include - #define __has_include(x) 0 -#endif - -#if defined(__GNUG__) && !defined(__clang__) && (__GNUG__ < 5) -// not implemented for GCC < 5 -#define CPPASSIST_CODECVT_AVAILABLE 0 -#elif defined(__clang__) && !__has_include() -// not implemented for clang without codecvt header -#define CPPASSIST_CODECVT_AVAILABLE 0 -#else -#include -#include -#define CPPASSIST_CODECVT_AVAILABLE 1 -#endif +#include namespace { -// [TODO]: probably rename to decodeUTF8 as it is UTF-8 -> UCS4 -void encodeUTF8(const std::string & input, std::u32string & output) +void convertUTF8toUTF32(const std::string & input, std::u32string & output) { -#if CPPASSIST_CODECVT_AVAILABLE - #if defined(_MSC_VER) && (_MSC_VER >= 1900) - // MSVC 2015 and 2017 were compiled using uint32_t instead of char32_t, see https://social.msdn.microsoft.com/Forums/expression/en-US/8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error?forum=vcgeneral - static std::wstring_convert, uint32_t> conversion; + output.resize(input.size()); - const auto temp = 
conversion.from_bytes(input); - output = std::u32string(temp.begin(), temp.end()); - #else - static std::wstring_convert, char32_t> conversion; + const uint8_t * inStart = (uint8_t*) input.data(); + const uint8_t * inEnd = (uint8_t*) input.data() + input.size(); + uint32_t * outStart = (uint32_t*) output.data(); + uint32_t * outEnd = (uint32_t*) output.data() + output.size(); - output = conversion.from_bytes(input); - #endif -#else - #pragma message "encodeUTF8 not implemented since it depends on codecvt" - output.clear(); - output.reserve(input.size()); - std::copy(input.begin(), input.end(), std::back_inserter(output)); -#endif + uint32_t * outStartSaved = (uint32_t*) output.data(); + + // TODO: Error handling + auto error = linenoise_ng::ConvertUTF8toUTF32(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); + + const auto outSize = outStart - outStartSaved; + output.resize(outSize); +} + +void convertUTF16toUTF32(const std::u16string & input, std::u32string & output) +{ + output.resize(input.size()); + + const uint16_t * inStart = (uint16_t*) input.data(); + const uint16_t * inEnd = (uint16_t*) input.data() + input.size(); + uint32_t * outStart = (uint32_t*) output.data(); + uint32_t * outEnd = (uint32_t*) output.data() + output.size(); + + uint32_t * outStartSaved = (uint32_t*) output.data(); + + // TODO: Error handling + auto error = linenoise_ng::ConvertUTF16toUTF32(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); + + const auto outSize = outStart - outStartSaved; + output.resize(outSize); +} + +void convertUTF32toUTF8(const std::u32string & input, std::string & output) +{ + output.resize(input.size() * 6); + + const uint32_t * inStart = (uint32_t*) input.data(); + const uint32_t * inEnd = (uint32_t*) input.data() + input.size(); + uint8_t * outStart = (uint8_t*) output.data(); + uint8_t * outEnd = (uint8_t*) output.data() + output.size(); + + uint8_t * outStartSaved = (uint8_t*) output.data(); + + // TODO: Error handling 
+ auto error = linenoise_ng::ConvertUTF32toUTF8(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); + + const auto outSize = outStart - outStartSaved; + output.resize(outSize); +} + +void convertUTF32toUTF16(const std::u32string & input, std::u16string & output) +{ + // Note: char16_t instead of uint16_t because of an inconsistency in linenoise-ng + output.resize(input.size() * 2); + + const uint32_t * inStart = (uint32_t*) input.data(); + const uint32_t * inEnd = (uint32_t*) input.data() + input.size(); + char16_t * outStart = (char16_t*) output.data(); + char16_t * outEnd = (char16_t*) output.data() + output.size(); + + char16_t * outStartSaved = (char16_t*) output.data(); + + // TODO: Error handling + auto error = linenoise_ng::ConvertUTF32toUTF16(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); + + const auto outSize = outStart - outStartSaved; + output.resize(outSize); } @@ -128,7 +164,7 @@ std::u32string encode(const std::string & input, const Encoding encoding) break; case Encoding::UTF8: - encodeUTF8(input, output); + convertUTF8toUTF32(input, output); break; default: @@ -138,40 +174,122 @@ std::u32string encode(const std::string & input, const Encoding encoding) return output; } -//std::u32string encode(const std::wstring & input, const Encoding encoding) -//{ -// assert(false); -//} -// -//std::u32string encode(const std::u16string & input, const Encoding encoding) -//{ -// assert(false); -//} -// -//std::u32string encode(const char * input, size_t size, const Encoding encoding) -//{ -// assert(false); -//} -// -//void decode(const std::u32string & input, std::string & output, const Encoding encoding) -//{ -// assert(false); -//} -// -//void decode(const std::u32string & input, std::wstring & output, const Encoding encoding) -//{ -// assert(false); -//} -// -//void decode(const std::u32string & input, std::u16string & output, const Encoding encoding) -//{ -// assert(false); -//} -// -//void decode(const std::u32string 
& input, const char * & output, size_t size, const Encoding encoding) -//{ -// assert(false); -//} +std::u32string encode(const std::wstring & input, const Encoding encoding) +{ + switch (sizeof(wchar_t)) + { + case 1: + return encode(std::string(input.begin(), input.end()), encoding); + + case 2: + return encode(std::u16string(input.begin(), input.end()), encoding); + + case 4: + return std::u32string(input.begin(), input.end()); + + default: + assert(false); + } +} + +std::u32string encode(const std::u16string & input, const Encoding encoding) +{ + auto output = std::u32string(); + + switch (encoding) + { + case Encoding::ANSI: + //case Encoding::ASCII: + std::copy(input.begin(), input.end(), std::back_inserter(output)); + break; + + case Encoding::UTF16: + convertUTF16toUTF32(input, output); + break; + + default: + assert(false); + } + + return output; +} + +std::u32string encode(const char * input, const size_t size, const Encoding encoding) +{ + return encode(std::string(input, size), encoding); +} + +void decode(const std::u32string & input, std::string & output, const Encoding encoding) +{ + output.clear(); + + switch (encoding) + { + case Encoding::ANSI: + //case Encoding::ASCII: + std::copy(input.begin(), input.end(), std::back_inserter(output)); + break; + + case Encoding::UTF8: + convertUTF32toUTF8(input, output); + break; + } +} + +void decode(const std::u32string & input, std::wstring & output, const Encoding encoding) +{ + switch(sizeof(wchar_t)) + { + case 1: + { + std::string temp; + decode(input, temp, encoding); + output = std::wstring(temp.begin(), temp.end()); + } + + case 2: + { + std::u16string temp; + decode(input, temp, encoding); + output = std::wstring(temp.begin(), temp.end()); + } + + case 4: + output = std::wstring(input.begin(), input.end()); + + default: + assert(false); + } +} + +void decode(const std::u32string & input, std::u16string & output, const Encoding encoding) +{ + output.clear(); + + switch (encoding) + { + case 
Encoding::ANSI: + //case Encoding::ASCII: + std::copy(input.begin(), input.end(), std::back_inserter(output)); + break; + + case Encoding::UTF16: + convertUTF32toUTF16(input, output); + break; + } +} + +void decode(const std::u32string & input, char * & output, size_t & size, const Encoding encoding) +{ + std::string temp; + decode(input, temp, encoding); + + size = temp.size(); + free(output); + output = (char*)malloc(size + 1); + temp.copy(output, temp.npos); + output[size] = 0x0; +} std::string toLower(const std::string & input) diff --git a/source/tests/cppassist-test/conversion_test.cpp b/source/tests/cppassist-test/conversion_test.cpp index 81c12d6..fe190fb 100644 --- a/source/tests/cppassist-test/conversion_test.cpp +++ b/source/tests/cppassist-test/conversion_test.cpp @@ -276,3 +276,82 @@ TEST_F(conversion_test, toUpper_uniRef) ASSERT_EQ("TOUPPER", output); } + +// from https://unicode-table.com/ +// U+1D11E - Musical Symbol G Clef +// Encoding Hex Dec Bytes +// UTF-8 F0 9D 84 9E 240 157 132 158 +// UTF-16BE D8 34 DD 1E 216 52 221 30 +// UTF-16LE 34 D8 1E DD 52 216 30 221 +// UTF-32BE 00 01 D1 1E 0 1 209 30 +// UTF-32LE 1E D1 01 00 30 209 1 0 + +// Big endian as we use a big endian converter +const char utf8Val [5] = {0xF0, 0x9D, 0x84, 0x9E, 0x0}; +const char16_t utf16Val[3] = {0xD834, 0xDD1E, 0x0}; +const char32_t utf32Val[2] = {0x0001D11E, 0x0}; + +TEST_F(conversion_test, utf_encode_string) +{ + std::string input(utf8Val); + std::u32string expected(utf32Val); + + auto output = string::encode(input, Encoding::UTF8); + + EXPECT_EQ(expected, output); +} + +TEST_F(conversion_test, utf_encode_u16string) +{ + std::u16string input(utf16Val); + std::u32string expected(utf32Val); + + auto output = string::encode(input, Encoding::UTF16); + + EXPECT_EQ(expected, output); +} + +TEST_F(conversion_test, utf_encode_cstr) +{ + const char* input = utf8Val; + const size_t inSize = 4; + std::u32string expected(utf32Val); + + auto output = string::encode(input, inSize, 
Encoding::UTF8); +} + +TEST_F(conversion_test, utf_decode_string) +{ + std::u32string input(utf32Val); + std::string expected(utf8Val); + std::string output; + + string::decode(input, output, Encoding::UTF8); + + EXPECT_EQ(expected, output); +} + +TEST_F(conversion_test, utf_decode_u16string) +{ + std::u32string input(utf32Val); + std::u16string expected(utf16Val); + std::u16string output; + + string::decode(input, output, Encoding::UTF16); + + EXPECT_EQ(expected, output); +} + +TEST_F(conversion_test, utf_decode_cstr) +{ + std::u32string input(utf32Val); + const char* expected = utf8Val; + char* output = nullptr; + size_t outSize = 0; + + string::decode(input, output, outSize, Encoding::UTF8); + + ASSERT_NE(nullptr, output); + EXPECT_EQ(4, outSize); + EXPECT_STREQ(expected, output); +} From 0f8f2e1eea422a652ebb65c486021c3090960483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20Kn=C3=B6schke?= Date: Thu, 6 Sep 2018 15:40:04 +0200 Subject: [PATCH 2/3] Fix GCC errors & warnings --- source/cppassist/source/string/conversion.cpp | 6 ++++++ source/tests/cppassist-test/conversion_test.cpp | 16 +++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/source/cppassist/source/string/conversion.cpp b/source/cppassist/source/string/conversion.cpp index 3c3dbc1..5917c2c 100644 --- a/source/cppassist/source/string/conversion.cpp +++ b/source/cppassist/source/string/conversion.cpp @@ -233,6 +233,9 @@ void decode(const std::u32string & input, std::string & output, const Encoding e case Encoding::UTF8: convertUTF32toUTF8(input, output); break; + + default: + assert(false); } } @@ -276,6 +279,9 @@ void decode(const std::u32string & input, std::u16string & output, const Encodin case Encoding::UTF16: convertUTF32toUTF16(input, output); break; + + default: + assert(false); } } diff --git a/source/tests/cppassist-test/conversion_test.cpp b/source/tests/cppassist-test/conversion_test.cpp index fe190fb..cc9518b 100644 --- 
a/source/tests/cppassist-test/conversion_test.cpp +++ b/source/tests/cppassist-test/conversion_test.cpp @@ -287,13 +287,13 @@ TEST_F(conversion_test, toUpper_uniRef) // UTF-32LE 1E D1 01 00 30 209 1 0 // Big endian as we use a big endian converter -const char utf8Val [5] = {0xF0, 0x9D, 0x84, 0x9E, 0x0}; -const char16_t utf16Val[3] = {0xD834, 0xDD1E, 0x0}; -const char32_t utf32Val[2] = {0x0001D11E, 0x0}; +const unsigned char utf8Val[5] = {0xF0, 0x9D, 0x84, 0x9E, 0x0}; +const char16_t utf16Val[3] = {0xD834, 0xDD1E, 0x0}; +const char32_t utf32Val[2] = {0x0001D11E, 0x0}; TEST_F(conversion_test, utf_encode_string) { - std::string input(utf8Val); + std::string input((char*)utf8Val); std::u32string expected(utf32Val); auto output = string::encode(input, Encoding::UTF8); @@ -313,17 +313,19 @@ TEST_F(conversion_test, utf_encode_u16string) TEST_F(conversion_test, utf_encode_cstr) { - const char* input = utf8Val; + const char* input = (char*)utf8Val; const size_t inSize = 4; std::u32string expected(utf32Val); auto output = string::encode(input, inSize, Encoding::UTF8); + + EXPECT_EQ(expected, output); } TEST_F(conversion_test, utf_decode_string) { std::u32string input(utf32Val); - std::string expected(utf8Val); + std::string expected((char*)utf8Val); std::string output; string::decode(input, output, Encoding::UTF8); @@ -345,7 +347,7 @@ TEST_F(conversion_test, utf_decode_u16string) TEST_F(conversion_test, utf_decode_cstr) { std::u32string input(utf32Val); - const char* expected = utf8Val; + const char* expected = (char*)utf8Val; char* output = nullptr; size_t outSize = 0; From 7af41c2e99f5eca39573487196438fbb3c9055d4 Mon Sep 17 00:00:00 2001 From: Willy Scheibel Date: Tue, 16 Oct 2018 10:33:24 +0200 Subject: [PATCH 3/3] Fix warnings --- 3rdparty/linenoise-ng/CMakeLists.txt | 4 ++-- source/cppassist/source/string/conversion.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/3rdparty/linenoise-ng/CMakeLists.txt 
b/3rdparty/linenoise-ng/CMakeLists.txt index 3f5d1bd..8cfb9c7 100644 --- a/3rdparty/linenoise-ng/CMakeLists.txt +++ b/3rdparty/linenoise-ng/CMakeLists.txt @@ -97,7 +97,7 @@ target_compile_definitions(${target} PRIVATE PUBLIC - $<$>:${target_upper}_STATIC_DEFINE> + ${target_upper}_STATIC_DEFINE ${DEFAULT_COMPILE_DEFINITIONS} INTERFACE @@ -112,7 +112,7 @@ target_compile_options(${target} PRIVATE PUBLIC - ${DEFAULT_COMPILE_OPTIONS} + # ${DEFAULT_COMPILE_OPTIONS} INTERFACE ) diff --git a/source/cppassist/source/string/conversion.cpp b/source/cppassist/source/string/conversion.cpp index 5917c2c..1ca324b 100644 --- a/source/cppassist/source/string/conversion.cpp +++ b/source/cppassist/source/string/conversion.cpp @@ -23,10 +23,9 @@ void convertUTF8toUTF32(const std::string & input, std::u32string & output) uint32_t * outStartSaved = (uint32_t*) output.data(); - // TODO: Error handling auto error = linenoise_ng::ConvertUTF8toUTF32(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); - const auto outSize = outStart - outStartSaved; + const auto outSize = error ? 0 : outStart - outStartSaved; output.resize(outSize); } @@ -41,10 +40,9 @@ void convertUTF16toUTF32(const std::u16string & input, std::u32string & output) uint32_t * outStartSaved = (uint32_t*) output.data(); - // TODO: Error handling auto error = linenoise_ng::ConvertUTF16toUTF32(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); - const auto outSize = outStart - outStartSaved; + const auto outSize = error ? 0 : outStart - outStartSaved; output.resize(outSize); } @@ -59,10 +57,9 @@ void convertUTF32toUTF8(const std::u32string & input, std::string & output) uint8_t * outStartSaved = (uint8_t*) output.data(); - // TODO: Error handling auto error = linenoise_ng::ConvertUTF32toUTF8(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); - const auto outSize = outStart - outStartSaved; + const auto outSize = error ? 
0 : outStart - outStartSaved; output.resize(outSize); } @@ -78,10 +75,9 @@ void convertUTF32toUTF16(const std::u32string & input, std::u16string & output) char16_t * outStartSaved = (char16_t*) output.data(); - // TODO: Error handling auto error = linenoise_ng::ConvertUTF32toUTF16(&inStart, inEnd, &outStart, outEnd, linenoise_ng::strictConversion); - const auto outSize = outStart - outStartSaved; + const auto outSize = error ? 0 : outStart - outStartSaved; output.resize(outSize); } @@ -248,6 +244,7 @@ void decode(const std::u32string & input, std::wstring & output, const Encoding std::string temp; decode(input, temp, encoding); output = std::wstring(temp.begin(), temp.end()); + break; } case 2: @@ -255,13 +252,16 @@ void decode(const std::u32string & input, std::wstring & output, const Encoding std::u16string temp; decode(input, temp, encoding); output = std::wstring(temp.begin(), temp.end()); + break; } case 4: output = std::wstring(input.begin(), input.end()); + break; default: assert(false); + break; } }