From c8be12521f5b2eb47532b43e0d2e0c326a03c1b3 Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Mon, 4 Oct 2021 11:24:08 -0400 Subject: [PATCH 1/9] Fixes #401 Networking capabilities should be documented --- .bumpversion.cfg | 2 +- docs/docs/Best_Practices/.pages | 3 +- .../Best_Practices/Runtime_Environment.md | 62 +++++++++++++ docs/docs/Best_Practices/Strings.md | 90 +++++++++++++++++++ docs/docs/Best_Practices/Unicode_Data.md | 29 ------ docs/docs/References/Platform_Libraries.md | 4 +- docs/readme.md | 17 ++-- 7 files changed, 169 insertions(+), 38 deletions(-) create mode 100644 docs/docs/Best_Practices/Runtime_Environment.md create mode 100644 docs/docs/Best_Practices/Strings.md delete mode 100644 docs/docs/Best_Practices/Unicode_Data.md diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8213c309..1aca6827 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.1.0 +current_version = 3.1.0.01 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/docs/docs/Best_Practices/.pages b/docs/docs/Best_Practices/.pages index 65a6ee2d..ceea0305 100644 --- a/docs/docs/Best_Practices/.pages +++ b/docs/docs/Best_Practices/.pages @@ -4,7 +4,8 @@ arrange: - Managing_Scripts_For_Remote_Execution.md - User_Visible_Errors.md - Sensitive_Data.md - - Unicode_Data.md + - Strings.md + - Runtime_Environment.md - Working_with_Powershell.md - Scratch_Paths.md - Message_Limits.md diff --git a/docs/docs/Best_Practices/Runtime_Environment.md b/docs/docs/Best_Practices/Runtime_Environment.md new file mode 100644 index 00000000..c95e16f7 --- /dev/null +++ b/docs/docs/Best_Practices/Runtime_Environment.md @@ -0,0 +1,62 @@ +# Plugin Runtime Environment + +## Process Lifetime +Plugin code runs inside of a Python interpreter process on the Delphix Engine. + +A fair question to ask is "What is the lifetime of this interpreter process?" 
After all, if the interpreter +process runs for a long time, then the plugin might be able to store things in memory for later access. + +Unfortunately, **there are no guarantees about process lifetime**. Your interpreter process could last two years, or it could last 400 microseconds. There is no way to know or predict this ahead of time. + +So, do not make any assumptions about interpreter process lifetime in your plugin code. + + +## Available Modules +Our Python 2.7 runtime environment only contains the [Python Standard Library](https://docs.python.org/2/library/). No additional Python modules/libraries are available. + +If you want to use some Python module that is not part of the standard library, you might be able to do so. +You would need to include that library as part of your plugin. That would involve downloading the source +code for that module, and copying it into your source directory. + +### Warnings +There are two major things to watch out for if you decide to incorporate a 3rd-party library. + +1) Make sure you're legally allowed to do so! The licensing agreement on the module will decide if, and +under what circumstances, you're allowed to make copies of, and redistribute the module. Some modules will +allow this, some will disallow this, and some will allow this for a fee. + +2) Some Python libraries include native code (often written in C or C++). There is no support for using +such libraries with plugin code. The reason for this is that native code needs to be +specially compiled and built for the machine that the library will be running on. And, unfortunately, +the machine your plugin runs on (the Delphix Engine) is likely very different from the machine you use +to develop and build your plugin. + +## Network Access +As of Delphix Engine version 6.0.11, plugin code is able to use the network directly. No network access is +possible in earlier versions. 
+ +For example, suppose your plugin wants to talk to some DBMS running on some remote host. +If the DBMS supports it, your plugin code might be able to connect to the DBMS server and talk to the +DBMS directly. This can avoid the need to do DBMS operations via running Bash/Powershell code on the remote host. + +What your plugin can access depends entirely on the customer. Some customers will set up their Delphix +Engines such that plugins have full access to the entire internet. Some will completely restrict the network +so that the plugin can only access a small handful of remote hosts. + +If your plugin has any specific network requirements, it's recommended to try, in your code, to confirm that these requirements are met. For example, the plugin could make such a check in the +`discovery.repository()` operation, and throw an error if the check fails. + +### Example +```python +import httplib +import json + +dbms_port = 5432 + +# Directly contact our DBMS's REST server to get a list of databases +def list_databases(remote_ip): + cx = httplib.HTTPConnection(remote_ip, dbms_port) + cx.request("GET", "/databases") + response = cx.getresponse() + return json.loads(response.read()) +``` diff --git a/docs/docs/Best_Practices/Strings.md b/docs/docs/Best_Practices/Strings.md new file mode 100644 index 00000000..d5060f0a --- /dev/null +++ b/docs/docs/Best_Practices/Strings.md @@ -0,0 +1,90 @@ +# Working With Strings + +Unfortunately, Python 2.7 makes it very easy to accidentally write string-related code that will sometimes work, but sometimes fail (especially for people who are not using English). Read on for some tips for how to avoid this. + +## The Two String Types +Python 2.7 has two different types that are both called "strings". One represents +a sequence of **bytes**, and the other represents a sequence of **characters**. 
+ +```python +# The default string (aka 'str object') represents bytes, +my_bytes = "This string is a sequence of bytes" + +# A 'Unicode object' represents characters (note the u just before the quote) +my_characters = u"This string is a sequence of characters" +``` + +## Unicode Strings Are Preferred + +There are a couple of reasons to prefer the "unicode object" over the "str object". + +First, in most cases, we care about characters, and we're not particularly interested in which bytes +are used to represent those characters. That is, we might care that we have a "letter H" followed by a "letter I", but it's usually irrelevant to us what byte values happen to be used. + +Second, there are lots of different schemes available which give rules for how to represent characters as bytes. These schemes are called "encodings"; some examples include "ASCII", "UTF-8", "Shift-JIS", and "UCS-2". Each encoding uses different rules about which characters are represented by which bytes. + +A "str object" doesn't know anything about encodings... it is just a sequence of bytes. So, when a programmer is working with one of these byte strings, they have to know which encoding rules are in play. + +In order to avoid problems, **we recommend using Unicode strings everywhere** in your plugin code. + +## Delphix I/O + +Your plugin will sometimes need to send strings back and forth to Delphix code. There are two supported formats for doing this. Any time you receive a string from Delphix, it will be in one of the two following forms. This includes arguments to your plugin operations, and return values from "Delphix Libs" functions. Likewise, any time you send a string to Delphix, it must be in one of these two forms. + +Acceptable forms: + +1. A Unicode string (recommended) +2. 
A "str object" (byte string) that uses the UTF-8 encoding + +## Converting Between Types + +Sometimes (hopefully rarely!), you might find yourself needing to convert back and forth between byte strings and character strings. For example, you might need to read or write a file on a remote system that is required to use some specific encoding. Here's how to do that: + +```python +# Converting from a character string ("unicode") to a byte string ("str") +my_utf8_byte_string = my_character_string.encode("utf-8") +my_utf16_byte_string = my_character_string.encode("utf-16") + +# Converting from a byte string to a character string +my_character_string1 = my_utf8_byte_string.decode("utf-8") +my_character_string2 = my_utf16_byte_string.decode("utf-16") +my_character_string3 = my_ascii_byte_string.decode("ascii") +``` + +Things to note: + +- `encode` goes from characters to bytes. `decode` goes from bytes to characters. +- If you try to `encode` a character string using the `ascii` encoding, but your character string contains non-ascii characters, you'll get an error. More generally: some encodings will error out with some characters. +- If you don't specify an encoding, Python will supply a default. But, there's a good chance the default will be wrong for your use case. So, always specify the encoding! +- Don't try to `encode` a byte string. If you do this, Python will "helpfully" insert an implicit `decode` first, which tends to cause very confusing error messages. Likewise, don't try to `decode` a character string. +- `utf-8` is likely the best encoding to use for most situations. It accepts all characters, does not have issues with byte ordering, and is understood by most systems. This is not true of most other encodings. + +## Using Non-ASCII characters in Python files + +Python 2.7 source code files are assumed to use the "ASCII" encoding, unless told otherwise. 
Unfortunately, ASCII is an obsolete encoding that only knows how to deal with a small number of characters, and only really supports American English. + +In order to use non-ASCII characters in your source code, you need to use a different encoding than ASCII, and you need to tell the Python interpreter which encoding you're using. In Python 2.7, this is done with a "magic" comment at the very top of each file. + +Here is an example of the first line of a Python file that uses the UTF-8 encoding: +```python +# -*- coding: utf-8 -*- +``` + +If you do not specify an encoding, and the source code contains any non-ASCII characters, you will get errors + when building the plugin using [dvp build](/References/CLI.md#build) or during the execution of a plugin operation. + +### Example + +```python +# -*- coding: utf-8 -*- +from dlpx.virtualization.platform import Plugin +from dlpx.virtualization import libs +from generated.definitions import RepositoryDefinition + +plugin = Plugin() + +@plugin.discovery.repository() +def repository_discovery(source_connection): + # Create a repository with name that uses non-ASCII characters + return [RepositoryDefinition(name=u"Théâtre")] +``` diff --git a/docs/docs/Best_Practices/Unicode_Data.md b/docs/docs/Best_Practices/Unicode_Data.md deleted file mode 100644 index 06b6f88a..00000000 --- a/docs/docs/Best_Practices/Unicode_Data.md +++ /dev/null @@ -1,29 +0,0 @@ -# Working with Unicode Data - -To use unicode characters in the plugin code, the following lines should be included at top of the plugin code: - -```python -#!/usr/bin/env python -# -*- coding: utf-8 -*- -``` - -Otherwise, there may be errors when building the plugin using [dvp build](/References/CLI.md#build) or during the execution of a plugin operation. 
- -## Example - -```python -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from dlpx.virtualization.platform import Plugin -from dlpx.virtualization import libs -from generated.definitions import RepositoryDefinition - -plugin = Plugin() - -@plugin.discovery.repository() -def repository_discovery(source_connection): - # Create a repository with name ☃ - command = 'echo ☃' - result = libs.run_bash(source_connection, command) - return [RepositoryDefinition(name=result.stdout)] -``` \ No newline at end of file diff --git a/docs/docs/References/Platform_Libraries.md b/docs/docs/References/Platform_Libraries.md index bc940db6..672fc105 100644 --- a/docs/docs/References/Platform_Libraries.md +++ b/docs/docs/References/Platform_Libraries.md @@ -1,5 +1,5 @@ # Platform Libraries -Set of functions that plugins can use these for executing remote commands, etc. +Delphix provides a set of functions that plugins can use for executing remote commands, etc. ## retrieve_credentials @@ -101,7 +101,7 @@ response = libs.run_bash(connection, command) Running a bash script that is saved in a directory. ```python - + import pkgutil from dlpx.virtualization import libs diff --git a/docs/readme.md b/docs/readme.md index ece74f2f..89fa73bf 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -2,6 +2,13 @@ This is the Markdown-based documentation for the Virtualization SDK. +## Important Note On Building Docs + +As of this writing, the rest of the Virtualization SDK codebase is based on Python 2. +However, our docs infrastructure is based on Python 3! So, **all of the below commands +must be run in a Python 3 environment**. It's recommended to use a totally separate +virtual environment for docs work than the one you use in the rest of the SDK codebase. + ## Local Testing Install dependencies for building documentation and run `pipenv run mkdocs serve` @@ -13,8 +20,8 @@ To activate this project's virtualenv, run pipenv shell. 
Alternatively, run a command inside the virtualenv with pipenv run. $ pipenv run mkdocs serve -INFO - Building documentation... -INFO - Cleaning site directory +INFO - Building documentation... +INFO - Cleaning site directory [I 200424 15:54:06 server:292] Serving on http://127.0.0.1:8000 [I 200424 15:54:06 handlers:59] Start watching changes [I 200424 15:54:06 handlers:61] Start detecting changes @@ -59,7 +66,7 @@ Install `setuptools==45` to get around a deprecated API in version 46. $ pip install setuptools==45 Collecting setuptools==45 Downloading setuptools-45.0.0-py2.py3-none-any.whl (583 kB) - |████████████████████████████████| 583 kB 2.7 MB/s + |████████████████████████████████| 583 kB 2.7 MB/s Installing collected packages: setuptools Attempting uninstall: setuptools Found existing installation: setuptools 46.1.3 @@ -85,13 +92,13 @@ This will generate the `site` directory which will contain all the gererated doc 5. Go to your individual virtualization-sdk repo's settings, scroll to the bottom and verify under the GitHub Pages section the `Source` is set to `gh-pages branch`. 6. Right above this will be a link explaining where your docs are published. -You can also utilize the GitHub workflow for publishing docs (`.github/workflows/publish-docs.yml`) associated with a pull request. +You can also utilize the GitHub workflow for publishing docs (`.github/workflows/publish-docs.yml`) associated with a pull request. The workflow is present on the `develop` branch. Create a branch called `docs/x.y.z` off `develop` on your fork of the repository to ensure that your docs branch triggers the workflow. If you have more than one `docs/x.y.z` branch in your fork, you have to push your doc changes to the docs branch with the latest `x.y.z` version. Otherwise, the workflow won't run. 
You also have to make sure to choose `gh-pages` branch on your fork as the [publishing source](https://help.github.com/en/github/working-with-github-pages/configuring-a-publishing-source-for-your-github-pages-site#choosing-a-publishing-source). Once you push doc changes to the `docs/.x.y.z` branch, the docs site should be available under -`.github.io/virtualization-sdk` shortly after. You can see the status of publishing under +`.github.io/virtualization-sdk` shortly after. You can see the status of publishing under `https://github.com//virtualization-sdk/actions`. This is a fast way to give a preview of your changes in a pull request. From f641967c9492eb7a5201b21f2f8f0f72a903a4d6 Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Mon, 4 Oct 2021 14:08:59 -0400 Subject: [PATCH 2/9] revert bad version change --- .bumpversion.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1aca6827..8213c309 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.1.0.01 +current_version = 3.1.0 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
From e5b290fd539959856a23ea84ac56a4ac36097384 Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Mon, 4 Oct 2021 14:09:17 -0400 Subject: [PATCH 3/9] Bump to first dev patch version --- .bumpversion.cfg | 2 +- common/src/main/python/dlpx/virtualization/common/VERSION | 2 +- dvp/src/main/python/dlpx/virtualization/VERSION | 2 +- libs/src/main/python/dlpx/virtualization/libs/VERSION | 2 +- platform/src/main/python/dlpx/virtualization/platform/VERSION | 2 +- tools/src/main/python/dlpx/virtualization/_internal/VERSION | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8213c309..2109f8cb 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.1.0 +current_version = 3.1.1.dev0 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/common/src/main/python/dlpx/virtualization/common/VERSION b/common/src/main/python/dlpx/virtualization/common/VERSION index fd2a0186..701ebec1 100644 --- a/common/src/main/python/dlpx/virtualization/common/VERSION +++ b/common/src/main/python/dlpx/virtualization/common/VERSION @@ -1 +1 @@ -3.1.0 +3.1.1.dev0 diff --git a/dvp/src/main/python/dlpx/virtualization/VERSION b/dvp/src/main/python/dlpx/virtualization/VERSION index fd2a0186..701ebec1 100644 --- a/dvp/src/main/python/dlpx/virtualization/VERSION +++ b/dvp/src/main/python/dlpx/virtualization/VERSION @@ -1 +1 @@ -3.1.0 +3.1.1.dev0 diff --git a/libs/src/main/python/dlpx/virtualization/libs/VERSION b/libs/src/main/python/dlpx/virtualization/libs/VERSION index fd2a0186..701ebec1 100644 --- a/libs/src/main/python/dlpx/virtualization/libs/VERSION +++ b/libs/src/main/python/dlpx/virtualization/libs/VERSION @@ -1 +1 @@ -3.1.0 +3.1.1.dev0 diff --git a/platform/src/main/python/dlpx/virtualization/platform/VERSION b/platform/src/main/python/dlpx/virtualization/platform/VERSION index fd2a0186..701ebec1 100644 --- 
a/platform/src/main/python/dlpx/virtualization/platform/VERSION +++ b/platform/src/main/python/dlpx/virtualization/platform/VERSION @@ -1 +1 @@ -3.1.0 +3.1.1.dev0 diff --git a/tools/src/main/python/dlpx/virtualization/_internal/VERSION b/tools/src/main/python/dlpx/virtualization/_internal/VERSION index fd2a0186..701ebec1 100644 --- a/tools/src/main/python/dlpx/virtualization/_internal/VERSION +++ b/tools/src/main/python/dlpx/virtualization/_internal/VERSION @@ -1 +1 @@ -3.1.0 +3.1.1.dev0 From 3cf4c96068101c7f75144692507211162b38ebff Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Tue, 5 Oct 2021 08:32:45 -0400 Subject: [PATCH 4/9] Fix version test --- .../python/dlpx/virtualization/_internal/test_package_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py b/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py index 87f63c08..db5e5a3f 100644 --- a/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py +++ b/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py @@ -10,7 +10,7 @@ class TestPackageUtil: @staticmethod def test_get_version(): - assert package_util.get_version() == '3.1.0' + assert package_util.get_version() == '3.1.1.dev0' @staticmethod def test_get_virtualization_api_version(): From 97fda46e3e450268a0bc429499992df81cf558e1 Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Fri, 8 Oct 2021 11:14:04 -0400 Subject: [PATCH 5/9] Review feedback --- docs/docs/Best_Practices/Code_Sharing.md | 10 +++++----- docs/docs/Best_Practices/Runtime_Environment.md | 17 +++++++++-------- docs/docs/Best_Practices/Strings.md | 4 ++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/docs/docs/Best_Practices/Code_Sharing.md b/docs/docs/Best_Practices/Code_Sharing.md index 5640e6a2..64f4c4a0 100644 --- a/docs/docs/Best_Practices/Code_Sharing.md +++ b/docs/docs/Best_Practices/Code_Sharing.md @@ -2,7 +2,7 @@ All Python 
modules inside of `srcDir` can be imported just as they would be if the plugin was executing locally. When a plugin operation is executed `srcDir` is the current working directory so all imports need to be relative to `srcDir` regardless of the path of the module doing the import. -Please refer to Python's [documentation on modules](https://docs.python.org/2/tutorial/modules.html#modules) to learn more about modules and imports. +Please refer to Python's [documentation on modules](https://docs.python.org/2/tutorial/modules.html#modules) to learn more about modules and imports. For more information about using others' code inside your plugin, see [Available Modules](/Best_Practices/Runtime_Environment.md#available-modules). ## Example @@ -31,7 +31,7 @@ Any module in the plugin could import `execution_util.py` with `from utils impor !!! warning "Gotcha" Since the platform uses Python 2.7, every directory needs to have an `__init__.py` file in it otherwise the modules and resources in the folder will not be found at runtime. For more information on `__init__.py` files refer to Python's [documentation on packages](https://docs.python.org/2/tutorial/modules.html#packages). - + Note that the `srcDir` in the plugin config file (`src` in this example) does _not_ need an `__init__.py` file. Assume `schema.json` contains: @@ -117,8 +117,8 @@ def find_schemas(source_connection, repository): return [SourceConfigDefinition(name=name) for name in schema_names] ``` !!! note - Even though `discovery.py` is in the `operations` package, the import for `execution_util` is still relative to the `srcDir` specified in the plugin config file. `execution_util` is in the `utils` package so it is imported with `from utils import execution_util`. - + Even though `discovery.py` is in the `operations` package, the import for `execution_util` is still relative to the `srcDir` specified in the plugin config file. 
`execution_util` is in the `utils` package so it is imported with `from utils import execution_util`. + ### execution_util.py `execution_util.py ` has two methods `execute_sql` and `execute_shell`. `execute_sql` takes the name of a SQL script in `resources/` and executes it with `resources/execute_sql.sh`. `execute_shell` takes the name of a shell script in `resources/` and executes it. @@ -147,4 +147,4 @@ def execute_shell(source_connection, script_name): ``` !!! note - Both `execute_sql` and `execute_shell` use the `check` parameter which will cause an error to be raised if the exit code is non-zero. For more information refer to the `run_bash` [documentation](/References/Platform_Libraries.md#run_bash). \ No newline at end of file + Both `execute_sql` and `execute_shell` use the `check` parameter which will cause an error to be raised if the exit code is non-zero. For more information refer to the `run_bash` [documentation](/References/Platform_Libraries.md#run_bash). diff --git a/docs/docs/Best_Practices/Runtime_Environment.md b/docs/docs/Best_Practices/Runtime_Environment.md index c95e16f7..aadaf9cf 100644 --- a/docs/docs/Best_Practices/Runtime_Environment.md +++ b/docs/docs/Best_Practices/Runtime_Environment.md @@ -16,7 +16,7 @@ Our Python 2.7 runtime environment only contains the [Python Standard Library](h If you want to use some Python module that is not part of the standard library, you might be able to do so. You would need to include that library as part of your plugin. That would involve downloading the source -code for that module, and copying it into your source directory. +code for that module, and copying it into your source directory. For more information on how to lay out code in your source directory, see [Code Sharing](/Best_Practices/Code_Sharing.md). ### Warnings There are two major things to watch out for if you decide to incorporate a 3rd-party library. 
@@ -32,19 +32,13 @@ the machine your plugin runs on (the Delphix Engine) is likely very different fr to develop and build your plugin. ## Network Access -As of Delphix Engine version 6.0.11, plugin code is able to use the network directly. No network access is +As of Delphix Engine version 6.0.11.0, plugin code is able to use the network directly. No network access is possible in earlier versions. For example, suppose your plugin wants to talk to some DBMS running on some remote host. If the DBMS supports it, your plugin code might be able to connect to the DBMS server and talk to the DBMS directly. This can avoid the need to do DBMS operations via running Bash/Powershell code on the remote host. -What your plugin can access depends entirely on the customer. Some customers will set up their Delphix -Engines such that plugins have full access to the entire internet. Some will completely restrict the network -so that the plugin can only access a small handful of remote hosts. - -If your plugin has any specific network requirements, it's recommended to try, in your code, to confirm that these requirements are met. For example, the plugin could make such a check in the -`discovery.repository()` operation, and throw an error if the check fails. ### Example ```python @@ -60,3 +54,10 @@ def list_databases(remote_ip): response = cx.getresponse() return json.loads(response.read()) ``` + +What your plugin can access depends entirely on the customer. Some customers will set up their Delphix +Engines such that plugins have full access to the entire internet. Some will completely restrict the network +so that the plugin can only access a small handful of remote hosts. + +If your plugin has any specific network requirements, it's recommended to try, in your code, to confirm that these requirements are met. For example, the plugin could make such a check in the +`discovery.repository()` operation, and throw an error if the check fails. 
Like any other requirement, this should of course be documented. diff --git a/docs/docs/Best_Practices/Strings.md b/docs/docs/Best_Practices/Strings.md index d5060f0a..1dfb5f5c 100644 --- a/docs/docs/Best_Practices/Strings.md +++ b/docs/docs/Best_Practices/Strings.md @@ -7,7 +7,7 @@ Python 2.7 has two different types that are both called "strings". One represent a sequence of **bytes**, and the other represents a sequence of **characters**. ```python -# The default string (aka 'str object') represents bytes, +# The default string (aka 'str object') represents bytes my_bytes = "This string is a sequence of bytes" # A 'Unicode object' represents characters (note the u just before the quote) @@ -63,7 +63,7 @@ Things to note: Python 2.7 source code files are assumed to use the "ASCII" encoding, unless told otherwise. Unfortunately, ASCII is an obsolete encoding that only knows how to deal with a small number of characters, and only really supports American English. -In order to use non-ASCII characters in your source code, you need to use a different encoding than ASCII, and you need to tell the Python interpreter which encoding you're using. In Python 2.7, this is done with a "magic" comment at the very top of each file. +In order to include non-ASCII characters in your source code, you need to use a different encoding than ASCII, and you need to tell the Python interpreter which encoding you're using. In Python 2.7, this is done with a "magic" comment at the very top of each file. 
Here is an example of the first line of a Python file that uses the UTF-8 encoding: ```python From ca840e89fa49eb9907e132c1b5848f484a3739c4 Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Tue, 26 Oct 2021 12:31:22 -0400 Subject: [PATCH 6/9] Bump dev version after merge --- .bumpversion.cfg | 7 ++++--- common/src/main/python/dlpx/virtualization/common/VERSION | 2 +- dvp/src/main/python/dlpx/virtualization/VERSION | 2 +- libs/src/main/python/dlpx/virtualization/libs/VERSION | 2 +- .../src/main/python/dlpx/virtualization/platform/VERSION | 2 +- .../src/main/python/dlpx/virtualization/_internal/VERSION | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4a0abcca..a2376d5e 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,16 +1,16 @@ [bumpversion] -current_version = 3.2.0.dev0 +current_version = 3.2.0.dev1 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? -serialize = +serialize = {major}.{minor}.{patch}.{release}{dev} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = prod first_value = dev -values = +values = dev prod @@ -25,3 +25,4 @@ values = [bumpversion:file:./libs/src/main/python/dlpx/virtualization/libs/VERSION] [bumpversion:file:./tools/src/main/python/dlpx/virtualization/_internal/VERSION] + diff --git a/common/src/main/python/dlpx/virtualization/common/VERSION b/common/src/main/python/dlpx/virtualization/common/VERSION index 745e2a57..36559fa8 100644 --- a/common/src/main/python/dlpx/virtualization/common/VERSION +++ b/common/src/main/python/dlpx/virtualization/common/VERSION @@ -1 +1 @@ -3.2.0.dev0 +3.2.0.dev1 diff --git a/dvp/src/main/python/dlpx/virtualization/VERSION b/dvp/src/main/python/dlpx/virtualization/VERSION index 745e2a57..36559fa8 100644 --- a/dvp/src/main/python/dlpx/virtualization/VERSION +++ b/dvp/src/main/python/dlpx/virtualization/VERSION @@ -1 +1 @@ -3.2.0.dev0 +3.2.0.dev1 diff --git 
a/libs/src/main/python/dlpx/virtualization/libs/VERSION b/libs/src/main/python/dlpx/virtualization/libs/VERSION index 745e2a57..36559fa8 100644 --- a/libs/src/main/python/dlpx/virtualization/libs/VERSION +++ b/libs/src/main/python/dlpx/virtualization/libs/VERSION @@ -1 +1 @@ -3.2.0.dev0 +3.2.0.dev1 diff --git a/platform/src/main/python/dlpx/virtualization/platform/VERSION b/platform/src/main/python/dlpx/virtualization/platform/VERSION index 745e2a57..36559fa8 100644 --- a/platform/src/main/python/dlpx/virtualization/platform/VERSION +++ b/platform/src/main/python/dlpx/virtualization/platform/VERSION @@ -1 +1 @@ -3.2.0.dev0 +3.2.0.dev1 diff --git a/tools/src/main/python/dlpx/virtualization/_internal/VERSION b/tools/src/main/python/dlpx/virtualization/_internal/VERSION index 745e2a57..36559fa8 100644 --- a/tools/src/main/python/dlpx/virtualization/_internal/VERSION +++ b/tools/src/main/python/dlpx/virtualization/_internal/VERSION @@ -1 +1 @@ -3.2.0.dev0 +3.2.0.dev1 From c02c8e07388caa0f28321174af62ecf22d951bb8 Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Tue, 26 Oct 2021 12:32:00 -0400 Subject: [PATCH 7/9] Bump dev version in test --- .../python/dlpx/virtualization/_internal/test_package_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py b/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py index 143362b5..faece936 100644 --- a/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py +++ b/tools/src/test/python/dlpx/virtualization/_internal/test_package_util.py @@ -10,7 +10,7 @@ class TestPackageUtil: @staticmethod def test_get_version(): - assert package_util.get_version() == '3.2.0.dev0' + assert package_util.get_version() == '3.2.0.dev1' @staticmethod def test_get_virtualization_api_version(): From bbdc8201acd30dd00558a05bbd4c7dcfd5fff33f Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Thu, 16 Dec 2021 15:47:44 -0500 Subject: [PATCH 
8/9] Fixes #412 Make it easier to construct simple Bash commands --- docs/docs/References/Platform_Libraries.md | 33 +++++++- .../python/dlpx/virtualization/libs/libs.py | 76 +++++++++++++++++++ .../python/dlpx/virtualization/test_libs.py | 32 ++++++++ 3 files changed, 139 insertions(+), 2 deletions(-) diff --git a/docs/docs/References/Platform_Libraries.md b/docs/docs/References/Platform_Libraries.md index 82392d48..38494823 100644 --- a/docs/docs/References/Platform_Libraries.md +++ b/docs/docs/References/Platform_Libraries.md @@ -88,12 +88,13 @@ print response.stderr ##### Using parameters to construct a bash command. +Note: see the helper function [construct_bash_command_string](#construct_bash_command_string) ```python from dlpx.virtualization import libs name = virtual_source.parameters.username port = virtual_source.parameters.port -command = "mysqldump -u {} -p {}".format(name,port) +command = libs.construct_bash_command_string("mysqldump", "-u", name, "-p", port) response = libs.run_bash(connection, command) ``` @@ -124,6 +125,34 @@ response = libs.run_bash(connection, command) ``` For more information please go to [Managing Scripts for Remote Execution](/Best_Practices/Managing_Scripts_For_Remote_Execution.md) section. +## construct_bash_command_string + +Constructs a full Bash command string from an executable name and list of args. + +This helper function is intended to help with the simple case of running a remote +command with a set of arguments. By using this function, you won't have to worry +about the details of escaping/quoting special characters. + +### Signature +`def construct_bash_command_string(executable, *args)` + +### Arguments +Argument | Type | Description +-------- | ---- | ----------- +executable | String | Name of the command. This can be the full path to the command, the name of a command that is found on the path, or a shell builtin. 
+args | Strings (variadic) | Zero or more arguments to the command, each passed as a separate positional argument + +### Returns +A string representing a full Bash command line that can be passed to run_bash. + +### Example +```python +old_name = "A filename with spaces in it" +new_name = "The file's new $100 name" # <-- note special characters! +move_command = libs.construct_bash_command_string("mv", old_name, new_name) +libs.run_bash(cx, move_command) +``` + ## run_expect Executes a tcl command or script on a remote Unix host. @@ -151,7 +180,7 @@ stderr | String | Stderr from the command. ### Example -Calling expect with an inline command. +Calling expect with an inline command. ```python from dlpx.virtualization import libs diff --git a/libs/src/main/python/dlpx/virtualization/libs/libs.py b/libs/src/main/python/dlpx/virtualization/libs/libs.py index 50b57d5c..01ad2473 100644 --- a/libs/src/main/python/dlpx/virtualization/libs/libs.py +++ b/libs/src/main/python/dlpx/virtualization/libs/libs.py @@ -43,6 +43,7 @@ __all__ = [ "run_bash", + "construct_bash_command_string", "run_sync", "run_powershell", "run_expect", @@ -95,6 +96,81 @@ def _check_exit_code(response, check): response.return_value.stdout, response.return_value.stderr)) +def _quote_bash_word(unquoted_string): + """ + This function takes the given input string, and returns a string that can + be passed to Bash, such that Bash will interpret it as a unitary word. + + So, the incoming string can have quotes, dollar signs, spaces, etc. These + characters will be quoted/escaped such that Bash does not interpret them + specially. + + The returned string can be used as part of a command string passed to + run_bash. + + The technique is to enclose everything in single quotes, except for single- + quote characters, which are enclosed by double quotes. + + For example, consider this python string that we want to pass to a program: + a'b"c$f$o$o + + We cannot simply pass this string to Bash as-is, because Bash would treat + the dollar signs and quote characters specially.
+ + For this case, this function would then return the following string: + 'a'"'"'b"c$f$o$o' + + Breaking this down, the returned string contains three separate items: + 1) 'a' (a single-quoted string containing no special characters) + 2) "'" (a double-quoted string containing a special character) + 3) 'b"c$f$o$o' (a single-quoted string containing some special characters) + Because of the quoting, none of the special characters above will be + interpreted by Bash, and will be left as-is. Bash will remove the outer + quotes on each of these three parts, and then mash the three parts together + into one string. + + Thus, once Bash does its interpretation of the string, the result will have + the same content as the original Python string: + a'b"c$f$o$o + """ + + # We want to enclose the whole string in single quotes. + # But, any time we see a single-quote in the given string, we need to: + # 1) Close the existing single-quoted section + # 2) Start a new double-quoted section + # 3) Insert the original single-quote character + # 4) Close the double-quoted section that we just opened + # 5) Open a new single-quoted section for the remainder of the string + return "'{}'".format(unquoted_string.replace("'", "'\"'\"'")) + + +def construct_bash_command_string(executable, *args): + """ + Helper function to assemble a full command string to pass to run_bash. + + run_bash expects a single string representing the entire command/script that + is to be run. This is sometimes inconvenient and bug-prone for the simple + case when you want to run a single command with some arguments. + + For example, suppose you want to change a filename. You might be tempted to + write this: + run_bash(cx, "mv {} {}".format(oldname, newname)) + + That code will fail to work if either of the two names has a space or other + special character in it. This function is meant to help with such cases.
+ So, the above example can change to this: + run_bash(cx, construct_bash_command_string("mv", oldname, newname)) + + This method will worry about all of the quoting and escaping necessary. + """ + assert executable + exec_string = _quote_bash_word(executable) + if args: + arg_string = " ".join([_quote_bash_word(a) for a in args]) + return "{} {}".format(exec_string, arg_string) + else: + return exec_string + def run_bash(remote_connection, command, variables=None, use_login_shell=False, check=False): diff --git a/libs/src/test/python/dlpx/virtualization/test_libs.py b/libs/src/test/python/dlpx/virtualization/test_libs.py index abf8eba0..f997218f 100644 --- a/libs/src/test/python/dlpx/virtualization/test_libs.py +++ b/libs/src/test/python/dlpx/virtualization/test_libs.py @@ -236,6 +236,38 @@ def test_run_bash_bad_use_login_shell(remote_connection): " class 'str' but should be of class 'bool' if defined.") +class TestLibsConstructBashCommand: + @staticmethod + def test_no_args(): + result = libs.construct_bash_command_string("/path/to/executable") + assert result == "'/path/to/executable'" + + @staticmethod + def test_single_arg(): + result = libs.construct_bash_command_string("foo", "bar") + assert result == "'foo' 'bar'" + + @staticmethod + def test_many_args(): + result = libs.construct_bash_command_string("a", "b", "c", "d", "e") + assert result == "'a' 'b' 'c' 'd' 'e'" + + @staticmethod + def test_single_quote_escaping(): + result = libs.construct_bash_command_string("a'b'c", "e'f'g") + assert result == "'a'\"'\"'b'\"'\"'c' 'e'\"'\"'f'\"'\"'g'" + + @staticmethod + def test_double_quote_escaping(): + result = libs.construct_bash_command_string("a\"b\"c", "e\"f\"g") + assert result == "'a\"b\"c' 'e\"f\"g'" + + @staticmethod + def test_combined_escaping(): + result = libs.construct_bash_command_string("a'b\"c$f$o$o") + assert result == "'a'\"'\"'b\"c$f$o$o'" + + class TestLibsRunSync: @staticmethod def test_run_sync(remote_connection): From 
3167ab130305e3bd2a2a2ad6c36aa3133ac4046c Mon Sep 17 00:00:00 2001 From: Tom Walsh Date: Fri, 17 Dec 2021 09:32:40 -0500 Subject: [PATCH 9/9] Fix lint issue --- libs/src/main/python/dlpx/virtualization/libs/libs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/src/main/python/dlpx/virtualization/libs/libs.py b/libs/src/main/python/dlpx/virtualization/libs/libs.py index 01ad2473..f5d11be5 100644 --- a/libs/src/main/python/dlpx/virtualization/libs/libs.py +++ b/libs/src/main/python/dlpx/virtualization/libs/libs.py @@ -96,6 +96,7 @@ def _check_exit_code(response, check): response.return_value.stdout, response.return_value.stderr)) + def _quote_bash_word(unquoted_string): """ This function takes the given input string, and returns a string that can