Skip to content

Commit

Permalink
Merge pull request #79 from fsteggink/hide_pwd_in_pg_string
Browse files Browse the repository at this point in the history
Hide passwords in Postgres connection strings which are logged to the console
  • Loading branch information
justb4 committed Sep 10, 2018
2 parents abd1041 + f8f5bdd commit 4d07e58
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 13 deletions.
12 changes: 6 additions & 6 deletions stetl/inputs/ogrinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def gdal_error_handler(err_class, err_num, err_msg):

# Report failure if failed
if self.data_source_p is None:
log.error("Cannot open OGR datasource: %s with the following drivers." % self.data_source)
log.error("Cannot open OGR datasource: %s with the following drivers." % Util.safe_string_value(self.data_source))

for iDriver in range(self.ogr.GetDriverCount()):
log.info(" -> " + self.ogr.GetDriver(iDriver).GetName())
Expand All @@ -126,11 +126,11 @@ def gdal_error_handler(err_class, err_num, err_msg):
self.layer_count = self.data_source_p.GetLayerCount()
self.layer_idx = 0

log.info("Opened OGR source ok: %s layer count=%d" % (self.data_source, self.layer_count))
log.info("Opened OGR source ok: %s layer count=%d" % (Util.safe_string_value(self.data_source), self.layer_count))

def read(self, packet):
if not self.data_source_p:
log.info("End reading from: %s" % self.data_source)
log.info("End reading from: %s" % Util.safe_string_value(self.data_source))
return packet

if self.layer is None:
Expand All @@ -145,11 +145,11 @@ def read(self, packet):
if self.layer is None:
log.error("Could not fetch layer %d" % 0)
raise Exception()
log.info("Start reading from OGR Source: %s, Layer: %s" % (self.data_source, self.layer.GetName()))
log.info("Start reading from OGR Source: %s, Layer: %s" % (Util.safe_string_value(self.data_source), self.layer.GetName()))
else:
# No more Layers left: cleanup
packet.set_end_of_stream()
log.info("Closing OGR source: %s" % self.data_source)
log.info("Closing OGR source: %s" % Util.safe_string_value(self.data_source))
# Destroy not required anymore: http://trac.osgeo.org/gdal/wiki/PythonGotchas
# self.data_source_p.Destroy()
self.data_source_p = None
Expand Down Expand Up @@ -314,7 +314,7 @@ def init(self):
self.cmd = self.cmd.split('|')

def exec_cmd(self):
log.info("start ogr2ogr cmd = %s" % repr(self.cmd))
log.info("start ogr2ogr cmd = %s" % Util.safe_string_value(repr(self.cmd)))
self.ogr_process = subprocess.Popen(self.cmd,
shell=False,
stdout=subprocess.PIPE,
Expand Down
2 changes: 1 addition & 1 deletion stetl/outputs/execoutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def execute_cmd(self, cmd):

try:
os.environ.update(env_vars)
log.info("executing cmd=%s" % cmd)
log.info("executing cmd=%s" % Util.safe_string_value(cmd))
subprocess.call(cmd, shell=True)
log.info("execute done")
finally:
Expand Down
8 changes: 4 additions & 4 deletions stetl/outputs/ogroutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def gdal_error_handler(err_class, err_num, err_msg):
if self.dest_fd is None:
self.dest_fd = self.dest_driver.CreateDataSource(self.dest_data_source, options=self.dest_create_options)
if self.dest_fd is None:
log.error("%s driver failed to create %s" % (self.dest_format, self.dest_data_source))
log.error("%s driver failed to create %s" % (self.dest_format, Util.safe_string_value(self.dest_data_source)))
raise Exception()

# /* -------------------------------------------------------------------- */
Expand All @@ -218,7 +218,7 @@ def gdal_error_handler(err_class, err_num, err_msg):
self.layer_create_options)
self.feature_def = None

log.info("Opened OGR dest ok: %s " % self.dest_data_source)
log.info("Opened OGR dest ok: %s " % Util.safe_string_value(self.dest_data_source))

def write(self, packet):

Expand All @@ -228,7 +228,7 @@ def write(self, packet):
return packet

if self.layer is None:
log.info("No Layer, end writing to: %s" % self.dest_data_source)
log.info("No Layer, end writing to: %s" % Util.safe_string_value(self.dest_data_source))
return packet

# Assume ogr_feature_array input, otherwise convert ogr_feature to list
Expand Down Expand Up @@ -268,7 +268,7 @@ def write_feature(self, feature):
def write_end(self, packet):
# Destroy not required anymore: http://trac.osgeo.org/gdal/wiki/PythonGotchas
# self.dest_fd.Destroy()
log.info("End writing to: %s" % self.dest_data_source)
log.info("End writing to: %s" % Util.safe_string_value(self.dest_data_source))
self.dest_fd = None
self.layer = None
return packet
Expand Down
37 changes: 35 additions & 2 deletions stetl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,26 @@
#
# Author:Just van den Broecke

import glob
import logging
import os
import glob
import re
import types
from time import time
from ConfigParser import ConfigParser

logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(name)s %(levelname)s %(message)s')

# Precompiled regular expressions used to mask credentials in PostgreSQL
# connection strings as used by GDAL/OGR ("PG:dbname=... user=... password=...").
#
# libpq allows optional whitespace around the equal sign ("password = secret"),
# so every keyword pattern tolerates it; without that such credentials would be
# logged unmasked. Per keyword three value forms are covered: unquoted,
# double-quoted and single-quoted (the quoted forms accept backslash escapes).
# The unquoted form must not start with a quote, leaving quoted values to the
# dedicated quoted patterns.
RE_PG_START = re.compile(r'\bPG:', flags=re.IGNORECASE)
RE_PG_PWD = re.compile(r'\bpassword\s*=\s*[^\'"\s]\S*', flags=re.IGNORECASE)
RE_PG_PWD_DBL = re.compile(r'\bpassword\s*=\s*"(?:[^"\\]|\\.)*"', flags=re.IGNORECASE)
RE_PG_PWD_SNG = re.compile(r'\bpassword\s*=\s*\'(?:[^\'\\]|\\.)*\'', flags=re.IGNORECASE)
RE_PG_USER = re.compile(r'\buser\s*=\s*[^\'"\s]\S*', flags=re.IGNORECASE)
RE_PG_USER_DBL = re.compile(r'\buser\s*=\s*"(?:[^"\\]|\\.)*"', flags=re.IGNORECASE)
RE_PG_USER_SNG = re.compile(r'\buser\s*=\s*\'(?:[^\'\\]|\\.)*\'', flags=re.IGNORECASE)


# Static utility methods
class Util:
Expand Down Expand Up @@ -348,6 +358,24 @@ def xpath_get(mydict, path):

return elem

# Hide user names and passwords in string values, like the Postgres connection string as used by GDAL/OGR
# See https://stackoverflow.com/questions/249791/regex-for-quoted-string-with-escaping-quotes for the escaped quotes expressions
@staticmethod
def safe_string_value(value, hide_value='***'):
    """Return ``value`` with user names and passwords masked, for safe logging.

    Only values containing a ``PG:`` marker (a PostgreSQL connection string as
    used by GDAL/OGR) are rewritten: their ``user=...`` and ``password=...``
    settings (unquoted, double-quoted or single-quoted) get ``hide_value`` as
    value. Any other value is returned untouched.

    :param value: the value about to be logged, normally a string
    :param hide_value: text substituted for each hidden user name/password
    :return: the masked string, or ``value`` itself when it is not a string
    """
    try:
        is_pg_conn = RE_PG_START.search(value) is not None
    except TypeError:
        # Non-string values (None, numbers, lists, ...) cannot carry a
        # connection string: hand them back as-is instead of failing the
        # log statement or config dump that called us.
        return value

    # PostgreSQL connection strings as used by GDAL/OGR
    if is_pg_conn:
        # Per keyword: unquoted form first, then double- and single-quoted.
        substitutions = (
            (RE_PG_PWD, 'password=%s'),
            (RE_PG_PWD_DBL, 'password="%s"'),
            (RE_PG_PWD_SNG, 'password=\'%s\''),
            (RE_PG_USER, 'user=%s'),
            (RE_PG_USER_DBL, 'user="%s"'),
            (RE_PG_USER_SNG, 'user=\'%s\''),
        )
        for pattern, template in substitutions:
            value = pattern.sub(template % hide_value, value)

    # Add more cases as needed ...

    return value


log = Util.get_log("util")

Expand Down Expand Up @@ -488,9 +516,14 @@ def to_string(self):
# Need to hide some sensitive values, usually used for logging
safe_copy = self.config_dict.copy()
hides = ['passw', 'pasw', 'token', 'user']
hide_value = '<hidden>'

for key in safe_copy:
for hide_key in hides:
if hide_key in key.lower():
safe_copy[key] = '<hidden>'
safe_copy[key] = hide_value

# Also hide usernames/passwords in string values, like Postgres connection strings used by GDAL/OGR
safe_copy[key] = Util.safe_string_value(safe_copy[key], hide_value)

return repr(safe_copy)
47 changes: 47 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# testing: to be called by nosetests

import os
from ast import literal_eval

from stetl.etl import ETL
from stetl.util import ConfigSection
from tests.stetl_test_case import StetlTestCase


class UtilTest(StetlTestCase):
    """Checks that ConfigSection.to_string() masks sensitive values."""

    def setUp(self):
        super(UtilTest, self).setUp()

    def test_configsection_to_string(self):
        """Sensitive keys and credentials inside PG connection strings must be hidden."""
        settings = {
            'name': 'Stetl',
            'password': 'something',
            'paswoord': 'iets',
            'token': 'abc123',
            'user': 'John',
            'username': 'Jane',
            'gebruiker': 'Jan',
            'ogrconn': 'PG:dbname=mydb host=myhost port=myport user=myuser password=mypassword active_schema=myschema',
            'ogrconn_singlequotes': 'PG:dbname=\'mydb\' host=\'myhost\' port=\'myport\' user=\'myuser\' password=\'mypassword\' active_schema=\'myschema\'',
            'ogrconn_doublequotes': 'PG:dbname="mydb" host="myhost" port="myport" user="myuser" password="mypassword" active_schema="myschema"',
            'ogrconn_crazypwd1': 'PG:dbname=\'mydb\' host=\'myhost\' port=\'myport\' user=\'myuser\' password=\'my\\\'crazy\\"password\' active_schema=\'myschema\'',
            'ogrconn_crazypwd2': 'PG:dbname="mydb" host="myhost" port="myport" user="myuser" password="my\\\'crazy\\"password" active_schema="myschema"',
            'ogrconn_dkk': '"PG:dbname=mydb host=myhost port=myport user=myuser password=mypassword active_schema=myschema"',
        }

        # to_string() yields the repr of the masked dict; parse it back for inspection
        rendered = literal_eval(ConfigSection(settings).to_string())

        # (key, expected value) pairs, checked in this exact order
        expectations = [
            ('name', 'Stetl'),
            ('password', '<hidden>'),
            ('paswoord', '<hidden>'),
            ('token', '<hidden>'),
            ('user', '<hidden>'),
            ('username', '<hidden>'),
            ('gebruiker', 'Jan'),
            ('ogrconn', 'PG:dbname=mydb host=myhost port=myport user=<hidden> password=<hidden> active_schema=myschema'),
            ('ogrconn_singlequotes', 'PG:dbname=\'mydb\' host=\'myhost\' port=\'myport\' user=\'<hidden>\' password=\'<hidden>\' active_schema=\'myschema\''),
            ('ogrconn_doublequotes', 'PG:dbname="mydb" host="myhost" port="myport" user="<hidden>" password="<hidden>" active_schema="myschema"'),
            ('ogrconn_crazypwd1', 'PG:dbname=\'mydb\' host=\'myhost\' port=\'myport\' user=\'<hidden>\' password=\'<hidden>\' active_schema=\'myschema\''),
            ('ogrconn_crazypwd2', 'PG:dbname="mydb" host="myhost" port="myport" user="<hidden>" password="<hidden>" active_schema="myschema"'),
            ('ogrconn_dkk', '"PG:dbname=mydb host=myhost port=myport user=<hidden> password=<hidden> active_schema=myschema"'),
        ]
        for key, wanted in expectations:
            self.assertEqual(wanted, rendered[key])

0 comments on commit 4d07e58

Please sign in to comment.