From 9ffaf0ba811a896a3e328a311ba61a4c7cfe43d1 Mon Sep 17 00:00:00 2001 From: Alexey Porotnikov Date: Sun, 19 Mar 2017 18:57:13 +0200 Subject: [PATCH 1/2] encode/decode UTF-8 in POMs --- jip/cache.py | 5 +++-- jip/maven.py | 2 +- jip/util.py | 1 + test/install_test.py | 5 +++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/jip/cache.py b/jip/cache.py index 45ada51..7e4503b 100644 --- a/jip/cache.py +++ b/jip/cache.py @@ -19,6 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # +import codecs from jip.repository import MavenRepos from jip.util import get_virtual_home @@ -54,7 +55,7 @@ def download_jar(self, artifact, local_path=None): def download_pom(self, artifact): path = self.get_artifact_uri(artifact, 'pom') if os.path.exists(path): - f = open(path, 'r') + f = codecs.open(path, mode='r', encoding='utf-8') data = f.read() f.close() return data @@ -63,7 +64,7 @@ def download_pom(self, artifact): def put_pom(self, artifact, data): path = self.get_artifact_uri(artifact, 'pom') - f = open(path, 'w') + f = codecs.open(path, mode='w', encoding='utf-8') f.write(data) f.close() diff --git a/jip/maven.py b/jip/maven.py index 73ed6a3..14a1c5a 100644 --- a/jip/maven.py +++ b/jip/maven.py @@ -110,7 +110,7 @@ def get_element_tree(self): ## we use this dirty method to remove namesapce attribute so that elementtree will use default empty namespace pom_string = re.sub(r"", '', self.pom_string, 1) parser = ElementTree.XMLParser(target=WhitespaceNormalizer()) - parser.feed(pom_string) + parser.feed(pom_string.encode('utf-8')) self.eletree = parser.close() return self.eletree diff --git a/jip/util.py b/jip/util.py index bdefdc2..5e29ae5 100644 --- a/jip/util.py +++ b/jip/util.py @@ -71,6 +71,7 @@ def download_string(url): import requests try: response = requests.get(url, headers={ 'User-Agent': JIP_USER_AGENT}) + response.encoding = 'utf-8' response.raise_for_status() return response.text except requests.exceptions.RequestException: diff --git a/test/install_test.py b/test/install_test.py index 16ce314..7b2039a 100644 --- a/test/install_test.py +++ b/test/install_test.py @@ -18,6 +18,11 @@ def testResolve(self): artifacts = _resolve_artifacts([junit]) self.assertEqual(len(artifacts), 1) + def testResolve2(self): + junit = Artifact('de.l3s.boilerpipe', 'boilerpipe', '1.1.0') + artifacts = _resolve_artifacts([junit]) + self.assertEqual(len(artifacts), 1) + def testExclusion(self): pig = Artifact('org.apache.pig', 'pig', '0.8.3') exclusion = [Artifact(*x.split(":")) for x in ['ant:ant', 'junit:junit','org.eclipse.jdt:core']] From abc7dc2765982348e9bbc200b896d2e92ce4d9c6 Mon Sep 17 00:00:00 2001 From: Alexey Porotnikov Date: Mon, 20 Mar 2017 11:56:09 +0200 Subject: [PATCH 2/2] Correcting for PR comments --- jip/util.py | 1 - test/install_test.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/jip/util.py b/jip/util.py index 5e29ae5..bdefdc2 100644 --- a/jip/util.py +++ b/jip/util.py @@ -71,7 +71,6 @@ def download_string(url): import requests try: response = requests.get(url, headers={ 'User-Agent': JIP_USER_AGENT}) - response.encoding = 'utf-8' response.raise_for_status() return response.text except requests.exceptions.RequestException: diff --git a/test/install_test.py b/test/install_test.py index 7b2039a..f7d81a3 100644 --- a/test/install_test.py +++ b/test/install_test.py @@ -18,9 +18,9 @@ def testResolve(self): artifacts = _resolve_artifacts([junit]) self.assertEqual(len(artifacts), 1) - def testResolve2(self): - junit = Artifact('de.l3s.boilerpipe', 'boilerpipe', '1.1.0') - artifacts = _resolve_artifacts([junit]) + def testResolveArtifactWithUmlautsInPom(self): + artifact = Artifact('de.l3s.boilerpipe', 'boilerpipe', '1.1.0') + artifacts = _resolve_artifacts([artifact]) self.assertEqual(len(artifacts), 1) def testExclusion(self):