From f72f9059849a5c1524e6ed3d28657e1a1d4eb64d Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Thu, 16 Mar 2023 13:44:44 +0100 Subject: [PATCH] Drop support for Python 2.7, fixes #40 - update README - drop support for Python 2.x module urlparse (replaced by urllib.parse) --- README.md | 2 +- server_ip_address.py | 7 +------ wat_extract_links.py | 7 +------ 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 5d329e7..e48cc09 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ pip install -r requirements.txt ## Compatibility and Requirements -Tested with Spark 2.1.0 – 2.4.6 in combination with Python 2.7 or 3.5, 3.6, 3.7, and with Spark 3.0.0 - 3.2.1 in combination with Python 3.7, 3.8 and 3.9. +Tested with Spark 3.2.3 and 3.3.2 in combination with Python 3.8, 3.9 and 3.10. See the branch [python-2.7](/commoncrawl/cc-pyspark/tree/python-2.7) if you want to run the job on Python 2.7 and older Spark versions. ## Get Sample Data diff --git a/server_ip_address.py b/server_ip_address.py index 673792c..41f19cb 100644 --- a/server_ip_address.py +++ b/server_ip_address.py @@ -1,11 +1,6 @@ import ujson as json -try: - # Python2 - from urlparse import urlparse -except ImportError: - # Python3 - from urllib.parse import urlparse +from urllib.parse import urlparse from pyspark.sql.types import StructType, StructField, StringType, LongType diff --git a/wat_extract_links.py b/wat_extract_links.py index 9d7b3de..ad1e74d 100644 --- a/wat_extract_links.py +++ b/wat_extract_links.py @@ -4,12 +4,7 @@ import ujson as json -try: - # Python2 - from urlparse import urljoin, urlparse -except ImportError: - # Python3 - from urllib.parse import urljoin, urlparse +from urllib.parse import urljoin, urlparse from pyspark.sql.types import StructType, StructField, StringType