From 81ec1849bd52fd2a56df11e944ea8ab5295ade16 Mon Sep 17 00:00:00 2001
From: Steven Maude
Date: Fri, 12 Dec 2025 18:03:42 +0000
Subject: [PATCH 1/8] Add ruff to dev dependencies

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index e2734e9..9a45832 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,10 +31,14 @@ dependencies = [
 
 [project.optional-dependencies]
 dev = [
+    "ruff",
     "pytest",
     "lxml",
 ]
 
+[tool.ruff]
+line-length = 88
+
 [tool.setuptools.packages.find]
 where = ["."]  # Or wherever your 'scraperwiki' package folder is located
 include = ["scraperwiki*"]

From 24106926977789800a8879f34569d66fc835bfe8 Mon Sep 17 00:00:00 2001
From: Steven Maude
Date: Fri, 12 Dec 2025 18:04:00 +0000
Subject: [PATCH 2/8] Run `ruff check --fix`

---
 scraperwiki/sql.py        | 3 +--
 scraperwiki/utils.py      | 1 -
 tests/test_scraperwiki.py | 4 +---
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/scraperwiki/sql.py b/scraperwiki/sql.py
index 3138e68..cef012a 100644
--- a/scraperwiki/sql.py
+++ b/scraperwiki/sql.py
@@ -1,4 +1,3 @@
-from collections import OrderedDict
 from collections.abc import Iterable, Mapping
 
 import atexit
@@ -345,7 +344,7 @@ def fit_row(connection, row, unique_keys):
         new_column = sqlalchemy.Column(column_name,
                                        get_column_type(column_value))
 
-        if not column_name in list(_State.table.columns.keys()):
+        if column_name not in list(_State.table.columns.keys()):
             new_columns.append(new_column)
             _State.table.append_column(new_column)
 
diff --git a/scraperwiki/utils.py b/scraperwiki/utils.py
index 3dea8b6..7a3eb54 100644
--- a/scraperwiki/utils.py
+++ b/scraperwiki/utils.py
@@ -4,7 +4,6 @@
 '''
 import os
 import shutil
-import sys
 import warnings
 import tempfile
 import urllib.parse
diff --git a/tests/test_scraperwiki.py b/tests/test_scraperwiki.py
index d4751a0..ef6dfe8 100644
--- a/tests/test_scraperwiki.py
+++ b/tests/test_scraperwiki.py
@@ -1,15 +1,13 @@
 import datetime
 import json
 import os
-import re
-import shutil
 import sqlite3
 import warnings
 
 from subprocess import Popen, PIPE
 from textwrap import dedent
 
-from unittest import TestCase, main
+from unittest import TestCase
 
 import scraperwiki
 

From 0104d550cae03eb450658d47eef97f59e5a94ab4 Mon Sep 17 00:00:00 2001
From: Steven Maude
Date: Fri, 12 Dec 2025 18:20:20 +0000
Subject: [PATCH 3/8] Remove unused names based on Ruff suggestions

---
 scraperwiki/sql.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scraperwiki/sql.py b/scraperwiki/sql.py
index cef012a..eac694e 100644
--- a/scraperwiki/sql.py
+++ b/scraperwiki/sql.py
@@ -226,7 +226,6 @@ def show_tables():
     """
     _State.connection()
     _State.reflect_metadata()
-    metadata = _State.metadata
 
     response = select('name, sql from sqlite_master where type="table"')
 
@@ -311,7 +310,6 @@ def create_index(column_names, unique=False):
     a list of strings. If unique is True, it will be a unique index.
""" - connection = _State.connection() _State.reflect_metadata() table_name = _State.table.name From b0f3c24dbef49ca39798c52b3e70a749d445ad8a Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Fri, 12 Dec 2025 18:22:30 +0000 Subject: [PATCH 4/8] Fix use of imports in `__init__.py` --- scraperwiki/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scraperwiki/__init__.py b/scraperwiki/__init__.py index a854c2c..20ab48e 100644 --- a/scraperwiki/__init__.py +++ b/scraperwiki/__init__.py @@ -9,6 +9,15 @@ # Compatibility sqlite = sql +__all__ = [ + 'scrape', + 'pdftoxml', + 'status', + 'swimport', + 'utils', + 'sql', +] + class Error(Exception): """All ScraperWiki exceptions are instances of this class (usually via a subclass).""" From 3ed260a3b076c58a79d2128827675821896cf9da Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Fri, 12 Dec 2025 18:23:55 +0000 Subject: [PATCH 5/8] Fix comparisons as highlighted by Ruff --- tests/test_scraperwiki.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_scraperwiki.py b/tests/test_scraperwiki.py index ef6dfe8..05e0b6a 100644 --- a/tests/test_scraperwiki.py +++ b/tests/test_scraperwiki.py @@ -120,7 +120,7 @@ def test_insert(self): class SaveAndCheck(DBTestCase): def save_and_check(self, dataIn, tableIn, dataOut, tableOut=None, twice=True): - if tableOut == None: + if tableOut is None: tableOut = '[' + tableIn + ']' # Insert @@ -140,7 +140,7 @@ def save_and_check(self, dataIn, tableIn, dataOut, tableOut=None, twice=True): # Check expected1 = dataOut - expected2 = [dataIn] if type(dataIn) == dict else dataIn + expected2 = [dataIn] if type(dataIn) is dict else dataIn self.assertListEqual(observed1, expected1) self.assertListEqual(observed2, expected2) From b25e03a27211e6b7eb3634abe29ce6e86ee96a84 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Fri, 12 Dec 2025 18:27:09 +0000 Subject: [PATCH 6/8] Reformat with `ruff` --- benchmark.py | 8 +- save_speedtest.py | 4 +- scraperwiki/__init__.py | 21 ++-- scraperwiki/sql.py | 109 ++++++++-------- scraperwiki/utils.py | 36 +++--- tests/test_scraperwiki.py | 257 ++++++++++++++++++++------------------ 6 files changed, 232 insertions(+), 203 deletions(-) diff --git a/benchmark.py b/benchmark.py index 49468bb..e8b5e60 100755 --- a/benchmark.py +++ b/benchmark.py @@ -2,14 +2,14 @@ import scraperwiki import os -rows = [{'id': i, 'test': i * 2, 's': "abc"} for i in range(1000)] +rows = [{"id": i, "test": i * 2, "s": "abc"} for i in range(1000)] try: - os.remove('scraperwiki.sqlite') + os.remove("scraperwiki.sqlite") except FileNotFoundError: pass -scraperwiki.sql.save(['id'], rows) +scraperwiki.sql.save(["id"], rows) for i, row in enumerate(rows): - scraperwiki.sql.save(['id'], row) + scraperwiki.sql.save(["id"], row) diff --git a/save_speedtest.py b/save_speedtest.py index c26b09a..add2338 100755 --- a/save_speedtest.py +++ b/save_speedtest.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python3 import scraperwiki -rows = [{'id': i, 'test': i * 2, 's': "xx"*i} for i in range(10)] +rows = [{"id": i, "test": i * 2, "s": "xx" * i} for i in range(10)] for i, row in enumerate(rows): - scraperwiki.sql.save(['id'], row) + scraperwiki.sql.save(["id"], row) diff --git a/scraperwiki/__init__.py b/scraperwiki/__init__.py index 20ab48e..bb072d3 100644 --- a/scraperwiki/__init__.py +++ b/scraperwiki/__init__.py @@ -1,7 +1,8 @@ -''' +""" Local version of ScraperWiki Utils, documentation here: https://scraperwiki.com/docs/python/python_help_documentation/ -''' +""" + from .utils import scrape, pdftoxml, status, swimport from . import utils from . import sql @@ -10,19 +11,23 @@ sqlite = sql __all__ = [ - 'scrape', - 'pdftoxml', - 'status', - 'swimport', - 'utils', - 'sql', + "scrape", + "pdftoxml", + "status", + "swimport", + "utils", + "sql", ] + class Error(Exception): """All ScraperWiki exceptions are instances of this class (usually via a subclass).""" + pass + class CPUTimeExceededError(Error): """CPU time limit exceeded.""" + pass diff --git a/scraperwiki/sql.py b/scraperwiki/sql.py index eac694e..d97461f 100644 --- a/scraperwiki/sql.py +++ b/scraperwiki/sql.py @@ -10,8 +10,9 @@ import alembic.ddl import sqlalchemy -DATABASE_NAME = os.environ.get("SCRAPERWIKI_DATABASE_NAME", - "sqlite:///scraperwiki.sqlite") +DATABASE_NAME = os.environ.get( + "SCRAPERWIKI_DATABASE_NAME", "sqlite:///scraperwiki.sqlite" +) DATABASE_TIMEOUT = float(os.environ.get("SCRAPERWIKI_DATABASE_TIMEOUT", 300)) SECONDS_BETWEEN_COMMIT = 2 @@ -19,31 +20,33 @@ # The scraperwiki.sqlite.SqliteError exception SqliteError = sqlalchemy.exc.SQLAlchemyError + class Blob(bytes): """ Represents a blob as a string. """ + pass + PYTHON_SQLITE_TYPE_MAP = { str: sqlalchemy.types.Text, int: sqlalchemy.types.BigInteger, bool: sqlalchemy.types.Boolean, float: sqlalchemy.types.Float, - datetime.date: sqlalchemy.types.Date, datetime.datetime: sqlalchemy.types.DateTime, - bytes: sqlalchemy.types.LargeBinary, Blob: sqlalchemy.types.LargeBinary, } -class _State: +class _State: """ This class maintains global state relating to the database such as connection. It does not form part of the public interface. """ + db_path = DATABASE_NAME engine = None _connection = None @@ -54,7 +57,7 @@ class _State: # _set_table(); it's left unassigned here to catch # accidental uses of it. # table_pending = None - vars_table_name = 'swvariables' + vars_table_name = "swvariables" last_commit = None echo = False @@ -62,14 +65,16 @@ class _State: def connection(cls): if cls._connection is None: create = sqlalchemy.create_engine - cls.engine = create(cls.db_path, echo=cls.echo, - connect_args={'timeout': DATABASE_TIMEOUT}) + cls.engine = create( + cls.db_path, echo=cls.echo, connect_args={"timeout": DATABASE_TIMEOUT} + ) cls._connection = cls.engine.connect() cls.new_transaction() if cls.table is None: cls.reflect_metadata() - cls.table = sqlalchemy.Table('swdata', _State.metadata, - extend_existing=True) + cls.table = sqlalchemy.Table( + "swdata", _State.metadata, extend_existing=True + ) if cls._transaction is None: cls.new_transaction() return cls._connection @@ -94,7 +99,6 @@ def check_last_committed(cls): class Transaction: - """ This context manager must be used when other services need to connect to the database. 
@@ -147,9 +151,9 @@ def execute(query, data=None):
         pass
 
     if not result.returns_rows:
-        return {'data': [], 'keys': []}
+        return {"data": [], "keys": []}
 
-    return {'data': result.fetchall(), 'keys': list(result.keys())}
+    return {"data": result.fetchall(), "keys": list(result.keys())}
 
 
 def select(query, data=None):
@@ -167,7 +171,7 @@
     elif not isinstance(data, (tuple, dict)):
         data = (data,)
 
-    result = connection.exec_driver_sql('select ' + query, data)
+    result = connection.exec_driver_sql("select " + query, data)
 
     rows = []
     for row in result:
@@ -176,7 +180,7 @@ def select(query, data=None):
 
     return rows
 
 
-def save(unique_keys, data, table_name='swdata'):
+def save(unique_keys, data, table_name="swdata"):
     """
     Save the given data to the table specified by
     `table_name` (which defaults to 'swdata'). The data must be a mapping
@@ -192,14 +196,14 @@
         # Is a single datum
         data = [data]
     elif not isinstance(data, Iterable):
-        raise TypeError("Data must be a single mapping or an iterable "
-                        "of mappings")
+        raise TypeError("Data must be a single mapping or an iterable of mappings")
 
-    insert = sqlalchemy.insert(_State.table).prefix_with('OR REPLACE')
+    insert = sqlalchemy.insert(_State.table).prefix_with("OR REPLACE")
     for row in data:
         if not isinstance(row, Mapping):
-            raise TypeError("Elements of data must be mappings, got {}".format(
-                type(row)))
+            raise TypeError(
+                "Elements of data must be mappings, got {}".format(type(row))
+            )
         fit_row(connection, row, unique_keys)
         connection.execute(insert.values(row))
     _State.check_last_committed()
@@ -211,8 +215,7 @@ def _set_table(table_name):
     """
     _State.connection()
     _State.reflect_metadata()
-    _State.table = sqlalchemy.Table(table_name, _State.metadata,
-                                    extend_existing=True)
+    _State.table = sqlalchemy.Table(table_name, _State.metadata, extend_existing=True)
 
     if list(_State.table.columns.keys()) == []:
         _State.table_pending = True
@@ -229,7 +232,7 @@ def show_tables():
 
     response = select('name, sql from sqlite_master where type="table"')
 
-    return {row['name']: row['sql'] for row in response}
+    return {row["name"]: row["sql"] for row in response}
 
 
 def save_var(name, value):
@@ -241,11 +244,12 @@ def save_var(name, value):
     _State.reflect_metadata()
 
     vars_table = sqlalchemy.Table(
-        _State.vars_table_name, _State.metadata,
-        sqlalchemy.Column('name', sqlalchemy.types.Text, primary_key=True),
-        sqlalchemy.Column('value_blob', sqlalchemy.types.LargeBinary),
-        sqlalchemy.Column('type', sqlalchemy.types.Text),
-        keep_existing=True
+        _State.vars_table_name,
+        _State.metadata,
+        sqlalchemy.Column("name", sqlalchemy.types.Text, primary_key=True),
+        sqlalchemy.Column("value_blob", sqlalchemy.types.LargeBinary),
+        sqlalchemy.Column("type", sqlalchemy.types.Text),
+        keep_existing=True,
     )
     vars_table.create(connection, checkfirst=True)
 
@@ -255,29 +259,34 @@ def save_var(name, value):
     if column_type == sqlalchemy.types.LargeBinary:
         value_blob = value
     else:
-        value_blob = str(value).encode('utf-8')
+        value_blob = str(value).encode("utf-8")
 
-    values = dict(name=name,
-                  value_blob=value_blob,
-                  # value_blob=Blob(value),
-                  type=column_type.__visit_name__.lower())
+    values = dict(
+        name=name,
+        value_blob=value_blob,
+        # value_blob=Blob(value),
+        type=column_type.__visit_name__.lower(),
+    )
 
-    stmt = sqlalchemy.insert(vars_table).prefix_with('OR REPLACE').values(**values)
+    stmt = sqlalchemy.insert(vars_table).prefix_with("OR REPLACE").values(**values)
     connection.execute(stmt)
     _State.new_transaction()
 
+
 def get_var(name, default=None):
     """
     Returns the variable with the provided key from the table specified
     by _State.vars_table_name.
     """
-    alchemytypes = {"text": lambda x: x.decode('utf-8'),
-                    "big_integer": lambda x: int(x),
-                    "date": lambda x: x.decode('utf-8'),
-                    "datetime": lambda x: x.decode('utf-8'),
-                    "float": lambda x: float(x),
-                    "large_binary": lambda x: x,
-                    "boolean": lambda x: x==b'True'}
+    alchemytypes = {
+        "text": lambda x: x.decode("utf-8"),
+        "big_integer": lambda x: int(x),
+        "date": lambda x: x.decode("utf-8"),
+        "datetime": lambda x: x.decode("utf-8"),
+        "float": lambda x: float(x),
+        "large_binary": lambda x: x,
+        "boolean": lambda x: x == b"True",
+    }
 
     connection = _State.connection()
     _State.new_transaction()
@@ -299,9 +308,9 @@ def get_var(name, default=None):
     execute = connection.execute
     execute(f"CREATE TEMPORARY TABLE _sw_tmp ('value' {result.type})")
     execute("INSERT INTO _sw_tmp VALUES (:value)", value=result.value_blob)
-    var = execute('SELECT value FROM _sw_tmp').fetchone().value
+    var = execute("SELECT value FROM _sw_tmp").fetchone().value
     execute("DROP TABLE _sw_tmp")
-    return var.decode('utf-8')
+    return var.decode("utf-8")
 
 
 def create_index(column_names, unique=False):
@@ -315,12 +324,11 @@
 
     table = _State.table
 
-    index_name = re.sub(r'[^a-zA-Z0-9]', '', table_name) + '_'
-    index_name += '_'.join(re.sub(r'[^a-zA-Z0-9]', '', x)
-                           for x in column_names)
+    index_name = re.sub(r"[^a-zA-Z0-9]", "", table_name) + "_"
+    index_name += "_".join(re.sub(r"[^a-zA-Z0-9]", "", x) for x in column_names)
 
     if unique:
-        index_name += '_unique'
+        index_name += "_unique"
 
     columns = []
     for column_name in column_names:
@@ -339,8 +347,7 @@ def fit_row(connection, row, unique_keys):
     """
     new_columns = []
     for column_name, column_value in list(row.items()):
-        new_column = sqlalchemy.Column(column_name,
-                                       get_column_type(column_value))
+        new_column = sqlalchemy.Column(column_name, get_column_type(column_value))
 
         if column_name not in list(_State.table.columns.keys()):
             new_columns.append(new_column)
@@ -379,9 +386,7 @@ def get_column_type(column_value):
     """
     Return the appropriate SQL column type for the given value.
     """
-    return PYTHON_SQLITE_TYPE_MAP.get(type(column_value),
-                                      sqlalchemy.types.Text)
-
+    return PYTHON_SQLITE_TYPE_MAP.get(type(column_value), sqlalchemy.types.Text)
 
 
 def commit():
diff --git a/scraperwiki/utils.py b/scraperwiki/utils.py
index 7a3eb54..b4d6124 100644
--- a/scraperwiki/utils.py
+++ b/scraperwiki/utils.py
@@ -1,7 +1,8 @@
-'''
+"""
 Local version of ScraperWiki Utils, documentation here:
 https://scraperwiki.com/docs/python/python_help_documentation/
-'''
+"""
+
 import os
 import shutil
 import warnings
@@ -11,19 +12,19 @@
 def scrape(url, params=None, user_agent=None):
-    '''
+    """
     Scrape a URL optionally with parameters.
     This is effectively a wrapper around urllib.request.urlopen.
-    '''
+    """
 
     headers = {}
 
     if user_agent:
-        headers['User-Agent'] = user_agent
+        headers["User-Agent"] = user_agent
 
     data = None
     if params:
-        data = urllib.parse.urlencode(params).encode('utf-8')
+        data = urllib.parse.urlencode(params).encode("utf-8")
 
     req = urllib.request.Request(url, data=data, headers=headers)
@@ -35,27 +36,28 @@ def scrape(url, params=None, user_agent=None):
 
 def pdftoxml(pdfdata, options=""):
     """converts pdf file to xml file"""
-    if not shutil.which('pdftohtml'):
+    if not shutil.which("pdftohtml"):
         warnings.warn(
-            'scraperwiki.pdftoxml requires pdftohtml, but pdftohtml was not found '
-            'in the PATH. If you wish to use this function, you probably need to '
-            'install pdftohtml.'
+            "scraperwiki.pdftoxml requires pdftohtml, but pdftohtml was not found "
+            "in the PATH. If you wish to use this function, you probably need to "
+            "install pdftohtml."
         )
         return None
 
-    pdffout = tempfile.NamedTemporaryFile(suffix='.pdf')
+    pdffout = tempfile.NamedTemporaryFile(suffix=".pdf")
     pdffout.write(pdfdata)
     pdffout.flush()
 
-    xmlin = tempfile.NamedTemporaryFile(mode='r', suffix='.xml', encoding="utf-8")
+    xmlin = tempfile.NamedTemporaryFile(mode="r", suffix=".xml", encoding="utf-8")
     tmpxml = xmlin.name  # "temph.xml"
     cmd = 'pdftohtml -xml -nodrm -zoom 1.5 -enc UTF-8 -noframes {} "{}" "{}"'.format(
-        options, pdffout.name, os.path.splitext(tmpxml)[0])
+        options, pdffout.name, os.path.splitext(tmpxml)[0]
+    )
     # can't turn off output, so throw away even stderr yeuch
     cmd = cmd + " >/dev/null 2>&1"
     os.system(cmd)
     pdffout.close()
-    #xmlfin = open(tmpxml)
+    # xmlfin = open(tmpxml)
     xmldata = xmlin.read()
     xmlin.close()
     return xmldata
@@ -63,7 +65,11 @@
 
 def status(type, message=None):
     """Retained for backwards compatibility."""
-    warnings.warn("status() is no longer in use following ScraperWiki/Quickcode application shutdown", DeprecationWarning, stacklevel=2)
+    warnings.warn(
+        "status() is no longer in use following ScraperWiki/Quickcode application shutdown",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     return
diff --git a/tests/test_scraperwiki.py b/tests/test_scraperwiki.py
index 05e0b6a..d09f42a 100644
--- a/tests/test_scraperwiki.py
+++ b/tests/test_scraperwiki.py
@@ -12,13 +12,16 @@ import scraperwiki
 import sys
 
+
 # scraperwiki.sql._State.echo = True
 
-DB_NAME = 'scraperwiki.sqlite'
+DB_NAME = "scraperwiki.sqlite"
+
 
 class DBTestCase(TestCase):
     """
     Ensures database cleanup.
""" + def setUp(self): self.clean_db() super().setUp() @@ -53,16 +56,18 @@ class TestAAAWarning(DBTestCase): def test_save_no_warn(self): with warnings.catch_warnings(): warnings.simplefilter("error") - scraperwiki.sql.save(['id'], dict(id=4, tumble='weed'), - table_name="warning_test") + scraperwiki.sql.save( + ["id"], dict(id=4, tumble="weed"), table_name="warning_test" + ) + class TestSaveGetVar(DBTestCase): def savegetvar(self, var): - scraperwiki.sql.save_var(u"weird\u1234", var) - self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), var) + scraperwiki.sql.save_var("weird\u1234", var) + self.assertEqual(scraperwiki.sql.get_var("weird\u1234"), var) def test_string(self): - self.savegetvar(u"asdio\u1234") + self.savegetvar("asdio\u1234") def test_int(self): self.savegetvar(1) @@ -76,52 +81,59 @@ def test_bool(self): def test_bool2(self): self.savegetvar(True) - def test_bytes(self): self.savegetvar(b"asodpa\x00\x22") - def test_date(self): date1 = datetime.datetime.now() date2 = datetime.date.today() - scraperwiki.sql.save_var(u"weird\u1234", date1) - self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), str(date1)) - scraperwiki.sql.save_var(u"weird\u1234", date2) - self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), str(date2)) + scraperwiki.sql.save_var("weird\u1234", date1) + self.assertEqual(scraperwiki.sql.get_var("weird\u1234"), str(date1)) + scraperwiki.sql.save_var("weird\u1234", date2) + self.assertEqual(scraperwiki.sql.get_var("weird\u1234"), str(date2)) def test_save_multiple_values(self): - scraperwiki.sql.save_var(u'foo\xc3', u'hello') - scraperwiki.sql.save_var(u'bar', u'goodbye\u1234') + scraperwiki.sql.save_var("foo\xc3", "hello") + scraperwiki.sql.save_var("bar", "goodbye\u1234") + + self.assertEqual("hello", scraperwiki.sql.get_var("foo\xc3")) + self.assertEqual("goodbye\u1234", scraperwiki.sql.get_var("bar")) - self.assertEqual(u'hello', scraperwiki.sql.get_var(u'foo\xc3')) - self.assertEqual(u'goodbye\u1234', scraperwiki.sql.get_var(u'bar')) class TestGetNonexistantVar(DBTestCase): def test_get(self): - self.assertIsNone(scraperwiki.sql.get_var(u'meatball\xff')) + self.assertIsNone(scraperwiki.sql.get_var("meatball\xff")) + class TestSaveVar(DBTestCase): def setUp(self): super(TestSaveVar, self).setUp() - scraperwiki.sql.save_var(u"birthday\xfe", u"\u1234November 30, 1888") + scraperwiki.sql.save_var("birthday\xfe", "\u1234November 30, 1888") connection = sqlite3.connect(DB_NAME) self.cursor = connection.cursor() def test_insert(self): - self.cursor.execute(u""" + self.cursor.execute(""" SELECT name, value_blob, type FROM `swvariables` WHERE name == "birthday\xfe" """) ((colname, value, _type),) = self.cursor.fetchall() - expected = [(u"birthday\xfe", u"\u1234November 30, 1888", "text",)] - observed = [(colname, value.decode('utf-8'), _type)] + expected = [ + ( + "birthday\xfe", + "\u1234November 30, 1888", + "text", + ) + ] + observed = [(colname, value.decode("utf-8"), _type)] self.assertEqual(observed, expected) + class SaveAndCheck(DBTestCase): def save_and_check(self, dataIn, tableIn, dataOut, tableOut=None, twice=True): if tableOut is None: - tableOut = '[' + tableIn + ']' + tableOut = "[" + tableIn + "]" # Insert with scraperwiki.sql.Transaction(): @@ -130,13 +142,13 @@ def save_and_check(self, dataIn, tableIn, dataOut, tableOut=None, twice=True): # Observe with pysqlite connection = sqlite3.connect(DB_NAME) cursor = connection.cursor() - cursor.execute(u"SELECT * FROM %s" % tableOut) + cursor.execute("SELECT * FROM %s" % tableOut) observed1 = 
cursor.fetchall() connection.close() if twice: # Observe using this module - observed2 = scraperwiki.sql.select(u'* FROM %s' % tableOut) + observed2 = scraperwiki.sql.select("* FROM %s" % tableOut) # Check expected1 = dataOut @@ -145,41 +157,46 @@ def save_and_check(self, dataIn, tableIn, dataOut, tableOut=None, twice=True): self.assertListEqual(observed1, expected1) self.assertListEqual(observed2, expected2) + class SaveAndSelect(DBTestCase): def save_and_select(self, d): - scraperwiki.sql.save([], {u"foo\xdd": d}) - observed = scraperwiki.sql.select(u'* FROM swdata')[0][u'foo\xdd'] + scraperwiki.sql.save([], {"foo\xdd": d}) + observed = scraperwiki.sql.select("* FROM swdata")[0]["foo\xdd"] self.assertEqual(d, observed) class TestUniqueKeys(SaveAndSelect): def test_empty(self): - scraperwiki.sql.save([], {u"foo\xde": 3}, table_name=u"Chico\xcc") - observed = scraperwiki.sql.execute(u'PRAGMA index_list(Chico\xcc)') - self.assertEqual(observed, {u'data': [], u'keys': ['seq', 'name', 'unique', 'origin', 'partial']}) + scraperwiki.sql.save([], {"foo\xde": 3}, table_name="Chico\xcc") + observed = scraperwiki.sql.execute("PRAGMA index_list(Chico\xcc)") + self.assertEqual( + observed, + {"data": [], "keys": ["seq", "name", "unique", "origin", "partial"]}, + ) def test_two(self): - scraperwiki.sql.save([u'foo\xdc', u'bar\xcd'], {u'foo\xdc': 3, u'bar\xcd': 9}, u'Harpo\xbb') - observed = scraperwiki.sql.execute( - u'PRAGMA index_info(Harpo_foo_bar_unique)') + scraperwiki.sql.save( + ["foo\xdc", "bar\xcd"], {"foo\xdc": 3, "bar\xcd": 9}, "Harpo\xbb" + ) + observed = scraperwiki.sql.execute("PRAGMA index_info(Harpo_foo_bar_unique)") # Indexness self.assertIsNotNone(observed) # Indexed columns expected1 = { - u'keys': [u'seqno', u'cid', u'name'], - u'data': [ - (0, 0, u'foo\xdc'), - (1, 1, u'bar\xcd'), - ] + "keys": ["seqno", "cid", "name"], + "data": [ + (0, 0, "foo\xdc"), + (1, 1, "bar\xcd"), + ], } expected2 = { - u'keys': [u'seqno', u'cid', u'name'], - u'data': [ - (0, 1, u'foo\xdc'), - (1, 0, u'bar\xcd'), - ] + "keys": ["seqno", "cid", "name"], + "data": [ + (0, 1, "foo\xdc"), + (1, 0, "bar\xcd"), + ], } try: self.assertDictEqual(observed, expected1) @@ -187,17 +204,18 @@ def test_two(self): self.assertDictEqual(observed, expected2) # Uniqueness - indices = scraperwiki.sql.execute(u'PRAGMA index_list(Harpo\xbb)') - namecol = indices[u"keys"].index(u'name') - for index in indices[u"data"]: - if index[namecol] == u'Harpo_foo_bar_unique': + indices = scraperwiki.sql.execute("PRAGMA index_list(Harpo\xbb)") + namecol = indices["keys"].index("name") + for index in indices["data"]: + if index[namecol] == "Harpo_foo_bar_unique": break else: index = {} - uniquecol = indices[u"keys"].index(u'unique') + uniquecol = indices["keys"].index("unique") self.assertEqual(index[uniquecol], 1) + class TestSaveColumn(DBTestCase): def test_add_column(self): # Indicative for @@ -209,33 +227,33 @@ def test_add_column(self): # need to run a subprocess. 
         connection = sqlite3.connect(DB_NAME)
         cursor = connection.cursor()
-        cursor.execute(u'CREATE TABLE frigled\xaa (a TEXT);')
-        cursor.execute(u'INSERT INTO frigled\xaa VALUES ("boo\xaa")')
+        cursor.execute("CREATE TABLE frigled\xaa (a TEXT);")
+        cursor.execute('INSERT INTO frigled\xaa VALUES ("boo\xaa")')
         connection.close()
 
-        script = dedent(u"""
+        script = dedent("""
             import scraperwiki
             scraperwiki.sql.save(['id'], dict(id=1, a=u"bar\xaa", b=u"foo\xaa"))
         """)
         with open("/dev/null") as null:
-            process = Popen([sys.executable, "-c", script],
-                            stdout=PIPE, stderr=PIPE, stdin=null)
+            process = Popen(
+                [sys.executable, "-c", script], stdout=PIPE, stderr=PIPE, stdin=null
+            )
         stdout, stderr = process.communicate()
         assert process.returncode == 0
-        self.assertEqual(stdout, "".encode('utf-8'))
-        self.assertEqual(stderr, "".encode('utf-8'))
+        self.assertEqual(stdout, "".encode("utf-8"))
+        self.assertEqual(stderr, "".encode("utf-8"))
 
 
 class TestSave(SaveAndCheck):
     def test_save_int(self):
-        self.save_and_check(
-            {u"model-number\xaa": 293}, u"model-numbers\xaa", [(293,)]
-        )
+        self.save_and_check({"model-number\xaa": 293}, "model-numbers\xaa", [(293,)])
 
     def test_save_string(self):
         self.save_and_check(
-            {u"lastname\xaa": u"LeTourneau\u1234"}, u"diesel-engineers\xaa", [
-                (u'LeTourneau\u1234',)]
+            {"lastname\xaa": "LeTourneau\u1234"},
+            "diesel-engineers\xaa",
+            [("LeTourneau\u1234",)],
         )
 
     # Ensure we can round-trip a string and then json encode it.
@@ -245,22 +263,16 @@ def test_save_string(self):
         json.dumps(data)
 
     def test_save_twice(self):
+        self.save_and_check({"modelNumber\xaa": 293}, "modelNumbers", [(293,)])
         self.save_and_check(
-            {u"modelNumber\xaa": 293}, u"modelNumbers", [(293,)]
-        )
-        self.save_and_check(
-            {u"modelNumber\xaa": 293}, u"modelNumbers\xaa", [(293,), (293,)], twice=False
+            {"modelNumber\xaa": 293}, "modelNumbers\xaa", [(293,), (293,)], twice=False
         )
 
     def test_save_true(self):
-        self.save_and_check(
-            {u"a": True}, u"true", [(1,)]
-        )
+        self.save_and_check({"a": True}, "true", [(1,)])
 
     def test_save_false(self):
-        self.save_and_check(
-            {u"a": False}, u"false", [(0,)]
-        )
+        self.save_and_check({"a": False}, "false", [(0,)])
 
     def test_save_table_name(self):
         """
@@ -268,12 +280,11 @@ def test_save_table_name(self):
         subsequent .save without table_name= uses the
         `swdata` table again.
         """
-        scraperwiki.sql.save(['id'], dict(id=1, stuff=1),
-                             table_name=u'sticky\u1234')
-        scraperwiki.sql.save(['id'], dict(id=2, stuff=2))
-        results = scraperwiki.sql.select(u'* FROM sticky\u1234')
+        scraperwiki.sql.save(["id"], dict(id=1, stuff=1), table_name="sticky\u1234")
+        scraperwiki.sql.save(["id"], dict(id=2, stuff=2))
+        results = scraperwiki.sql.select("* FROM sticky\u1234")
         self.assertEqual(1, len(results))
-        (row, ) = results
+        (row,) = results
         self.assertDictEqual(dict(id=1, stuff=1), row)
 
     def test_lxml_string(self):
@@ -286,106 +297,108 @@ def test_lxml_string(self):
         # Careful, this looks like a string (eg, when printed or
         # repr()d), but is actually an instance of some class
         # internal to lxml.
-        s = lxml.html.fromstring(b'<b>Hello&#1234;</b>').xpath(b'//b')[0].text_content()
-        self.save_and_check(
-            {"text": s},
-            "lxml",
-            [(str(s),)]
-        )
+        s = lxml.html.fromstring(b"<b>Hello&#1234;</b>").xpath(b"//b")[0].text_content()
+        self.save_and_check({"text": s}, "lxml", [(str(s),)])
 
     def test_save_and_drop(self):
-        scraperwiki.sql.save([], dict(foo=7), table_name=u"dropper\xaa")
-        scraperwiki.sql.execute(u"DROP TABLE dropper\xaa")
-        scraperwiki.sql.save([], dict(foo=9), table_name=u"dropper\xaa")
+        scraperwiki.sql.save([], dict(foo=7), table_name="dropper\xaa")
+        scraperwiki.sql.execute("DROP TABLE dropper\xaa")
+        scraperwiki.sql.save([], dict(foo=9), table_name="dropper\xaa")
+
 
 class TestQuestionMark(DBTestCase):
     def test_one_question_mark_with_nonlist(self):
-        scraperwiki.sql.execute(u'CREATE TABLE zhuozi\xaa (\xaa TEXT);')
-        scraperwiki.sql.execute(u'INSERT INTO zhuozi\xaa VALUES (?)', u'apple\xff')
-        observed = scraperwiki.sql.select(u'* FROM zhuozi\xaa')
-        self.assertListEqual(observed, [{u'\xaa': u'apple\xff'}])
-        scraperwiki.sql.execute(u'DROP TABLE zhuozi\xaa')
+        scraperwiki.sql.execute("CREATE TABLE zhuozi\xaa (\xaa TEXT);")
+        scraperwiki.sql.execute("INSERT INTO zhuozi\xaa VALUES (?)", "apple\xff")
+        observed = scraperwiki.sql.select("* FROM zhuozi\xaa")
+        self.assertListEqual(observed, [{"\xaa": "apple\xff"}])
+        scraperwiki.sql.execute("DROP TABLE zhuozi\xaa")
 
     def test_one_question_mark_with_list(self):
-        scraperwiki.sql.execute(u'CREATE TABLE zhuozi\xaa (\xaa TEXT);')
-        scraperwiki.sql.execute(u'INSERT INTO zhuozi\xaa VALUES (?)', [u'apple\xff'])
-        observed = scraperwiki.sql.select(u'* FROM zhuozi\xaa')
-        self.assertListEqual(observed, [{u'\xaa': u'apple\xff'}])
-        scraperwiki.sql.execute(u'DROP TABLE zhuozi\xaa')
+        scraperwiki.sql.execute("CREATE TABLE zhuozi\xaa (\xaa TEXT);")
+        scraperwiki.sql.execute("INSERT INTO zhuozi\xaa VALUES (?)", ["apple\xff"])
+        observed = scraperwiki.sql.select("* FROM zhuozi\xaa")
+        self.assertListEqual(observed, [{"\xaa": "apple\xff"}])
+        scraperwiki.sql.execute("DROP TABLE zhuozi\xaa")
 
     def test_multiple_question_marks(self):
-        scraperwiki.sql.execute('CREATE TABLE zhuozi (a TEXT, b TEXT);')
-        scraperwiki.sql.execute(
-            'INSERT INTO zhuozi VALUES (?, ?)', ['apple', 'banana'])
-        observed = scraperwiki.sql.select('* FROM zhuozi')
-        self.assertListEqual(observed, [{'a': 'apple', 'b': 'banana'}])
-        scraperwiki.sql.execute('DROP TABLE zhuozi')
+        scraperwiki.sql.execute("CREATE TABLE zhuozi (a TEXT, b TEXT);")
+        scraperwiki.sql.execute("INSERT INTO zhuozi VALUES (?, ?)", ["apple", "banana"])
+        observed = scraperwiki.sql.select("* FROM zhuozi")
+        self.assertListEqual(observed, [{"a": "apple", "b": "banana"}])
+        scraperwiki.sql.execute("DROP TABLE zhuozi")
 
 
 class TestDateTime(DBTestCase):
     def rawdate(self, table="swdata", column="datetime"):
         connection = sqlite3.connect(DB_NAME)
         cursor = connection.cursor()
-        cursor.execute(u"SELECT {} FROM {}".format(column, table))
+        cursor.execute("SELECT {} FROM {}".format(column, table))
         rawdate = cursor.fetchall()[0][0]
         connection.close()
         return rawdate
 
     def test_save_date(self):
-        d = datetime.datetime.strptime('1991-03-30', '%Y-%m-%d').date()
+        d = datetime.datetime.strptime("1991-03-30", "%Y-%m-%d").date()
         with scraperwiki.sql.Transaction():
-            scraperwiki.sql.save([], {u"birthday\xaa": d})
+            scraperwiki.sql.save([], {"birthday\xaa": d})
 
         self.assertEqual(
-            [{u'birthday\xaa': str(d)}],
-            scraperwiki.sql.select("* FROM swdata"))
+            [{"birthday\xaa": str(d)}], scraperwiki.sql.select("* FROM swdata")
+        )
 
         self.assertEqual(
-            {u'keys': [u'birthday\xaa'], u'data': [(str(d),)]},
-            scraperwiki.sql.execute("SELECT * FROM swdata"))
+            {"keys": ["birthday\xaa"], "data": [(str(d),)]},
+            scraperwiki.sql.execute("SELECT * FROM swdata"),
+        )
 
-        self.assertEqual(str(d), self.rawdate(column=u"birthday\xaa"))
+        self.assertEqual(str(d), self.rawdate(column="birthday\xaa"))
 
     def test_save_datetime(self):
-        d = datetime.datetime.strptime('1990-03-30', '%Y-%m-%d')
+        d = datetime.datetime.strptime("1990-03-30", "%Y-%m-%d")
         with scraperwiki.sql.Transaction():
-            scraperwiki.sql.save([], {"birthday": d},
-                                 table_name="datetimetest")
+            scraperwiki.sql.save([], {"birthday": d}, table_name="datetimetest")
 
         exemplar = str(d)
         # SQLAlchemy appears to convert with extended precision.
         exemplar += ".000000"
 
         self.assertEqual(
-            [{u'birthday': exemplar}],
-            scraperwiki.sql.select("* FROM datetimetest"))
+            [{"birthday": exemplar}], scraperwiki.sql.select("* FROM datetimetest")
+        )
 
         self.assertDictEqual(
-            {u'keys': [u'birthday'], u'data': [(exemplar,)]},
-            scraperwiki.sql.execute("SELECT * FROM datetimetest"))
+            {"keys": ["birthday"], "data": [(exemplar,)]},
+            scraperwiki.sql.execute("SELECT * FROM datetimetest"),
+        )
+
+        self.assertEqual(
+            exemplar, self.rawdate(table="datetimetest", column="birthday")
+        )
 
-        self.assertEqual(exemplar, self.rawdate(table="datetimetest", column="birthday"))
 
 class TestStatus(TestCase):
-    'Test that the status endpoint works.'
+    "Test that the status endpoint works."
 
     def test_status(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=DeprecationWarning)
-            self.assertEqual(scraperwiki.status('ok'), None)
+            self.assertEqual(scraperwiki.status("ok"), None)
+
 
 class TestUnicodeColumns(DBTestCase):
     maxDiff = None
+
     def test_add_column_once_only(self):
-        scraperwiki.sqlite.save(data = {"i": 1, u"a\xa0b": 1}, unique_keys = ['i'])
-        scraperwiki.sqlite.save(data = {"i": 2, u"a\xa0b": 2}, unique_keys = ['i'])
+        scraperwiki.sqlite.save(data={"i": 1, "a\xa0b": 1}, unique_keys=["i"])
+        scraperwiki.sqlite.save(data={"i": 2, "a\xa0b": 2}, unique_keys=["i"])
+
 
 class TestImports(TestCase):
-    'Test that all module contents are imported.'
+    "Test that all module contents are imported."
 
     def setUp(self):
-        self.sw = __import__('scraperwiki')
+        self.sw = __import__("scraperwiki")
 
     def test_import_scraperwiki_root(self):
         self.sw.scrape

From cb606330d9cc3aae30a366ea623d38033daccaab Mon Sep 17 00:00:00 2001
From: Steven Maude
Date: Sat, 13 Dec 2025 00:02:57 +0000
Subject: [PATCH 7/8] Update Makefile to run tests and `ruff`

---
 Makefile | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 8b7ac1b..7dc4f28 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,16 @@
-run: build
-	@docker run \
-		--rm \
-		-ti \
-		scraperwiki-python
+test:
+	@pytest
 
-build:
-	@docker build -t scraperwiki-python .
+fix:
+	@ruff check --fix
 
-.PHONY: run build
+check-fix:
+	@ruff check
+
+format:
+	@ruff format
+
+check-format:
+	@ruff format --check
+
+.PHONY: test fix check-fix format check-format

From c9002af1c95eaae832f8332bbdb46241b79ec77b Mon Sep 17 00:00:00 2001
From: Steven Maude
Date: Sat, 13 Dec 2025 00:03:24 +0000
Subject: [PATCH 8/8] Check formatting and linting in CI

---
 .github/workflows/ci-build.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
index b44ef4e..1109983 100644
--- a/.github/workflows/ci-build.yml
+++ b/.github/workflows/ci-build.yml
@@ -26,5 +26,13 @@ jobs:
         run: |
           pip install .[dev]
 
+      - name: Check formatting
+        run: |
+          make check-format
+
+      - name: Check linting
+        run: |
+          make check-fix
+
       - name: Run tests
         run: pytest
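
Usage note for reviewers (not part of any commit above): with the `dev`
extras from patch 1/8 installed, the Makefile targets added in patch 7/8
run the same commands as the CI steps added in patch 8/8, so a local
pre-push check might look like:

    pip install .[dev]   # pulls in ruff alongside pytest and lxml
    make check-format    # ruff format --check
    make check-fix       # ruff check
    make test            # pytest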