From 1b03bef64d4750d8cdbfaf06fd7c797a53f5fa88 Mon Sep 17 00:00:00 2001 From: barrust Date: Sun, 7 Jan 2024 20:07:19 -0500 Subject: [PATCH 01/10] first configuration pass --- mediawiki/configuraton.py | 220 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 mediawiki/configuraton.py diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py new file mode 100644 index 0000000..656c574 --- /dev/null +++ b/mediawiki/configuraton.py @@ -0,0 +1,220 @@ +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Dict, Optional, Union + +from mediawiki.mediawiki import URL, VERSION + + +@dataclass +class Configuration: + _lang: str = field(default="en", init=False, repr=False) + _api_url: str = field(default="https://{lang}.wikipedia.org/w/api.php", init=False, repr=False) + _category_prefix: str = field(default="Category", init=False, repr=False) + _timeout: Optional[float] = field(default=15.0, init=False, repr=False) + _user_agent: str = field(default=f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT", init=False, repr=False) + _proxies: Optional[Dict] = field(default=None, init=False, repr=False) + _verify_ssl: Union[bool, str] = field(default=True, init=False, repr=False) + _rate_limit: bool = field(default=False, init=False, repr=False) + _rate_limit_min_wait: timedelta = field(default=timedelta(milliseconds=50), init=False, repr=False) + _username: Optional[str] = field(default=None, init=False, repr=False) + _password: Optional[str] = field(default=None, init=False, repr=False) + _refresh_interval: Optional[int] = field(default=None, init=False, repr=False) + _use_cache: bool = field(default=True, init=False, repr=False) + + # not in repr + _reset_session: bool = field(default=True, init=False, repr=False) + _clear_memoized: bool = field(default=False, init=False, repr=False) + _rate_limit_last_call: Optional[datetime] = field(default=None, init=False, repr=False) + _login: 
bool = field(default=False, init=False, repr=False) + + def __init__( + self, + lang: Optional[str] = None, + api_url: Optional[str] = None, + category_prefix: Optional[str] = None, + timeout: Optional[float] = None, + user_agent: Optional[str] = None, + proxies: Optional[Dict] = None, + verify_ssl: Union[bool, str, None] = None, + rate_limit: bool = False, + rate_limit_wait: Optional[timedelta] = None, + username: Optional[str] = None, + password: Optional[str] = None, + refresh_interval: Optional[int] = None, + use_cache: bool = True, + ): + if api_url: + self._api_url = api_url + + if lang: + self.lang = lang + + if category_prefix: + self.category_prefix = category_prefix + + if user_agent: + self._user_agent = user_agent + + if proxies: + self.proxies = proxies + + if verify_ssl: + self.verify_ssl = verify_ssl + + if rate_limit: + self.rate_limit = rate_limit + + if rate_limit_wait: + self._rate_limit_min_wait = rate_limit_wait + + if username: + self.username = username + + if password: + self.password = password + + if refresh_interval: + self.refresh_interval = refresh_interval + + if use_cache: + self.use_cache = use_cache + + def __repr__(self): + keys = [ + x.replace("_", "", 1) + for x in sorted(self.__dataclass_fields__.keys()) + if x not in ["_login", "_rate_limit_last_call", "_clear_memoized", "_reset_session"] + ] + full = [f"{x}={self.__getattribute__(x)}" for x in keys] + return f"Configuration({', '.join(full)})" + + @property + def lang(self) -> str: + return self._lang + + @lang.setter + def lang(self, lang: str): + lang = lang.lower() + if self._lang == lang: + return + url = self._api_url + tmp = url.replace(f"/{self._lang}.", f"/{lang}.") + + self.api_url = tmp + self._lang = lang + self._clear_memoized - True + + @property + def api_url(self) -> str: + return self._api_url + + @api_url.setter + def api_url(self, api_url: str): + self._lang = self.lang.lower() + self._api_url = api_url.format(lang=self._lang) + + # reset session + 
self._reset_session = True + + @property + def category_prefix(self) -> str: + return self._category_prefix + + @category_prefix.setter + def category_prefix(self, category_prefix: str): + self._category_prefix = category_prefix[:-1] if category_prefix[-1:] == ":" else category_prefix + + @property + def user_agent(self) -> str: + return self._user_agent + + @user_agent.setter + def user_agent(self, user_agent: str): + self._user_agent = user_agent + + @property + def proxies(self) -> Optional[Dict]: + return self._proxies + + @proxies.setter + def proxies(self, proxies: Optional[Dict]): + self._proxies = proxies if isinstance(proxies, dict) else None + + # reset session + self._reset_session = True + + @property + def verify_ssl(self) -> Union[bool, str]: + return self._verify_ssl + + @verify_ssl.setter + def verify_ssl(self, verify_ssl: Union[bool, str, None]): + self._verify_ssl = verify_ssl if isinstance(verify_ssl, (bool, str)) else True + + # reset session + self._reset_session = True + + @property + def rate_limit(self) -> bool: + return self._rate_limit + + @rate_limit.setter + def rate_limit(self, rate_limit: bool): + self._rate_limit = bool(rate_limit) + self._rate_limit_last_call = None + self._clear_memoized = True + + @property + def rate_limit_min_wait(self) -> timedelta: + return self._rate_limit_min_wait + + @rate_limit_min_wait.setter + def rate_limit_min_wait(self, min_wait: timedelta): + self._rate_limit_min_wait = min_wait + self._rate_limit_last_call = None + + @property + def username(self) -> Optional[str]: + return self._username + + @username.setter + def username(self, username: Optional[str]): + self._username = username + if self.username and self.password: + self._login = True + + @property + def password(self) -> Optional[str]: + return self._password + + @password.setter + def password(self, password: Optional[str]): + self._password = password + if self.username and self.password: + self._login = True + + @property + def 
refresh_interval(self) -> Optional[int]: + return self._rate_limit + + @refresh_interval.setter + def refresh_interval(self, refresh_interval: Optional[int]): + self._refresh_interval = ( + refresh_interval if isinstance(refresh_interval, int) and refresh_interval > 0 else None + ) + + @property + def use_cache(self) -> bool: + return self._use_cache + + @use_cache.setter + def use_cache(self, use_cache: bool): + self._use_cache = bool(use_cache) + + @property + def timeout(self) -> Optional[float]: + return self._timeout + + @timeout.setter + def timeout(self, timeout: Optional[float]): + self._timeout = None if timeout is None else float(timeout) From 086ab215e35d3f30181c977d5e937c6d9dc522a4 Mon Sep 17 00:00:00 2001 From: barrust Date: Sun, 7 Jan 2024 21:53:28 -0500 Subject: [PATCH 02/10] move over to configuration usage; mostly complete --- mediawiki/__init__.py | 3 +- mediawiki/configuraton.py | 10 ++- mediawiki/mediawiki.py | 167 +++++++++++++++++--------------------- mediawiki/utilities.py | 4 +- tests/mediawiki_test.py | 16 ++-- 5 files changed, 92 insertions(+), 108 deletions(-) diff --git a/mediawiki/__init__.py b/mediawiki/__init__.py index e34bc36..0515c4e 100644 --- a/mediawiki/__init__.py +++ b/mediawiki/__init__.py @@ -1,6 +1,7 @@ """ mediawiki module initialization """ +from mediawiki.configuraton import URL, VERSION from mediawiki.exceptions import ( DisambiguationError, HTTPTimeoutError, @@ -12,7 +13,7 @@ PageError, RedirectError, ) -from mediawiki.mediawiki import URL, VERSION, MediaWiki +from mediawiki.mediawiki import MediaWiki from mediawiki.mediawikipage import MediaWikiPage __author__ = "Tyler Barrus" diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index 656c574..f0a337e 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -2,13 +2,14 @@ from datetime import datetime, timedelta from typing import Dict, Optional, Union -from mediawiki.mediawiki import URL, VERSION +URL: str = 
"https://github.com/barrust/mediawiki" +VERSION: str = "0.7.4" @dataclass class Configuration: _lang: str = field(default="en", init=False, repr=False) - _api_url: str = field(default="https://{lang}.wikipedia.org/w/api.php", init=False, repr=False) + _api_url: str = field(default="https://en.wikipedia.org/w/api.php", init=False, repr=False) _category_prefix: str = field(default="Category", init=False, repr=False) _timeout: Optional[float] = field(default=15.0, init=False, repr=False) _user_agent: str = field(default=f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT", init=False, repr=False) @@ -79,6 +80,9 @@ def __init__( if use_cache: self.use_cache = use_cache + if timeout: + self.timeout = timeout + def __repr__(self): keys = [ x.replace("_", "", 1) @@ -195,7 +199,7 @@ def password(self, password: Optional[str]): @property def refresh_interval(self) -> Optional[int]: - return self._rate_limit + return self._refresh_interval @refresh_interval.setter def refresh_interval(self, refresh_interval: Optional[int]): diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index abcc392..87f9580 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -13,6 +13,7 @@ import requests import requests.exceptions as rex +from mediawiki.configuraton import VERSION, Configuration from mediawiki.exceptions import ( HTTPTimeoutError, MediaWikiAPIURLError, @@ -25,9 +26,6 @@ from mediawiki.mediawikipage import MediaWikiPage from mediawiki.utilities import memoize -URL: str = "https://github.com/barrust/mediawiki" -VERSION: str = "0.7.4" - class MediaWiki: """MediaWiki API Wrapper Instance @@ -49,27 +47,16 @@ class MediaWiki: __slots__ = [ "_version", - "_lang", - "_api_url", - "_cat_prefix", - "_timeout", - "_user_agent", + "_config", "_session", - "_rate_limit", - "_rate_limit_last_call", - "_min_wait", "_extensions", "_api_version", "_api_version_str", "_base_url", "__supported_languages", "__available_languages", - "_cache", - "_refresh_interval", - "_use_cache", 
"_is_logged_in", - "_proxies", - "_verify_ssl", + "_cache", ] def __init__( @@ -88,27 +75,24 @@ def __init__( ): """Init Function""" self._version = VERSION - self._lang = lang.lower() - self._api_url = url.format(lang=self._lang) - self._cat_prefix = "" - self.category_prefix = cat_prefix - self._timeout = 15.0 - self.timeout = timeout + url.format(lang=lang.lower()) + self._config = Configuration( + lang=lang, + api_url=url.format(lang=lang), + category_prefix=cat_prefix, + timeout=timeout, + proxies=proxies, + user_agent=user_agent, + verify_ssl=verify_ssl, + rate_limit=rate_limit, + rate_limit_wait=rate_limit_wait, + ) + # requests library parameters self._session: Optional[requests.Session] = None - self._user_agent = f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT" - self._proxies: Optional[Dict] = None - self._verify_ssl: Union[bool, str] = True - self.verify_ssl = verify_ssl + # set libary parameters - if user_agent is not None: - self.user_agent = user_agent - self.proxies = proxies # this will call self._reset_session() - - self._rate_limit = False - self.rate_limit = bool(rate_limit) - self._rate_limit_last_call: Optional[datetime] = None - self._min_wait = rate_limit_wait + self._extensions = None self._api_version = None self._api_version_str = None @@ -118,8 +102,6 @@ def __init__( # for memoized results self._cache: Dict = {} - self._refresh_interval: Optional[int] = None - self._use_cache = True # for login information self._is_logged_in = False @@ -168,35 +150,37 @@ def extensions(self) -> List[str]: @property def rate_limit(self) -> bool: """bool: Turn on or off Rate Limiting""" - return self._rate_limit + return self._config.rate_limit @rate_limit.setter def rate_limit(self, rate_limit: bool): """Turn on or off rate limiting""" - self._rate_limit = bool(rate_limit) - self._rate_limit_last_call = None - self.clear_memoized() + self._config.rate_limit = rate_limit + if self._config._clear_memoized: + self.clear_memoized() @property def 
proxies(self) -> Optional[Dict]: """dict: Turn on, off, or set proxy use with the Requests library""" - return self._proxies + return self._config.proxies @proxies.setter def proxies(self, proxies: Optional[Dict]): """Turn on, off, or set proxy use through the Requests library""" - self._proxies = proxies if isinstance(proxies, dict) else None - self._reset_session() + self._config.proxies = proxies + if self._config._reset_session: + self._reset_session() + self._config._reset_session = False @property def use_cache(self) -> bool: """bool: Whether caching should be used; on (**True**) or off (**False**)""" - return self._use_cache + return self._config.use_cache @use_cache.setter def use_cache(self, use_cache: bool): """toggle using the cache or not""" - self._use_cache = bool(use_cache) + self._config.use_cache = use_cache @property def rate_limit_min_wait(self) -> timedelta: @@ -204,37 +188,39 @@ def rate_limit_min_wait(self) -> timedelta: Note: Only used if rate_limit is **True**""" - return self._min_wait + return self._config.rate_limit_min_wait @rate_limit_min_wait.setter def rate_limit_min_wait(self, min_wait: timedelta): """Set minimum wait to use for rate limiting""" - self._min_wait = min_wait - self._rate_limit_last_call = None + self._config.rate_limit_min_wait = min_wait + self._config._rate_limit_last_call = None @property - def timeout(self) -> float: + def timeout(self) -> Optional[float]: """float: Response timeout for API requests Note: Use **None** for no response timeout""" - return self._timeout + return self._config.timeout @timeout.setter - def timeout(self, timeout: float): + def timeout(self, timeout: Optional[float]): """Set request timeout in seconds (or fractions of a second)""" - self._timeout = None if timeout is None else float(timeout) + self._config.timeout = timeout @property def verify_ssl(self) -> Union[bool, str]: """bool | str: Verify SSL when using requests or path to cert file""" - return self._verify_ssl + return 
self._config.verify_ssl @verify_ssl.setter def verify_ssl(self, verify_ssl: Union[bool, str]): """Set request verify SSL parameter; defaults to True if issue""" - self._verify_ssl = verify_ssl if isinstance(verify_ssl, (bool, str)) else True - self._reset_session() + self._config.verify_ssl = verify_ssl + if self._config._reset_session: + self._reset_session() + self._config._reset_session = False @property def language(self) -> str: @@ -244,21 +230,15 @@ def language(self) -> str: Use correct language titles with the updated API URL Note: Some API URLs do not encode language; unable to update if this is the case""" - return self._lang + return self._config.lang @language.setter def language(self, lang: str): """Set the language to use; attempts to change the API URL""" - lang = lang.lower() - if self._lang == lang: - return - - url = self._api_url - tmp = url.replace(f"/{self._lang}.", f"/{lang}.") - - self._api_url = tmp - self._lang = lang - self.clear_memoized() + self._config.lang == lang + if self._config._clear_memoized: + self.clear_memoized() + self._config._clear_memoized = False @property def category_prefix(self) -> str: @@ -266,27 +246,28 @@ def category_prefix(self) -> str: Note: Use the correct category name for the language selected""" - return self._cat_prefix + return self._config.category_prefix @category_prefix.setter def category_prefix(self, prefix: str): """Set the category prefix correctly""" - self._cat_prefix = prefix[:-1] if prefix[-1:] == ":" else prefix + self._config.category_prefix = prefix @property def user_agent(self) -> str: """str: User agent string Note: If using in as part of another project, this should be changed""" - return self._user_agent + return self._config.user_agent @user_agent.setter def user_agent(self, user_agent: str): """Set the new user agent string Note: Will need to re-log into the MediaWiki if user agent string is changed""" - self._user_agent = user_agent - self._reset_session() + self._config.user_agent = 
user_agent + if self._config._reset_session: + self._reset_session() @property def api_url(self) -> str: @@ -294,7 +275,7 @@ def api_url(self) -> str: Note: Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`""" - return self._api_url + return self._config.api_url @property def memoized(self) -> Dict[Any, Any]: @@ -308,14 +289,12 @@ def memoized(self) -> Dict[Any, Any]: @property def refresh_interval(self) -> Optional[int]: """int: The interval at which the memoize cache is to be refresh""" - return self._refresh_interval + return self._config.refresh_interval @refresh_interval.setter def refresh_interval(self, refresh_interval: int): """Set the new cache refresh interval""" - self._refresh_interval = ( - refresh_interval if isinstance(refresh_interval, int) and refresh_interval > 0 else None - ) + self._config.refresh_interval = refresh_interval def login(self, username: str, password: str, strict: bool = True) -> bool: """Login as specified user @@ -381,10 +360,10 @@ def set_api_url( :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \ url is not a valid MediaWiki site or login fails """ - old_api_url = self._api_url - old_lang = self._lang - self._lang = lang.lower() - self._api_url = api_url.format(lang=self._lang) + old_api_url = self._config.api_url + old_lang = self._config.lang + self._config.lang = lang.lower() + self._config.api_url = api_url.format(lang=self._config.lang) self._is_logged_in = False try: @@ -395,8 +374,8 @@ def set_api_url( self.__available_languages = None # reset this except (rex.ConnectTimeout, MediaWikiException) as exc: # reset api url and lang in the event that the exception was caught - self._api_url = old_api_url - self._lang = old_lang + self._config.api_url = old_api_url + self._config.lang = old_lang raise MediaWikiAPIURLError(api_url) from exc self.clear_memoized() @@ -405,12 +384,12 @@ def _reset_session(self): if self._session: self._session.close() - headers = {"User-Agent": self._user_agent} + headers = 
{"User-Agent": self._config.user_agent} self._session = requests.Session() self._session.headers.update(headers) - if self._proxies is not None: - self._session.proxies.update(self._proxies) - self._session.verify = self._verify_ssl + if self._config.proxies is not None: + self._session.proxies.update(self._config.proxies) + self._session.verify = self._config.verify_ssl self._is_logged_in = False def clear_memoized(self): @@ -852,17 +831,17 @@ def wiki_request(self, params: Dict[str, Any]) -> Dict[Any, Any]: if "action" not in params: params["action"] = "query" - limit = self._rate_limit - last_call = self._rate_limit_last_call - if limit and last_call and last_call + self._min_wait > datetime.now(): + limit = self._config.rate_limit + last_call = self._config._rate_limit_last_call + if limit and last_call and last_call + self._config.rate_limit_min_wait > datetime.now(): # call time to quick for rate limited api requests, wait - wait_time = (last_call + self._min_wait) - datetime.now() + wait_time = (last_call + self._config.rate_limit_min_wait) - datetime.now() time.sleep(wait_time.total_seconds()) req = self._get_response(params) - if self._rate_limit: - self._rate_limit_last_call = datetime.now() + if self._config.rate_limit: + self._config._rate_limit_last_call = datetime.now() return req @@ -993,7 +972,7 @@ def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap the call to the requests package""" try: if self._session is not None: - return self._session.get(self._api_url, params=params, timeout=self._timeout).json() + return self._session.get(self._config.api_url, params=params, timeout=self._config.timeout).json() return {} except JSONDecodeError: return {} @@ -1002,7 +981,7 @@ def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap a post call to the requests package""" try: if self._session is not None: - return self._session.post(self._api_url, data=params, timeout=self._timeout).json() + return 
self._session.post(self._config.api_url, data=params, timeout=self._config.timeout).json() return {} except JSONDecodeError: return {} diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py index b35e791..4f232b1 100644 --- a/mediawiki/utilities.py +++ b/mediawiki/utilities.py @@ -30,8 +30,8 @@ def memoize(func: Callable) -> Callable: def wrapper(*args, **kwargs): """wrap it up and store info in a cache""" cache = args[0].memoized - refresh = args[0].refresh_interval - use_cache = args[0].use_cache + refresh = args[0]._config.refresh_interval + use_cache = args[0]._config.use_cache # short circuit if not using cache if use_cache is False: diff --git a/tests/mediawiki_test.py b/tests/mediawiki_test.py index a0b9bad..0db6e62 100644 --- a/tests/mediawiki_test.py +++ b/tests/mediawiki_test.py @@ -203,7 +203,7 @@ def test_rate_limit(self): site = MediaWikiOverloaded() site.rate_limit = True self.assertEqual(site.rate_limit, True) - self.assertEqual(site._rate_limit_last_call, None) + self.assertEqual(site._config._rate_limit_last_call, None) self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=50)) def test_rate_limit_min_wait(self): @@ -211,15 +211,15 @@ def test_rate_limit_min_wait(self): site = MediaWikiOverloaded() site.rate_limit_min_wait = timedelta(milliseconds=150) self.assertEqual(site.rate_limit, False) - self.assertEqual(site._rate_limit_last_call, None) + self.assertEqual(site._config._rate_limit_last_call, None) self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=150)) def test_rate_limit_min_wait_reset(self): """test setting rate limiting""" site = MediaWikiOverloaded(rate_limit=True) - self.assertNotEqual(site._rate_limit_last_call, None) # should be set + self.assertNotEqual(site._config._rate_limit_last_call, None) # should be set site.rate_limit_min_wait = timedelta(milliseconds=150) - self.assertEqual(site._rate_limit_last_call, None) + self.assertEqual(site._config._rate_limit_last_call, None) 
self.assertEqual(site.rate_limit, True) self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=150)) @@ -1026,16 +1026,16 @@ class TestMediaWikiRequests(unittest.TestCase): def test_wiki_request(self): """test wiki request by testing the timing....""" site = MediaWikiOverloaded() - # self.assertEqual(site._rate_limit_last_call, None) + # self.assertEqual(site._config._rate_limit_last_call, None) site.rate_limit = True site.rate_limit_min_wait = timedelta(seconds=2) site.search("chest set") - start_time = site._rate_limit_last_call + start_time = site._config._rate_limit_last_call site.opensearch("new york") site.prefixsearch("ar") - end_time = site._rate_limit_last_call + end_time = site._config._rate_limit_last_call self.assertGreater(end_time - start_time, timedelta(seconds=2)) - self.assertNotEqual(site._rate_limit_last_call, None) + self.assertNotEqual(site._config._rate_limit_last_call, None) class TestMediaWikiPage(unittest.TestCase): From 78862840139670bdf9975246f9b1624d89cb0912 Mon Sep 17 00:00:00 2001 From: m29538 Date: Mon, 8 Jan 2024 17:05:33 -0500 Subject: [PATCH 03/10] fix configuration and tests --- docs/source/conf.py | 5 +++-- mediawiki/configuraton.py | 11 +++++++---- mediawiki/mediawiki.py | 5 +++-- scripts/generate_test_data.py | 4 ++-- setup.py | 2 +- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 3b0dbdc..8cabad6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,6 +15,7 @@ import os import sys +from typing import Dict, List # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -88,7 +89,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = [] +exclude_patterns: List[str] = [] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -224,7 +225,7 @@ # -- Options for LaTeX output --------------------------------------------- -latex_elements = { +latex_elements: Dict[str, str] = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index f0a337e..89cb066 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -98,14 +98,17 @@ def lang(self) -> str: @lang.setter def lang(self, lang: str): - lang = lang.lower() - if self._lang == lang: + print(f"lang setter: {lang}") + t_lang = lang.lower() + print(f"t_lang setter: {t_lang}") + if self._lang == t_lang: + print("non-change") return url = self._api_url - tmp = url.replace(f"/{self._lang}.", f"/{lang}.") + tmp = url.replace(f"/{self._lang}.", f"/{t_lang}.") self.api_url = tmp - self._lang = lang + self._lang = t_lang self._clear_memoized - True @property diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 87f9580..513703e 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -78,7 +78,7 @@ def __init__( url.format(lang=lang.lower()) self._config = Configuration( lang=lang, - api_url=url.format(lang=lang), + api_url=url.format(lang=lang.lower()), category_prefix=cat_prefix, timeout=timeout, proxies=proxies, @@ -235,7 +235,8 @@ def language(self) -> str: @language.setter def language(self, lang: str): """Set the language to use; attempts to change the API URL""" - self._config.lang == lang + print(f"language setter: {lang}") + self._config.lang = lang if self._config._clear_memoized: self.clear_memoized() self._config._clear_memoized = False diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index f5c2e3d..0eb5de7 100644 --- 
a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -269,12 +269,12 @@ def _post_response(self, params): if PULL_ALL is True or PULL_CATEGORYTREE is True: site.rate_limit = True - ct = site.categorytree(["Chess", "Ebola"], depth=None) + ct = site.categorytree(["Chess", "Ebola"], depth=None) # type: ignore with open(CATTREE_FILE, "w") as fp: json.dump(ct, fp, ensure_ascii=False, sort_keys=True) try: - site.categorytree("Chess Ebola", depth=None) + site.categorytree("Chess Ebola", depth=None) # type: ignore except Exception as ex: responses[site.api_url]["missing_categorytree"] = str(ex) site.rate_limit = False diff --git a/setup.py b/setup.py index 6068493..0936043 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,3 @@ -from setuptools import setup +from setuptools import setup # type: ignore setup() From 59d89ded92cab0c6e4a7cd4e2f8e07816a041dc8 Mon Sep 17 00:00:00 2001 From: m29538 Date: Mon, 8 Jan 2024 17:37:27 -0500 Subject: [PATCH 04/10] remove print statements --- mediawiki/configuraton.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index 89cb066..45d8205 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -98,11 +98,8 @@ def lang(self) -> str: @lang.setter def lang(self, lang: str): - print(f"lang setter: {lang}") t_lang = lang.lower() - print(f"t_lang setter: {t_lang}") if self._lang == t_lang: - print("non-change") return url = self._api_url tmp = url.replace(f"/{self._lang}.", f"/{t_lang}.") From 536c838825797b22504017e6b5bfdc244e927a82 Mon Sep 17 00:00:00 2001 From: m29538 Date: Mon, 8 Jan 2024 18:06:06 -0500 Subject: [PATCH 05/10] minor cleanup --- mediawiki/mediawiki.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 513703e..9ba409f 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -81,18 +81,21 @@ def __init__( 
api_url=url.format(lang=lang.lower()), category_prefix=cat_prefix, timeout=timeout, - proxies=proxies, user_agent=user_agent, + proxies=proxies, verify_ssl=verify_ssl, rate_limit=rate_limit, rate_limit_wait=rate_limit_wait, + username=username, + password=password, + # refresh_interval=None, + # use_cache=True, ) # requests library parameters self._session: Optional[requests.Session] = None - # set libary parameters - + # reset libary parameters self._extensions = None self._api_version = None self._api_version_str = None @@ -105,8 +108,8 @@ def __init__( # for login information self._is_logged_in = False - if password is not None and username is not None: - self.login(username, password) + if self._config.username is not None and self._config.password is not None: + self.login(self._config.username, self._config.password) try: self._get_site_info() @@ -170,7 +173,6 @@ def proxies(self, proxies: Optional[Dict]): self._config.proxies = proxies if self._config._reset_session: self._reset_session() - self._config._reset_session = False @property def use_cache(self) -> bool: @@ -220,7 +222,6 @@ def verify_ssl(self, verify_ssl: Union[bool, str]): self._config.verify_ssl = verify_ssl if self._config._reset_session: self._reset_session() - self._config._reset_session = False @property def language(self) -> str: @@ -239,7 +240,6 @@ def language(self, lang: str): self._config.lang = lang if self._config._clear_memoized: self.clear_memoized() - self._config._clear_memoized = False @property def category_prefix(self) -> str: @@ -365,11 +365,12 @@ def set_api_url( old_lang = self._config.lang self._config.lang = lang.lower() self._config.api_url = api_url.format(lang=self._config.lang) - + self._config.username = username + self._config.password = password self._is_logged_in = False try: - if username is not None and password is not None: - self.login(username, password) + if self._config.username is not None and self._config.password is not None: + 
self.login(self._config.username, self._config.password) self._get_site_info() self.__supported_languages = None # reset this self.__available_languages = None # reset this @@ -391,12 +392,15 @@ def _reset_session(self): if self._config.proxies is not None: self._session.proxies.update(self._config.proxies) self._session.verify = self._config.verify_ssl + self._is_logged_in = False + self._config._reset_session = False def clear_memoized(self): """Clear memoized (cached) values""" if hasattr(self, "_cache"): self._cache.clear() + self._config._clear_memoized = False # non-setup functions @property From 86c4473ad50b8e2973ab9b8ede3ec6a361588b11 Mon Sep 17 00:00:00 2001 From: barrust Date: Wed, 10 Jan 2024 09:09:17 -0500 Subject: [PATCH 06/10] updates for pylint --- .pylintrc | 6 ++-- mediawiki/configuraton.py | 65 ++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/.pylintrc b/.pylintrc index ca08a93..2c555b7 100644 --- a/.pylintrc +++ b/.pylintrc @@ -151,7 +151,9 @@ disable=raw-checker-failed, suppressed-message, useless-suppression, deprecated-pragma, - use-symbolic-message-instead + use-symbolic-message-instead, + too-many-arguments, + protected-access, # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option @@ -598,7 +600,7 @@ variable-naming-style=snake_case [EXCEPTIONS] # Exceptions that will emit a warning when caught. 
-overgeneral-exceptions=builtins.BaseException, builtins.Exception +overgeneral-exceptions=mediawiki.exceptions.BaseException,builtins.Exception [LOGGING] diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index 45d8205..8bc239d 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -1,4 +1,5 @@ -from dataclasses import dataclass, field +"""Configuration module""" +from dataclasses import asdict, dataclass, field from datetime import datetime, timedelta from typing import Dict, Optional, Union @@ -8,6 +9,8 @@ @dataclass class Configuration: + """Configuration class""" + _lang: str = field(default="en", init=False, repr=False) _api_url: str = field(default="https://en.wikipedia.org/w/api.php", init=False, repr=False) _category_prefix: str = field(default="Category", init=False, repr=False) @@ -84,9 +87,10 @@ def __init__( self.timeout = timeout def __repr__(self): + """repr""" keys = [ x.replace("_", "", 1) - for x in sorted(self.__dataclass_fields__.keys()) + for x in sorted(asdict(self).keys()) if x not in ["_login", "_rate_limit_last_call", "_clear_memoized", "_reset_session"] ] full = [f"{x}={self.__getattribute__(x)}" for x in keys] @@ -94,22 +98,32 @@ def __repr__(self): @property def lang(self) -> str: + """str: The API URL language, if possible this will update the API URL + + Note: + Use correct language titles with the updated API URL + Note: + Some API URLs do not encode language; unable to update if this is the case""" return self._lang @lang.setter - def lang(self, lang: str): - t_lang = lang.lower() - if self._lang == t_lang: + def lang(self, language: str): + """Set the language to use; attempts to change the API URL""" + if self._lang == language.lower(): return url = self._api_url - tmp = url.replace(f"/{self._lang}.", f"/{t_lang}.") + tmp = url.replace(f"/{self._lang}.", f"/{language.lower()}.") self.api_url = tmp - self._lang = t_lang + self._lang = language.lower() self._clear_memoized - True @property def 
api_url(self) -> str: + """str: API URL of the MediaWiki site + + Note: + Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`""" return self._api_url @api_url.setter @@ -122,26 +136,41 @@ def api_url(self, api_url: str): @property def category_prefix(self) -> str: + """str: The category prefix to use when using category based functions + + Note: + Use the correct category name for the language selected""" return self._category_prefix @category_prefix.setter def category_prefix(self, category_prefix: str): + """Set the category prefix correctly""" self._category_prefix = category_prefix[:-1] if category_prefix[-1:] == ":" else category_prefix @property def user_agent(self) -> str: + """str: User agent string + + Note: + If using in as part of another project, this should be changed""" return self._user_agent @user_agent.setter def user_agent(self, user_agent: str): + """Set the new user agent string + + Note: + Will need to re-log into the MediaWiki if user agent string is changed""" self._user_agent = user_agent @property def proxies(self) -> Optional[Dict]: + """dict: Turn on, off, or set proxy use with the Requests library""" return self._proxies @proxies.setter def proxies(self, proxies: Optional[Dict]): + """Turn on, off, or set proxy use through the Requests library""" self._proxies = proxies if isinstance(proxies, dict) else None # reset session @@ -149,10 +178,12 @@ def proxies(self, proxies: Optional[Dict]): @property def verify_ssl(self) -> Union[bool, str]: + """bool | str: Verify SSL when using requests or path to cert file""" return self._verify_ssl @verify_ssl.setter def verify_ssl(self, verify_ssl: Union[bool, str, None]): + """Set request verify SSL parameter; defaults to True if issue""" self._verify_ssl = verify_ssl if isinstance(verify_ssl, (bool, str)) else True # reset session @@ -160,65 +191,85 @@ def verify_ssl(self, verify_ssl: Union[bool, str, None]): @property def rate_limit(self) -> bool: + """bool: Turn on or off Rate Limiting""" 
return self._rate_limit @rate_limit.setter def rate_limit(self, rate_limit: bool): + """Turn on or off rate limiting""" self._rate_limit = bool(rate_limit) self._rate_limit_last_call = None self._clear_memoized = True @property def rate_limit_min_wait(self) -> timedelta: + """timedelta: Time to wait between calls + + Note: + Only used if rate_limit is **True**""" return self._rate_limit_min_wait @rate_limit_min_wait.setter def rate_limit_min_wait(self, min_wait: timedelta): + """Set minimum wait to use for rate limiting""" self._rate_limit_min_wait = min_wait self._rate_limit_last_call = None @property def username(self) -> Optional[str]: + """str | None: Username to use to log into the mediawiki site""" return self._username @username.setter def username(self, username: Optional[str]): + """set the username, if needed, to log into the mediawiki site""" self._username = username if self.username and self.password: self._login = True @property def password(self) -> Optional[str]: + """str | None: Password to use to log into the mediawiki site""" return self._password @password.setter def password(self, password: Optional[str]): + """set the password, if needed, to log into the mediawiki site""" self._password = password if self.username and self.password: self._login = True @property def refresh_interval(self) -> Optional[int]: + """int | None: The interval at which the memoize cache is to be refresh""" return self._refresh_interval @refresh_interval.setter def refresh_interval(self, refresh_interval: Optional[int]): + "Set the new cache refresh interval" "" self._refresh_interval = ( refresh_interval if isinstance(refresh_interval, int) and refresh_interval > 0 else None ) @property def use_cache(self) -> bool: + """bool: Whether caching should be used; on (**True**) or off (**False**)""" return self._use_cache @use_cache.setter def use_cache(self, use_cache: bool): + """toggle using the cache or not""" self._use_cache = bool(use_cache) @property def timeout(self) 
-> Optional[float]: + """float: Response timeout for API requests + + Note: + Use **None** for no response timeout""" return self._timeout @timeout.setter def timeout(self, timeout: Optional[float]): + """Set request timeout in seconds (or fractions of a second)""" self._timeout = None if timeout is None else float(timeout) From 1ef724cfc4b5acf0a98e08ff6fbec63c2abc55ce Mon Sep 17 00:00:00 2001 From: barrust Date: Wed, 10 Jan 2024 09:17:34 -0500 Subject: [PATCH 07/10] minor changes --- mediawiki/configuraton.py | 7 ++++--- mediawiki/mediawiki.py | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index 8bc239d..8f78d83 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -109,13 +109,14 @@ def lang(self) -> str: @lang.setter def lang(self, language: str): """Set the language to use; attempts to change the API URL""" - if self._lang == language.lower(): + t_lang = language.lower() + if self._lang == t_lang: return url = self._api_url - tmp = url.replace(f"/{self._lang}.", f"/{language.lower()}.") + tmp = url.replace(f"/{self._lang}.", f"/{t_lang}.") self.api_url = tmp - self._lang = language.lower() + self._lang = t_lang self._clear_memoized - True @property diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 9ba409f..5ebda2f 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -88,8 +88,8 @@ def __init__( rate_limit_wait=rate_limit_wait, username=username, password=password, - # refresh_interval=None, - # use_cache=True, + refresh_interval=None, + use_cache=True, ) # requests library parameters @@ -114,7 +114,7 @@ def __init__( try: self._get_site_info() except MediaWikiException as exc: - raise MediaWikiAPIURLError(url) from exc + raise MediaWikiAPIURLError(self._config.api_url) from exc # non-settable properties @property @@ -853,8 +853,9 @@ def wiki_request(self, params: Dict[str, Any]) -> Dict[Any, Any]: # Protected functions def 
_get_site_info(self): """Parse out the Wikimedia site information including API Version and Extensions""" - response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"}) + response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"}) + print(response) # parse what we need out here! query = response.get("query", None) if query is None or query.get("general", None) is None: From cc7d86703996b56d27c530cb0825459b5fd9700d Mon Sep 17 00:00:00 2001 From: barrust Date: Wed, 10 Jan 2024 09:38:11 -0500 Subject: [PATCH 08/10] set session --- mediawiki/configuraton.py | 9 ++++----- mediawiki/mediawiki.py | 5 +++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index 8f78d83..b5fd197 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -109,15 +109,14 @@ def lang(self) -> str: @lang.setter def lang(self, language: str): """Set the language to use; attempts to change the API URL""" - t_lang = language.lower() - if self._lang == t_lang: + if self._lang == language.lower(): return url = self._api_url - tmp = url.replace(f"/{self._lang}.", f"/{t_lang}.") + tmp = url.replace(f"/{self._lang}.", f"/{language.lower()}.") self.api_url = tmp - self._lang = t_lang - self._clear_memoized - True + self._lang = language.lower() + self._clear_memoized = True @property def api_url(self) -> str: diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 5ebda2f..b4d9f04 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -106,6 +106,8 @@ def __init__( # for memoized results self._cache: Dict = {} + self._reset_session() + # for login information self._is_logged_in = False if self._config.username is not None and self._config.password is not None: @@ -236,7 +238,6 @@ def language(self) -> str: @language.setter def language(self, lang: str): """Set the language to use; attempts to change the API URL""" - print(f"language setter: {lang}") 
self._config.lang = lang if self._config._clear_memoized: self.clear_memoized() @@ -855,7 +856,7 @@ def _get_site_info(self): """Parse out the Wikimedia site information including API Version and Extensions""" response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"}) - print(response) + # parse what we need out here! query = response.get("query", None) if query is None or query.get("general", None) is None: From a0c9015652127f75758c5ceca4c0b985361d2a4d Mon Sep 17 00:00:00 2001 From: barrust Date: Wed, 10 Jan 2024 18:20:14 -0500 Subject: [PATCH 09/10] minor changes --- mediawiki/mediawiki.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index b4d9f04..6eb7344 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -93,7 +93,7 @@ def __init__( ) # requests library parameters - self._session: Optional[requests.Session] = None + self._session: requests.Session = requests.Session() # reset libary parameters self._extensions = None @@ -198,7 +198,6 @@ def rate_limit_min_wait(self) -> timedelta: def rate_limit_min_wait(self, min_wait: timedelta): """Set minimum wait to use for rate limiting""" self._config.rate_limit_min_wait = min_wait - self._config._rate_limit_last_call = None @property def timeout(self) -> Optional[float]: @@ -978,18 +977,14 @@ def __cat_tree_rec( def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap the call to the requests package""" try: - if self._session is not None: - return self._session.get(self._config.api_url, params=params, timeout=self._config.timeout).json() - return {} + return self._session.get(self._config.api_url, params=params, timeout=self._config.timeout).json() except JSONDecodeError: return {} def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap a post call to the requests package""" try: - if self._session is not None: - return self._session.post(self._config.api_url, 
data=params, timeout=self._config.timeout).json() - return {} + return self._session.post(self._config.api_url, data=params, timeout=self._config.timeout).json() except JSONDecodeError: return {} From 655d8d573e6167ad68a54e595de52c20c507a3c7 Mon Sep 17 00:00:00 2001 From: barrust Date: Wed, 10 Jan 2024 20:52:29 -0500 Subject: [PATCH 10/10] additional tests --- CHANGELOG.md | 5 +++++ mediawiki/configuraton.py | 7 +------ mediawiki/mediawiki.py | 1 + tests/mediawiki_test.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da6b872..5b5cf73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # MediaWiki Changelog +## Version 0.7.5 + +* Move configuration items to a configuration data class + * Will allow for the deprecation of some top-level properties in favor of changing them on the `Configuration` class + ## Version 0.7.4 * Add typing support diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index b5fd197..2493b81 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -29,7 +29,6 @@ class Configuration: _reset_session: bool = field(default=True, init=False, repr=False) _clear_memoized: bool = field(default=False, init=False, repr=False) _rate_limit_last_call: Optional[datetime] = field(default=None, init=False, repr=False) - _login: bool = field(default=False, init=False, repr=False) def __init__( self, @@ -91,7 +90,7 @@ def __repr__(self): keys = [ x.replace("_", "", 1) for x in sorted(asdict(self).keys()) - if x not in ["_login", "_rate_limit_last_call", "_clear_memoized", "_reset_session"] + if x not in ["_rate_limit_last_call", "_clear_memoized", "_reset_session"] ] full = [f"{x}={self.__getattribute__(x)}" for x in keys] return f"Configuration({', '.join(full)})" @@ -224,8 +223,6 @@ def username(self) -> Optional[str]: def username(self, username: Optional[str]): """set the username, if needed, to log into the mediawiki site"""
self._username = username - if self.username and self.password: - self._login = True @property def password(self) -> Optional[str]: @@ -236,8 +233,6 @@ def password(self) -> Optional[str]: def password(self, password: Optional[str]): """set the password, if needed, to log into the mediawiki site""" self._password = password - if self.username and self.password: - self._login = True @property def refresh_interval(self) -> Optional[int]: diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 6eb7344..59ee873 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -335,6 +335,7 @@ def login(self, username: str, password: str, strict: bool = True) -> bool: res = self._post_response(params) if res["login"]["result"] == "Success": self._is_logged_in = True + self._config._login = False return True self._is_logged_in = False reason = res["login"]["reason"] diff --git a/tests/mediawiki_test.py b/tests/mediawiki_test.py index 0db6e62..9c003f7 100644 --- a/tests/mediawiki_test.py +++ b/tests/mediawiki_test.py @@ -37,6 +37,10 @@ def __init__( rate_limit_wait=timedelta(milliseconds=50), cat_prefix="Category", user_agent=None, + username=None, + password=None, + proxies=None, + verify_ssl=True, ): """new init""" @@ -55,8 +59,15 @@ def __init__( rate_limit_wait=rate_limit_wait, cat_prefix=cat_prefix, user_agent=user_agent, + username=username, + password=password, + proxies=proxies, + verify_ssl=verify_ssl, ) + def __repr__(self): + return super().__repr__() + def _get_response(self, params): """override the __get_response method""" new_params = json.dumps(tuple(sorted(params.items()))) @@ -151,6 +162,18 @@ def test_extensions(self): response = site.responses[site.api_url] self.assertEqual(site.extensions, response["extensions"]) + def test_repr_function(self): + """test the config repr function""" + site = MediaWikiOverloaded() + res = ( + "Configuration(api_url=https://en.wikipedia.org/w/api.php, category_prefix=Category, " + "lang=en, password=None, 
proxies=None, rate_limit=False, rate_limit_min_wait=0:00:00.050000, " + "refresh_interval=None, timeout=15.0, use_cache=True, " + "user_agent=python-mediawiki/VERSION-0.7.4/(https://github.com/barrust/mediawiki)/BOT, username=None, verify_ssl=True)" + ) + print(str(site._config)) + self.assertEqual(str(site._config), res) + def test_change_api_url(self): """test switching the api url""" site = MediaWikiOverloaded() @@ -347,6 +370,11 @@ def test_successful_login(self): self.assertEqual(site.logged_in, True) self.assertEqual(res, True) + def test_successful_login_on_load(self): + """test login success on load!""" + site = MediaWikiOverloaded(username="username", password="fakepassword") + self.assertEqual(site.logged_in, True) + def test_failed_login(self): """test that login failure throws the correct exception""" site = MediaWikiOverloaded()