diff --git a/.claude/skills/mwclient/SKILL.md b/.claude/skills/mwclient/SKILL.md new file mode 100644 index 0000000..a4bafde --- /dev/null +++ b/.claude/skills/mwclient/SKILL.md @@ -0,0 +1,120 @@ +--- +name: mwclient +description: | + Python client for the MediaWiki API. Provides a high-level interface for + interacting with wikis (like Wikipedia). Core abstractions: Site (connection), + Page (wiki pages), and Image (files). + + Covers: connecting to wikis, reading/editing pages, uploading files, querying + lists, authentication (OAuth, login), and error handling. + + Use when: automating wiki edits, scraping wiki content, uploading files, + or building bots for MediaWiki instances. +user-invocable: true +--- + +# mwclient + +Python client for the MediaWiki API. + +## Installation + +```bash +pip install mwclient +``` + +## Quick Start + +```python +import mwclient + +# Connect to a wiki +site = mwclient.Site('en.wikipedia.org') + +# Read a page +page = site.pages['Python (programming language)'] +print(page.text()) + +# Edit a page (requires login) +site.login('username', 'password') +page.edit('New content', summary='Updated via mwclient') +``` + +## Sub-Skills + +| Skill | Description | +| ------------------- | ------------------------------------------------------- | +| [site](site/) | Site connection, authentication, and site-level queries | +| [page](page/) | Page operations: read, edit, move, delete, listings | +| [images](images/) | File/image operations: download, upload, metadata | +| [listing](listing/) | Pagination and iteration over API results | +| [errors](errors/) | Exception handling and error types | + +## Core Concepts + +### Site + +The `Site` class is the entry point. It manages the HTTP session, handles authentication, and provides access to pages. + +### Page + +The `Page` class represents a wiki page. Use it to read content, make edits, and query page relationships (links, categories, backlinks). 
+ +### Image + +The `Image` class (subclass of `Page`) handles file operations. Use it to download files or check file usage. + +### Listings + +All listing methods return lazy iterators that fetch data in chunks. Use `max_items` to limit results and `api_chunk_size` to control chunk size. + +## Common Patterns + +### Connect with authentication + +```python +site = mwclient.Site('mywiki.org') +site.clientlogin(username='bot', password='secret') +``` + +### Read and edit + +```python +page = site.pages['Target Page'] +text = page.text() +page.edit(text + '\nNew line', summary='Added line') +``` + +### Iterate search results + +```python +for page in site.search('python', namespace=0, max_items=10): + print(page.name) +``` + +### Upload a file + +```python +with open('image.png', 'rb') as f: + site.upload(f, filename='image.png', description='My image') +``` + +## Error Handling + +Catch `mwclient.errors.MwClientError` for all library errors, or specific exceptions: + +```python +from mwclient import errors + +try: + page.edit('content') +except errors.ProtectedPageError: + print('Page is protected') +except errors.AssertUserFailedError: + print('Not logged in') +``` + +## Resources + +- Documentation: [ mwclient.readthedocs.io ](https://mwclient.readthedocs.io) +- Source: [github.com/mwclient/mwclient](https://github.com/mwclient/mwclient) diff --git a/.claude/skills/mwclient/errors/SKILL.md b/.claude/skills/mwclient/errors/SKILL.md new file mode 100644 index 0000000..5d45fae --- /dev/null +++ b/.claude/skills/mwclient/errors/SKILL.md @@ -0,0 +1,338 @@ +--- +name: errors +description: | + Handle mwclient exceptions and error conditions. + Covers the exception hierarchy (MwClientError, APIError, EditError, LoginError, + InsufficientPermission, ProtectedPageError, etc.) and proper catching patterns. + + Use when: troubleshooting failures, implementing error handling, or distinguishing + between permission errors, edit conflicts, and API errors. 
+user-invocable: true +--- + +# Error Handling Skill + +## Overview + +This skill covers exception handling in mwclient. All mwclient-specific exceptions inherit from `MwClientError`, which extends Python's built-in `RuntimeError`. This allows catching all library errors with a single `except MwClientError` clause. + +**Key Files:** + +- `mwclient/errors.py` - Exception class definitions + +## Exception Hierarchy + +``` +RuntimeError +└── MwClientError (base for all mwclient errors) + ├── MediaWikiVersionError + ├── APIDisabledError + ├── MaximumRetriesExceeded + ├── APIError + ├── InsufficientPermission + │ └── UserBlocked + ├── EditError + │ ├── ProtectedPageError (also inherits from InsufficientPermission) + │ └── FileExists + ├── LoginError + │ └── OAuthAuthorizationError + ├── AssertUserFailedError + ├── EmailError + │ └── NoSpecifiedEmail + ├── InvalidResponse + └── InvalidPageTitle +``` + +## Exception Groups + +| Group | Base Class | Covers | +| ------------------------ | ------------------------ | ---------------------------------------------- | +| Connection/compatibility | `MwClientError` | Version errors, API disabled, invalid response | +| API-level errors | `APIError` | Any error object in API JSON response | +| Permission errors | `InsufficientPermission` | Blocked users, protected pages, missing rights | +| Edit errors | `EditError` | Failed edits, captcha, file conflicts | +| Authentication errors | `LoginError` | Login failure, OAuth rejection | + +## Exception Reference + +### MwClientError + +Base class for the entire hierarchy. Catching `MwClientError` catches all library-specific errors. + +```python +try: + # mwclient operation + pass +except mwclient.errors.MwClientError as e: + print(f"Mwclient error: {e}") +``` + +### APIError + +Raised when the MediaWiki API response contains a top-level `"error"` key. 
+ +**Attributes:** + +| Attribute | Type | Description | +| --------- | --------------- | ----------------------------------------------- | +| `code` | `Optional[str]` | The `error.code` field from API response | +| `info` | `str` | The `error.info` field (human-readable message) | +| `kwargs` | `Optional[Any]` | Full raw error dict from response | + +```python +try: + site.post('edit', title='Page', text='Content') +except mwclient.errors.APIError as e: + print(f"API error {e.code}: {e.info}") +``` + +### MediaWikiVersionError + +Raised during site initialization when the MediaWiki version string cannot be parsed or is below the minimum supported (1.16). + +### APIDisabledError + +Raised by `site_init()` when the API response is plain text like `"MediaWiki API is not enabled for this site."` instead of valid JSON. + +### MaximumRetriesExceeded + +Raised when the retry count exceeds `max_retries`. This can occur at: + +- **Transport level** - 5xx errors or database lag retries exhausted +- **Application level** - `DBConnectionError` retries exhausted + +### InsufficientPermission + +Raised when the current user lacks a required permission. + +```python +try: + site.upload(file=f, filename='test.jpg') +except mwclient.errors.InsufficientPermission: + print("User cannot upload files") +``` + +### UserBlocked + +Subclass of `InsufficientPermission`. Raised when an action is attempted while the user account is blocked. + +```python +try: + page.edit('New content') +except mwclient.errors.UserBlocked as e: + print("User is blocked") +``` + +### EditError + +Base class for errors during page editing. Catching `EditError` also catches `ProtectedPageError` and `FileExists`. + +### ProtectedPageError + +Raised when an edit attempt returns protection-related error codes like `protectedpage`, `protectednamespace`, etc. + +**Multiple inheritance:** `ProtectedPageError` inherits from _both_ `EditError` and `InsufficientPermission`. 
+ +**Attributes:** + +| Attribute | Type | Description | +| --------- | --------------- | ------------------------------------------- | +| `page` | `Page` | The page object on which edit was attempted | +| `code` | `Optional[str]` | API error code (e.g., `'protectedpage'`) | +| `info` | `Optional[str]` | API error info string | + +```python +try: + page.edit('Content') +except mwclient.errors.ProtectedPageError as e: + print(f"Cannot edit {e.page.name}: {e.info}") +``` + +### FileExists + +Subclass of `EditError`. Raised by `site.upload()` when a file already exists and `ignore=False`. + +**Attribute:** `file_name` - the name of the conflicting file. + +```python +try: + site.upload(file=f, filename='existing.jpg') +except mwclient.errors.FileExists as e: + print(f"File {e.file_name} exists") + # Retry with ignore=True to overwrite + site.upload(file=f, filename='existing.jpg', ignore=True) +``` + +### LoginError + +Raised by `site.login()` and `site.clientlogin()` when login fails (status `FAIL`). + +**Attributes:** + +| Attribute | Type | Description | +| --------- | --------------- | ----------------------------------------- | +| `site` | `Site` | The Site object where login was attempted | +| `code` | `Optional[str]` | Result/error code from response | +| `info` | `str` | Human-readable failure reason | + +```python +try: + site.login('user', 'pass') +except mwclient.errors.LoginError as e: + print(f"Login failed: {e.info}") +``` + +### OAuthAuthorizationError + +Subclass of `LoginError`. Raised when OAuth-authenticated request receives an authorization error. + +### AssertUserFailedError + +Raised when the API returns `assertuserfailed`. This means an edit was attempted without being logged in while `force_login=True` (the default). + +```python +try: + page.edit('Content') +except mwclient.errors.AssertUserFailedError: + print("Must be logged in to edit") +``` + +### EmailError / NoSpecifiedEmail + +`EmailError` is the base class for email failures. 
`NoSpecifiedEmail` is raised when attempting to email a user who hasn't set up an email address. + +### InvalidResponse + +Raised when the server returns a response that cannot be decoded as JSON. The raw text is stored in `response_text`. + +**Common cause:** Wrong hostname or path, or server redirecting to login page. + +```python +try: + site = mwclient.Site('invalid.host.example') +except mwclient.errors.InvalidResponse as e: + print(f"Invalid response: {e.response_text[:100]}") +``` + +### InvalidPageTitle + +Raised by `Page.__init__()` when the API returns an `"invalid"` key, indicating the title contains illegal characters. + +## Catching Patterns + +### Catch all mwclient errors + +```python +import mwclient.errors as errors + +try: + # Any mwclient operation + page = site.pages['Page'] + page.edit('Content') +except errors.MwClientError as e: + print(f"Mwclient error: {e}") +``` + +### Specific error handling + +```python +try: + page.edit('New content', summary='Edit') +except errors.ProtectedPageError: + print("Page is protected") +except errors.AssertUserFailedError: + print("Not logged in") +except errors.UserBlocked: + print("User is blocked") +except errors.EditError as e: + print(f"Other edit error: {e}") +``` + +### Permission-based handling + +```python +try: + page.delete() +except errors.InsufficientPermission: + print("Cannot delete page - insufficient rights") +``` + +### Upload error handling + +```python +try: + result = site.upload(file=f, filename='test.jpg') +except errors.InsufficientPermission: + print("No upload permission") +except errors.FileExists as e: + print(f"File {e.file_name} already exists") + # Option to overwrite + site.upload(file=f, filename='test.jpg', ignore=True) +except errors.APIError as e: + print(f"API error: {e.code} - {e.info}") +``` + +### Login error handling + +```python +try: + site.login('username', 'password') +except errors.LoginError as e: + print(f"Login failed: {e.info}") + # Check specific error codes 
+ if e.code == 'Failed': + print("Invalid credentials") + elif e.code == 'Throttled': + print("Too many attempts - wait before retrying") +``` + +### Connection error handling + +```python +try: + site = mwclient.Site('example.org') +except errors.APIDisabledError: + print("API is disabled on this wiki") +except errors.MediaWikiVersionError: + print("MediaWiki version too old") +except errors.InvalidResponse as e: + print(f"Unexpected response: {e.response_text[:200]}") +except errors.MaximumRetriesExceeded: + print("Connection failed after maximum retries") +``` + +## Error Summary Table + +| Scenario | Exception to Catch | Key Attributes | +| ---------------------- | ------------------------ | ---------------------------------------------- | +| Edit on protected page | `ProtectedPageError` | `.page`, `.code`, `.info` | +| Not logged in | `AssertUserFailedError` | message in `.args[0]` | +| Any edit failure | `EditError` | catches `ProtectedPageError` and `FileExists` | +| Any permission failure | `InsufficientPermission` | catches `UserBlocked` and `ProtectedPageError` | +| Upload duplicate | `FileExists` | `.file_name` | +| Wrong credentials | `LoginError` | `.code`, `.info` | +| API returns error | `APIError` | `.code`, `.info` | +| Non-JSON response | `InvalidResponse` | `.response_text` | +| Retries exhausted | `MaximumRetriesExceeded` | — | +| API disabled | `APIDisabledError` | — | +| MW too old | `MediaWikiVersionError` | — | +| Invalid title | `InvalidPageTitle` | — | + +## TYPE_CHECKING Pattern + +The errors module uses `TYPE_CHECKING` to avoid circular imports: + +```python +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import mwclient.page + import mwclient.client +``` + +Type annotations use string literals (forward references): + +- `ProtectedPageError.__init__(self, page: 'mwclient.page.Page', ...)` +- `LoginError.__init__(self, site: 'mwclient.client.Site', ...)` + +This allows the module to be imported anywhere while still providing type 
information to static checkers. diff --git a/.claude/skills/mwclient/images/SKILL.md b/.claude/skills/mwclient/images/SKILL.md new file mode 100644 index 0000000..7be2f19 --- /dev/null +++ b/.claude/skills/mwclient/images/SKILL.md @@ -0,0 +1,314 @@ +--- +name: images +description: | + Work with mwclient Image objects and file upload functionality. + Covers image metadata (imageinfo), file history, usage tracking, downloading, + and uploading via Site.upload() including chunked uploads and stash mode. + + Use when: downloading files, checking file metadata, finding duplicate files, + uploading images, or checking where files are used. +user-invocable: true +--- + +# Image/File Operations Skill + +## Overview + +This skill covers the `Image` class and file upload functionality in mwclient. The `Image` class extends `Page` to handle file metadata, history, usage, and download operations. Uploading is handled via `Site.upload()`. + +**Key Files:** + +- `mwclient/image.py` - Image class implementation +- `mwclient/client.py` - Site.upload() method +- `mwclient/util.py` - Chunked upload utilities + +## The Image Class + +`Image` extends `Page` and is used for files in namespace 6 (`File:`). When any listing returns a page in namespace 6, the listing machinery dispatches to `Image` rather than plain `Page`. + +``` +Page +└── Image (adds file-specific methods and properties) +``` + +### Obtaining an Image Object + +```python +# Via site.images (namespace prefix omitted) +image = site.images['Example.jpg'] + +# Via site.pages (namespace prefix required) +image = site.pages['File:Example.jpg'] +``` + +> **Note:** `Image.exists` is `True` only if the file exists **locally** on the wiki. Files from shared repositories (e.g., Wikimedia Commons) return `False`. To test availability from any repository, check `image.imageinfo != {}`. + +## Image Properties + +At construction, `Image` requests `imageinfo` data from the API. 
The result is stored in: + +| Attribute | Type | Description | +| ----------------- | ------ | ---------------------------------------------------- | +| `imagerepository` | `str` | Where file is stored: `'local'`, `'shared'`, or `''` | +| `imageinfo` | `dict` | Metadata for the most recent file revision | + +### imageinfo Structure + +```python +{ + 'comment': 'Uploaded new version', + 'height': 178, + 'metadata': [{'name': 'MEDIAWIKI_EXIF_VERSION', 'value': 1}], + 'mime': 'image/jpeg', + 'sha1': 'd01b79a6781c72ac9bfff93e5e2cfbeef4efc840', + 'size': 9022, + 'timestamp': '2010-03-14T17:20:20Z', + 'url': 'https://upload.wikimedia.org/...Example.jpg', + 'user': 'SomeUser', + 'width': 172 +} +``` + +The `url` field is used by `download()` method. + +## Image Methods + +### imagehistory() + +Returns a `PageProperty` iterator yielding one dict per historical revision. + +```python +for rev in image.imagehistory(): + print(rev['timestamp'], rev['user'], rev['sha1']) +``` + +### imageusage() + +Returns a `List` (or `GeneratorList` when `generator=True`) of pages that embed or link to the file. + +```python +for page in image.imageusage(): + print(page.name) # Page objects + +for title in image.imageusage(generator=False): + print(title) # Title strings +``` + +| Parameter | Type | Default | Description | +| ---------------- | ---------- | ------- | ------------------------------------------------ | +| `namespace` | `int/None` | `None` | Restrict to namespace | +| `filterredir` | `str` | `'all'` | Filter: `'all'`, `'redirects'`, `'nonredirects'` | +| `redirect` | `bool` | `False` | Follow redirect links to file | +| `max_items` | `int/None` | `None` | Hard cap on results | +| `api_chunk_size` | `int/None` | `None` | Results per API call | +| `generator` | `bool` | `True` | Return `Page` objects or title strings | + +### duplicatefiles() + +Returns a `PageProperty` iterator listing other files with the same SHA-1 hash. 
+ +```python +for dup in image.duplicatefiles(): + print(dup['name']) +``` + +| Parameter | Type | Default | Description | +| ---------------- | ---------- | ------- | -------------------- | +| `max_items` | `int/None` | `None` | Hard cap on results | +| `api_chunk_size` | `int/None` | `None` | Results per API call | + +### download(destination=None) + +Downloads the full-resolution file. + +```python +# Load into memory +data = image.download() + +# Stream to file (preferred for large files) +with open('local_copy.jpg', 'wb') as fd: + image.download(fd) +``` + +| Call Form | Return | Behavior | +| -------------------- | ------- | ------------------------------------------------------ | +| `image.download()` | `bytes` | Entire file loaded into memory | +| `image.download(fd)` | `None` | Streams content to file descriptor in 1024-byte chunks | + +## Uploading Files + +Uploading is done via `Site.upload()`, not the `Image` class. + +```python +site.upload( + file=None, + filename='', + description='', + comment='', + ignore=False, + url=None, + filekey=None, + stash=False, + asynchronous=False +) +``` + +### Upload Parameters + +| Parameter | Type | Description | +| -------------- | --------------- | -------------------------------------------------------- | +| `file` | `BinaryIO/None` | File object to upload | +| `filename` | `str` | Destination filename (without `File:` prefix) | +| `description` | `str` | File description (used as initial wikitext) | +| `comment` | `str` | Edit summary for upload log | +| `ignore` | `bool` | Proceed even if warnings exist (duplicate/existing file) | +| `url` | `str/None` | Fetch and upload from URL (server-side fetch) | +| `filekey` | `str/None` | Key of previously stashed upload to finalize | +| `stash` | `bool` | Stash file instead of publishing; returns `filekey` | +| `asynchronous` | `bool` | Perform finalization asynchronously | + +### Upload Modes + +1. **Direct upload** - Small files uploaded in single request +2. 
**Chunked upload** - Large files (> + chunk_size, default 1 MiB) uploaded in chunks +3. **Stash mode** - Upload bytes without publishing (two-phase workflow) +4. **URL upload** - Server fetches from provided URL +5. **From stash** - Finalize previously stashed upload + +### Upload Examples + +**Basic upload:** + +```python +with open('diagram.png', 'rb') as f: + result = site.upload( + file=f, + filename='MyDiagram.png', + description='Diagram showing system architecture', + comment='Initial upload', + ignore=True + ) +assert result['result'] == 'Success' +``` + +**Two-phase stash workflow:** + +```python +# Phase 1: Upload to stash +resp = site.upload( + file=open('photo.jpg', 'rb'), + filename='photo.jpg', + description='A photo', + stash=True +) +filekey = resp['filekey'] + +# Phase 2: Publish from stash +site.upload(filekey=filekey, filename='photo.jpg') +``` + +**Upload from URL:** + +```python +site.upload( + url='https://example.com/image.jpg', + filename='RemoteImage.jpg', + description='Downloaded from remote source' +) +``` + +## Error Handling + +| Error | When Raised | +| ------------------------ | --------------------------------- | +| `InsufficientPermission` | User lacks `upload` right | +| `FileExists` | File exists and `ignore=False` | +| `APIError` | General API failure during upload | + +**FileExists example:** + +```python +from mwclient import errors + +try: + site.upload(file=f, filename='Existing.jpg') +except errors.FileExists as e: + print(f'File {e.file_name} exists') + # Retry with ignore=True to overwrite + site.upload(file=f, filename='Existing.jpg', ignore=True) +``` + +## Common Patterns + +### Download and process an image + +```python +image = site.images['Example.jpg'] +if image.imageinfo: + print(f"Size: {image.imageinfo['width']}x{image.imageinfo['height']}") + + # Download + with open('local.jpg', 'wb') as f: + image.download(f) +``` + +### Check file usage across wiki + +```python +image = site.images['Template.png'] +for page 
in image.imageusage(): + print(f"Used on: {page.name}") +``` + +### Upload with progress check + +```python +import os + +file_path = 'large_file.zip' +file_size = os.path.getsize(file_path) + +with open(file_path, 'rb') as f: + # Files > chunk_size (1 MiB) will use chunked upload + result = site.upload( + file=f, + filename='LargeFile.zip', + description='Large file upload', + comment=f'Uploading {file_size} bytes' + ) +``` + +### Get file history + +```python +image = site.images['Document.pdf'] +for rev in image.imagehistory(): + print(f"{rev['timestamp']} - {rev['user']} - {rev.get('comment', '')}") +``` + +### Check for duplicates + +```python +image = site.images['Photo.jpg'] +duplicates = list(image.duplicatefiles()) +if duplicates: + print(f"Found {len(duplicates)} duplicate(s)") +``` + +## Namespace and Object Dispatch + +Access patterns and their resulting types: + +| Access Pattern | Returns | Notes | +| ------------------------- | ---------- | -------------------------------- | +| `site.images['Name']` | `Image` | Prepends `File:` prefix | +| `site.pages['File:Name']` | `Image` | Namespace detected from prefix | +| `site.pages['Name']` | `Page` | Unless name has namespace prefix | +| `site.categories['Name']` | `Category` | Prepends `Category:` prefix | + +When iterating over page lists with `generator=True`, the namespace field determines the class: + +- `ns=6` → `Image` +- `ns=14` → `Category` +- Other → `Page` diff --git a/.claude/skills/mwclient/listing/SKILL.md b/.claude/skills/mwclient/listing/SKILL.md new file mode 100644 index 0000000..d1f16e9 --- /dev/null +++ b/.claude/skills/mwclient/listing/SKILL.md @@ -0,0 +1,323 @@ +--- +name: listing +description: | + Work with mwclient's lazy pagination and listing infrastructure. + covers List, GeneratorList, PageList, PageProperty classes, continuation tokens, + api_chunk_size vs max_items, and namespace-dispatched iteration. 
+ + Use when: iterating over large result sets, configuring pagination, or understanding + generator vs non-generator listing modes. +user-invocable: true +--- + +# Listing and Pagination Skill + +## Overview + +This skill covers the listing infrastructure in mwclient, which provides lazy, paginated iteration over MediaWiki API query results. All listing methods return iterator objects that fetch data in chunks as needed. + +**Key Files:** + +- `mwclient/listing.py` - List, GeneratorList, PageList, PageProperty classes +- `mwclient/util.py` - handle_limit utility + +## Class Hierarchy + +All iterator types descend from `List`: + +``` +List (base) +├── GeneratorList (yields Page/Image/Category objects) +├── PageList (supports __getitem__ access) +│ └── Category (also inherits from Page) +├── PageProperty (prop-based queries) +│ └── RevisionsIterator +└── NestedList (for nested responses) +``` + +## The List Base Class + +`List` is the engine behind all lazy iteration. It holds state for a single paginated query and drives chunk loading on demand. 
+ +### Construction Parameters + +| Parameter | Type | Description | +| ---------------- | ---------------- | ---------------------------------------------- | +| `site` | `Site` | The site to query | +| `list_name` | `str` | MediaWiki list name (e.g., `'allpages'`) | +| `prefix` | `str` | Two-letter API parameter prefix (e.g., `'ap'`) | +| `api_chunk_size` | `int/None` | Items requested per API call | +| `max_items` | `int/None` | Total items to yield before stopping | +| `return_values` | `str/tuple/None` | Field(s) to extract from each dict | +| `limit` | `int/None` | **Deprecated.** Alias for `api_chunk_size` | + +### api_chunk_size vs max_items + +| Parameter | Scope | Effect | +| ---------------- | --------------- | ------------------------------------ | +| `api_chunk_size` | Per API request | Sets `{prefix}limit` in request | +| `max_items` | Total iteration | Stops yielding after this many items | + +Examples: + +- `max_items=2, api_chunk_size=1`: Two API calls, one item per call +- `max_items=2` (no chunk size): Single call returning both items +- `api_chunk_size=2` (no max): All items, two per call + +### return_values + +Controls what fields are yielded from each result dict: + +| `return_values` | Yield Type | Example | +| ----------------- | ---------- | ---------------------------------------- | +| `None` (default) | `dict` | `{'pageid': 1, 'ns': 0, 'title': 'Foo'}` | +| `'title'` | scalar | `'Foo'` | +| `('title', 'ns')` | `tuple` | `('Foo', 0)` | + +## GeneratorList + +`GeneratorList` extends `List` to yield typed page objects instead of raw dicts. 
+ +### Namespace Dispatch + +When iterating, `GeneratorList.__next__()` inspects the `ns` field and instantiates the appropriate class: + +| Namespace ID | Returns | +| ------------ | ---------- | +| `14` | `Category` | +| `6` | `Image` | +| Other | `Page` | + +### Generator Mode Features + +- Renames limit parameter: `{prefix}limit` → `g{prefix}limit` (e.g., `aplimit` → `gaplimit`) +- Sets `generator='generator'` for API calls +- Adds `prop=info|imageinfo` and `inprop=protection` to fetch metadata + +## PageList + +`PageList` is the type behind `site.pages`, `site.images`, and `site.categories`. It supports both iteration and direct item access via `__getitem__`. + +### Namespace Guessing + +When accessing via `site.pages['Name']`, `PageList` guesses the namespace: + +```python +page = site.pages['Template:Infobox'] # Detects namespace 10 +page = site.pages['File:Image.jpg'] # Detects namespace 6, returns Image +page = site.pages['Category:Python'] # Detects namespace 14, returns Category +``` + +## PageProperty and RevisionsIterator + +`PageProperty` is used for per-page API property queries (e.g., `revisions`, `extlinks`). Unlike `List`, it is bound to a specific `Page` object. + +### PageProperty Features + +- Sets `self.generator = 'prop'` +- Searches `data['query']['pages']` to find the specific page by title +- Iterates only over that page's property array + +### RevisionsIterator + +Extends `PageProperty` with special handling for `rvstartid` vs `rvstart` conflict resolution. 
+ +## Listing Methods on Site + +Site-level listing methods return `List` or `GeneratorList` instances: + +```python +# All pages +for page in site.allpages(namespace=0, filterredir='nonredirects'): + print(page.name) + +# Search results +for page in site.search('query', namespace=0): + print(page.name) + +# Recent changes +for change in site.recentchanges(namespace=0): + print(change['title']) + +# User contributions +for contrib in site.usercontributions('Username'): + print(contrib['title']) + +# Log events +for log in site.logevents(logtype='upload'): + print(log['title']) + +# All images/categories/users +for image in site.allimages(): + print(image.name) +for cat in site.allcategories(): + print(cat.name) +``` + +## Listing Methods on Page + +Page-level listing methods that return `GeneratorList` or `List`: + +```python +page = site.pages['Main Page'] + +# Backlinks (pages linking here) +for p in page.backlinks(): + print(p.name) + +# Categories this page belongs to +for cat in page.categories(): + print(cat.name) + +# Pages that embed/transclude this page +for p in page.embeddedin(): + print(p.name) + +# External links from this page +for url in page.extlinks(): + print(url) + +# Images embedded in this page +for img in page.images(): + print(img.name) + +# Interwiki links +for prefix, title in page.iwlinks(): + print(f'{prefix}:{title}') + +# Language links +for lang, title in page.langlinks(): + print(f'{lang}:{title}') + +# Internal links +for p in page.links(): + print(p.name) + +# Templates transcluded +for t in page.templates(): + print(t.name) + +# Revision history +for rev in page.revisions(): + print(rev['timestamp'], rev['user']) +``` + +## The generator Parameter + +Most listing methods accept a `generator` parameter: + +| Mode | Return Type | Yields | +| -------------------------- | --------------------------------------- | --------------------------------- | +| `generator=True` (default) | `GeneratorList`/`PagePropertyGenerator` | 
`Page`/`Image`/`Category` objects | +| `generator=False` | `List`/`PageProperty` | Raw strings, dicts, or tuples | + +Example: + +```python +# Generator mode - Page objects +for page in page.backlinks(): + print(page.name) + +# Non-generator mode - title strings +for title in page.backlinks(generator=False): + print(title) +``` + +## Continuation and Pagination + +### New-Style Continuation (MediaWiki ≥ 1.26) + +The API returns a top-level `continue` dict with opaque continuation values. `List.load_chunk()` merges these into `self.args`: + +```python +# API response: {'continue': {'apcontinue': 'Next_Page', 'continue': '-||'}} +# Next request automatically includes these parameters +``` + +### Miser Mode Handling + +Some wikis return a `continue` token but zero results. The `__next__` loop handles this by calling `load_chunk()` repeatedly until items are found or `self.last` is `True`. + +## Common Patterns + +### Iterate with limit + +```python +# Get first 10 pages only +for page in site.allpages(namespace=0, max_items=10): + print(page.name) + +# Fetch 5 at a time +for page in site.allpages(namespace=0, max_items=100, api_chunk_size=5): + print(page.name) +``` + +### Filter by namespace + +```python +# Only main namespace (0) +for page in site.allpages(namespace=0): + print(page.name) + +# Multiple namespaces as pipe-separated string +for page in site.recentchanges(namespace='0|1|2'): + print(page['title']) +``` + +### Get only titles + +```python +# Fast - only fetch titles +for title in site.allpages(namespace=0, return_values='title'): + print(title) + +# Multiple fields as tuple +for title, ns in site.allpages(namespace=0, return_values=('title', 'ns')): + print(title, ns) +``` + +### Check if listing has results + +```python +pages = list(site.search('unlikely_term_12345', max_items=1)) +if pages: + print("Found results") +else: + print("No results") +``` + +### Iterate backlinks with filter + +```python +# Only non-redirects in main namespace +for p in 
page.backlinks( + namespace=0, + filterredir='nonredirects' +): + print(p.name) +``` + +### Handle generator vs non-generator + +```python +# Works with both modes +result = page.links(generator=use_objects) +for item in result: + if use_objects: + print(item.name) # Page object + else: + print(item) # String +``` + +## Class-to-Role Summary + +| Class | `generator` | `result_member` | Primary Use | +| ----------------------- | ------------- | --------------- | ------------------------ | +| `List` | `'list'` | `list_name` | Site-level lists | +| `NestedList` | `'list'` | `list_name` | Nested responses | +| `GeneratorList` | `'generator'` | `'pages'` | Page-level generators | +| `Category` | `'generator'` | `'pages'` | Category members | +| `PageList` | `'generator'` | `'pages'` | Page access | +| `PageProperty` | `'prop'` | prop name | Page properties | +| `PagePropertyGenerator` | `'generator'` | `'pages'` | Page property generators | +| `RevisionsIterator` | `'prop'` | `'revisions'` | Revision history | diff --git a/.claude/skills/mwclient/page/SKILL.md b/.claude/skills/mwclient/page/SKILL.md new file mode 100644 index 0000000..57372ea --- /dev/null +++ b/.claude/skills/mwclient/page/SKILL.md @@ -0,0 +1,305 @@ +--- +name: page +description: | + Work with mwclient Page objects for reading, editing, and managing wiki pages. + Covers text retrieval, revision history, editing (edit/append/prepend), page management + (move/delete/purge), backlinks, categories, and redirect resolution. + + Use when: reading page content, making edits, querying page relationships, or handling + page metadata and permissions. +user-invocable: true +--- + +# Page Operations Skill + +## Overview + +This skill covers the `Page` class in mwclient, which represents a single wiki page. The `Page` class provides methods for reading content, making edits, and querying page relationships. 
+ +**Key Files:** + +- `mwclient/page.py` - Main Page class implementation + +## Page Class + +The `Page` class is the primary object representing a single wiki page. It holds page metadata (fetched at instantiation) and provides methods for content operations. + +### Class Hierarchy + +| Class | Module | Namespace | +| ---------- | --------------------- | ------------------- | +| `Page` | `mwclient/page.py` | All others | +| `Image` | `mwclient/image.py` | File / Image (ns 6) | +| `Category` | `mwclient/listing.py` | Category (ns 14) | + +`Image` inherits from `Page`. `Category` inherits from both `Page` and `GeneratorList`. + +### Instantiating a Page + +**Direct construction:** + +```python +page = mwclient.page.Page(site, name, info=None, extra_properties=None) +``` + +| Parameter | Type | Description | +| ------------------ | ------------------ | -------------------------------------------------- | +| `site` | `Site` | The site the page belongs to | +| `name` | `int`/`str`/`Page` | Page title, page ID, or Page instance to copy | +| `info` | `dict`/`None` | Pre-fetched page info; if `None`, API call is made | +| `extra_properties` | `dict`/`None` | Additional API properties to fetch | + +**Via site.pages (typical):** + +```python +page = site.pages['Main Page'] +image = site.images['Example.jpg'] # namespace 6 +category = site.categories['Python'] # namespace 14 +``` + +## Page Properties + +After construction, these attributes are populated from the API: + +| Attribute | Type | Description | +| ------------------ | ------------------------- | ----------------------------------------------- | +| `name` | `str` | Full title including namespace prefix | +| `namespace` | `int` | Integer namespace ID; 0 = main | +| `exists` | `bool` | `False` if the page does not exist | +| `revision` | `int` | Latest revision ID; 0 if new | +| `pageid` | `int`/`None` | MediaWiki page ID; `None` if page doesn't exist | +| `protection` | `dict` | Maps action string → `(level, 
expiry)` tuple | +| `redirect` | `bool` | `True` if the page is a redirect | +| `length` | `int`/`None` | Page size in bytes | +| `touched` | `time.struct_time`/`None` | Timestamp of last cache invalidation | +| `contentmodel` | `str`/`None` | e.g. `'wikitext'`, `'json'` | +| `pagelanguage` | `str`/`None` | Language code e.g. `'en'` | +| `restrictiontypes` | `list`/`None` | Which actions can be protected | + +**Derived title properties:** + +- `page_title` - Title without namespace prefix +- `base_title` - Top-level title before first `/` (no namespace) +- `base_name` - Top-level title before first `/` (with namespace) + +## Reading Page Content + +### page.text() + +Primary method for retrieving a page's wikitext content. + +```python +text = page.text( + section: Union[int, str, None] = None, + expandtemplates: bool = False, + cache: bool = True, + slot: str = 'main' +) -> str +``` + +| Parameter | Type | Default | Description | +| ----------------- | ------------------ | -------- | ----------------------------------------- | +| `section` | `int`/`str`/`None` | `None` | Section number or `T-` identifier | +| `expandtemplates` | `bool` | `False` | If `True`, expands all templates | +| `cache` | `bool` | `True` | Use instance-level `_textcache` | +| `slot` | `str` | `'main'` | Content slot (MediaWiki ≥ 1.32) | + +**Behavior:** + +- Checks `can('read')` permission first; raises `InsufficientPermission` if lacking +- Returns empty string `''` if page doesn't exist +- Uses in-memory cache keyed by `(section, expandtemplates)` +- Updates `page.last_rev_time` with revision timestamp + +### page.revisions() + +Returns a `RevisionsIterator` for paginating through revision history. 
+ +```python +revs = page.revisions( + startid=None, endid=None, + start=None, end=None, + dir='older', + user=None, excludeuser=None, + limit=None, # Deprecated + prop='ids|timestamp|flags|comment|user', + expandtemplates=False, + section=None, + diffto=None, + slots=None, + uselang=None, + max_items=None, + api_chunk_size=50 +) -> RevisionsIterator +``` + +Yields dicts containing fields specified by `prop`. Add `content` to `prop` to get wikitext. + +### Redirect Resolution + +```python +page.redirects_to() -> Optional[Page] # Returns target Page or None +page.resolve_redirect() -> Page # Returns target or self +``` + +Example: + +```python +page = site.pages['WP:AN'] # might be a redirect +canonical = page.resolve_redirect() # returns target or page itself +text = canonical.text() +``` + +## Editing Pages + +### Edit Methods + +| Method | Parameters | API Action | Required Permission | +| ----------- | ------------------------------------ | ------------------------- | ------------------- | +| `edit()` | `text, summary, minor, bot, section` | `edit` | `can('edit')` | +| `append()` | `text, summary, minor, bot, section` | `edit` | `can('edit')` | +| `prepend()` | `text, summary, minor, bot, section` | `edit` | `can('edit')` | +| `save()` | same as `edit()` | `edit` | `can('edit')` | +| `touch()` | _(none)_ | `edit` (via `append('')`) | `can('edit')` | + +**Common edit parameters:** + +| Parameter | Type | Default | Effect | +| --------- | --------------- | ------------ | -------------------------------- | +| `text` | `str` | _(required)_ | Page content or delta | +| `summary` | `str` | `''` | Edit summary | +| `minor` | `bool` | `False` | Mark as minor edit | +| `bot` | `bool` | `True` | Mark as bot edit | +| `section` | `Optional[str]` | `None` | Target section number or `'new'` | + +Example: + +```python +page.edit('New page content', summary='Updated content') +page.append('\nNew line', summary='Added line') +page.prepend('Header\n', summary='Added header') 
+``` + +### Managing Pages + +```python +page.move(new_title, reason='', move_talk=True, no_redirect=False, + move_subpages=False, ignore_warnings=False) +page.delete(reason='', watch=False, unwatch=False, oldimage=None) +page.purge() # Forces MediaWiki to re-render the page +``` + +## Page Listings + +Methods that return iterable objects for page relationships: + +| Method | Has `generator` param | Default Mode | Non-generator yields | +| -------------- | --------------------- | --------------------- | ----------------------- | +| `backlinks()` | Yes | `generator=True` | title string | +| `categories()` | Yes | `generator=True` | title string | +| `embeddedin()` | Yes | `generator=True` | title string | +| `extlinks()` | No | always `PageProperty` | URL string | +| `images()` | Yes | `generator=True` | title string | +| `iwlinks()` | No | always `PageProperty` | `(prefix, title)` tuple | +| `langlinks()` | No | always `PageProperty` | `(lang, title)` tuple | +| `links()` | Yes | `generator=True` | title string | +| `templates()` | Yes | `generator=True` | title string | + +**Generator mode** (`generator=True`): yields `Page`/`Image`/`Category` objects +**Non-generator mode** (`generator=False`): yields raw strings or tuples + +Example: + +```python +# Get backlinks as Page objects +for linking_page in page.backlinks(): + print(linking_page.name) + +# Get categories as title strings +for cat in page.categories(generator=False): + print(cat) + +# Get interwiki links +for prefix, title in page.iwlinks(): + print(f'{prefix}:{title}') +``` + +## Permission Checking + +```python +page.can(action) -> bool +``` + +Checks whether the current user has permission to perform an action on the page. 
+ +```python +if page.can('edit'): + page.edit('New content') +if page.can('move'): + page.move('New Title') +if page.can('delete'): + page.delete() +``` + +## Error Handling + +Common errors when working with pages: + +| Error | When Raised | +| ------------------------ | ------------------------------------ | +| `InsufficientPermission` | User lacks permission for action | +| `ProtectedPageError` | Page is protected against action | +| `EditError` | Edit failed (edit conflict, etc.) | +| `AssertUserFailedError` | Not logged in and `force_login=True` | +| `UserBlocked` | User account is blocked | +| `InvalidPageTitle` | Title contains illegal characters | + +## Token Acquisition + +```python +page.get_token(type, force=False) +``` + +Delegates to `site.get_token()`. Used internally for edit, move, and delete operations. + +## Common Patterns + +### Read, Modify, Save + +```python +page = site.pages['My Page'] +text = page.text() +new_text = text.replace('old', 'new') +page.edit(new_text, summary='Replaced old with new') +``` + +### Check if page exists before editing + +```python +page = site.pages['New Page'] +if not page.exists: + page.edit('Initial content', summary='Created page') +``` + +### Handle redirects + +```python +page = site.pages['Shortcut'] +if page.redirect: + page = page.resolve_redirect() +text = page.text() +``` + +### Iterate backlinks with filter + +```python +for p in page.backlinks(namespace=0, filterredir='nonredirects'): + print(p.name) +``` + +### Get all revisions by a user + +```python +for rev in page.revisions(user='SomeUser', dir='older'): + print(rev['timestamp'], rev['comment']) +``` diff --git a/.claude/skills/mwclient/site/SKILL.md b/.claude/skills/mwclient/site/SKILL.md new file mode 100644 index 0000000..c42cdc4 --- /dev/null +++ b/.claude/skills/mwclient/site/SKILL.md @@ -0,0 +1,371 @@ +--- +name: site +description: | + Work with mwclient Site objects for connecting to and interacting with MediaWiki instances. 
+ Covers connection setup, authentication (OAuth, HTTP Basic, clientlogin, legacy login), + site-level listings, token management, and core API methods. + + Use when: connecting to a wiki, authenticating, querying site metadata, or performing + site-level operations like search and recent changes. +user-invocable: true +--- + +# Site Operations Skill + +## Overview + +This skill covers the `Site` class in mwclient, the central class for interacting with MediaWiki instances. Every operation — reading pages, making edits, uploading files, or querying lists — begins with constructing a `Site` instance. + +**Key Files:** + +- `mwclient/client.py` - Site class implementation +- `mwclient/__init__.py` - Re-exports `Site` + +## The Site Class + +`Site` manages HTTP communication, bootstraps site metadata, exposes page accessors, and provides the core API methods. + +```python +import mwclient +site = mwclient.Site('en.wikipedia.org') +``` + +## Constructor + +```python +Site( + host, # Required: hostname without scheme + path='/w/', # Script path (must end with /) + ext='.php', # File extension for API scripts + pool=None, # Pre-existing requests.Session + retry_timeout=30, # Seconds to sleep per past retry + max_retries=25, # Max retry attempts + wait_callback=None, # Called on each retry + clients_useragent=None, # Prepended to user agent + max_lag=3, # maxlag for index.php calls + compress=True, # Request gzip compression + force_login=True, # Require auth before editing + do_init=True, # Run site_init() during construction + httpauth=None, # HTTP Basic auth tuple or AuthBase + connection_options=None, # Extra kwargs for requests + consumer_token=None, # OAuth1 consumer key + consumer_secret=None, # OAuth1 consumer secret + access_token=None, # OAuth1 access token + access_secret=None, # OAuth1 access secret + client_certificate=None, # Path to PEM or (cert, key) tuple + custom_headers=None, # Additional headers + scheme='https', # 'http' or 'https' + reqs=None # 
Deprecated: use connection_options +) +``` + +## Key Instance Attributes + +After `site_init()` completes: + +| Attribute | Type | Description | +| ------------- | ---------------- | ------------------------------------------- | +| `host` | `str` | Hostname as passed to constructor | +| `path` | `str` | Script path (e.g., `/w/`) | +| `scheme` | `str` | `'https'` or `'http'` | +| `version` | `tuple` | MediaWiki version, e.g., `(1, 39, 5)` | +| `namespaces` | `dict[int, str]` | Namespace ID → name mapping | +| `site` | `dict` | Raw `query.general` siteinfo | +| `username` | `str` | Currently authenticated user | +| `groups` | `list[str]` | User's groups | +| `rights` | `list[str]` | User's rights | +| `blocked` | `bool/tuple` | `False` or `(blocked_by, reason)` | +| `hasmsg` | `bool` | Unread talk page messages? | +| `logged_in` | `bool` | `True` if not anonymous | +| `tokens` | `dict[str, str]` | Cached API tokens | +| `initialized` | `bool` | `True` after first successful `site_init()` | +| `force_login` | `bool` | Controls unauthenticated edit gating | +| `pages` | `PageList` | Entry point for all pages | +| `images` | `PageList` | Entry point for namespace 6 (File) | +| `categories` | `PageList` | Entry point for namespace 14 (Category) | +| `connection` | `Session` | Underlying HTTP session | +| `chunk_size` | `int` | Upload chunk size (default 1 MiB) | + +## Authentication + +### OAuth 1 + +```python +site = mwclient.Site( + 'en.wikipedia.org', + consumer_token='my_consumer_token', + consumer_secret='my_consumer_secret', + access_token='my_access_token', + access_secret='my_access_secret' +) +``` + +### HTTP Basic Auth + +```python +site = mwclient.Site('mywiki.example.org', httpauth=('user', 'pass')) +``` + +### clientlogin (Recommended for interactive) + +```python +site = mwclient.Site('mywiki.example.org') +site.clientlogin(username='myuser', password='secret') +``` + +### Legacy login (for bot passwords) + +```python +site = 
mwclient.Site('mywiki.example.org') +site.login(username='myuser', password='botpassword') +``` + +### SSL Client Certificate + +```python +# Single combined PEM +site = mwclient.Site('mywiki.example.org', client_certificate='/path/to/client.pem') + +# Separate cert and key +site = mwclient.Site('mywiki.example.org', client_certificate=('client.pem', 'key.pem')) +``` + +## Core API Methods + +### site.get(action, \*\*kwargs) + +Shorthand for `api(action, 'GET', ...)`. Use for idempotent read operations. + +```python +result = site.get('query', meta='siteinfo') +``` + +### site.post(action, \*\*kwargs) + +Shorthand for `api(action, 'POST', ...)`. Use for write operations. + +```python +result = site.post('edit', title='Page', text='Content', token=token) +``` + +### site.api(action, http_method='POST', \*\*kwargs) + +The primary API call method. + +```python +result = site.api('query', http_method='GET', meta='userinfo') +``` + +## Site-Level Listing Methods + +### allpages() + +Iterate over all pages on the wiki. + +```python +for page in site.allpages(namespace=0, filterredir='nonredirects'): + print(page.name) +``` + +### search() + +Search the wiki. + +```python +for page in site.search('python', namespace=0): + print(page.name) +``` + +### recentchanges() + +Get recent changes. + +```python +for change in site.recentchanges(namespace=0, limit=50): + print(change['title'], change['timestamp']) +``` + +### usercontributions() + +Get contributions by a user. + +```python +for contrib in site.usercontributions('Username'): + print(contrib['title']) +``` + +### logevents() + +Get log events. + +```python +for log in site.logevents(logtype='upload'): + print(log['title']) +``` + +### allimages(), allcategories(), allusers() + +Similar patterns for other list types. + +## Token Management + +### get_token(type, force=False) + +Retrieves and caches API tokens. 
+ +```python +csrf_token = site.get_token('csrf') # For edits +login_token = site.get_token('login') # For login +email_token = site.get_token('email') # For email +``` + +Tokens are cached in `site.tokens` and cleared on `site_init()`. + +## Version Checking + +### require(major, minor, revision=None) + +Check if connected MediaWiki meets minimum version. + +```python +if site.require(1, 35, raise_error=False): + # Use features requiring MW 1.35+ + pass +``` + +## Page Access + +### site.pages + +Access any page by title: + +```python +page = site.pages['Main Page'] +page = site.pages['Template:Infobox'] +page = site.pages['Category:Python'] +``` + +### site.images + +Access files (namespace 6), prepends `File:` automatically: + +```python +image = site.images['Example.jpg'] # Equivalent to site.pages['File:Example.jpg'] +``` + +### site.categories + +Access categories (namespace 14), prepends `Category:` automatically: + +```python +cat = site.categories['Python'] # Equivalent to site.pages['Category:Python'] +``` + +## Utility Methods + +### expandtemplates(text, title=None) + +Expands templates in wikitext. + +```python +expanded = site.expandtemplates('{{CURRENTYEAR}}') +``` + +### parse(title=None, text=None, page=None) + +Parse wikitext and return HTML. + +```python +result = site.parse(text='== Heading ==\nParagraph') +html = result['text']['*'] +``` + +### email(user, subject, text) + +Send email to a user. 
+ +```python +site.email('TargetUser', subject='Hello', text='Message body') +``` + +## Error Handling + +| Error | When Raised | +| ------------------------- | -------------------------------------- | +| `MediaWikiVersionError` | Version parsing fails or below minimum | +| `APIDisabledError` | API is disabled on target site | +| `InvalidResponse` | Server returns non-JSON | +| `MaximumRetriesExceeded` | Retry limit exhausted | +| `APIError` | General API error | +| `LoginError` | Login attempt fails | +| `OAuthAuthorizationError` | OAuth authentication fails | +| `InsufficientPermission` | Operation requires missing permission | + +## Common Patterns + +### Connect and authenticate + +```python +import mwclient + +site = mwclient.Site('en.wikipedia.org') +site.login('Username', 'password') +print(f"Logged in as {site.username}") +print(f"Rights: {site.rights}") +``` + +### Check if user can perform action + +```python +if 'upload' in site.rights: + # Can upload files + pass + +if 'delete' in site.rights: + # Can delete pages + pass +``` + +### Iterate all pages in namespace + +```python +for page in site.allpages(namespace=10, prefix='Infobox'): # Templates + print(page.name) +``` + +### Search with limit + +```python +for page in site.search('python', namespace=0, max_items=10): + print(page.name) +``` + +### Handle blocked user + +```python +if site.blocked: + blocked_by, reason = site.blocked + print(f"Blocked by {blocked_by}: {reason}") +``` + +### Raw API call for custom actions + +```python +result = site.get('query', list='allusers', auprop='editcount') +for user in result['query']['allusers']: + print(user['name'], user['editcount']) +``` + +## Connection Options + +Pass custom options to all requests: + +```python +site = mwclient.Site( + 'mywiki.example.org', + connection_options={ + 'timeout': 30, + 'verify': '/path/to/ca-bundle.crt' + } +) +``` diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 39b23a0..270d22a 100644 --- 
a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -1,29 +1,29 @@ name: Run Tests on: - pull_request: - branches: - - main + pull_request: + branches: + - main jobs: - test: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 + test: + runs-on: ubuntu-latest - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' + steps: + - name: Checkout code + uses: actions/checkout@v4 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest - if [ -f requirements.in ]; then pip install -r requirements.in; fi + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" - - name: Run tests - run: | - python -m pytest tests -v --tb=short + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements-dev.txt + pip install -r requirements.in + + - name: Run tests + run: | + python -m pytest tests -v --tb=short diff --git a/merge.md b/merge.md index eea68ea..3686e1d 100644 --- a/merge.md +++ b/merge.md @@ -46,7 +46,6 @@ This document provides a comprehensive plan to migrate all functionality from th | `bot.py` | ~150 | `LOGIN_HELPS` | ⏳ Pending | | `params_help.py` | ~80 | `PARAMS_HELPS` | ⏳ Pending | | `cookies_bot.py` | ~100 | Cookie handling | ⏳ Pending | -| `login_wrap.py` | ~100 | `LoginWrapState` | ⏳ Pending | ### New Architecture Files (Already Created) @@ -165,7 +164,7 @@ mw_api/ def get_text(self, redirects=False): from ..repositories import PageRepository from ..core.page import Page - + page = Page(title=self.title, lang=self.lang, family=self.family) repo = PageRepository(self.login_bot) self.text = repo.get_text(page) @@ -261,7 +260,7 @@ mw_api/ **Current State**: ✅ Complete -The exception hierarchy has been migrated to `core/exceptions.py`. +The exception hierarchy has been migrated to `core/exceptions.py`. 
**Remaining Task**: - [ ] Update `HANDEL_ERRORS.handel_err()` to use `parse_api_error()` from `core/exceptions.py` diff --git a/mw_api/DB_bots/__init__.py b/mw_api/DB_bots/__init__.py index 40a96af..e69de29 100644 --- a/mw_api/DB_bots/__init__.py +++ b/mw_api/DB_bots/__init__.py @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/mw_api/DB_bots/db_bot.py b/mw_api/DB_bots/db_bot.py index 2ba14a1..7c5ae62 100644 --- a/mw_api/DB_bots/db_bot.py +++ b/mw_api/DB_bots/db_bot.py @@ -27,13 +27,9 @@ def __init__(self, db_path: str) -> None: # self.db = sqlite_utils.Database(db_path, tracer=tracer) self.db = sqlite_utils.Database(db_path) - def create_table( - self, table_name: str, fields: Dict[str, Any], pk: str = "id", **kwargs - ) -> None: + def create_table(self, table_name: str, fields: Dict[str, Any], pk: str = "id", **kwargs) -> None: # Create table if it doesn't exist - self.db[table_name].create( - fields, pk=pk, if_not_exists=True, ignore=True, **kwargs - ) + self.db[table_name].create(fields, pk=pk, if_not_exists=True, ignore=True, **kwargs) def query(self, sql: str) -> List[tuple]: # return self.db.query(sql) @@ -58,9 +54,7 @@ def insert(self, table_name: str, data: Dict[str, Any], check: bool = True) -> N self.db[table_name].insert(data, ignore=True, pk="id") del data - def insert_all( - self, table_name: str, datalist: List[Dict[str, Any]], prnt: bool = True - ) -> None: + def insert_all(self, table_name: str, datalist: List[Dict[str, Any]], prnt: bool = True) -> None: if prnt: print(f"inserting {len(datalist)} rows") self.db[table_name].insert_all(datalist, ignore=True, pk="id") diff --git a/mw_api/DB_bots/pymysql_bot.py b/mw_api/DB_bots/pymysql_bot.py index 5f5e189..1249658 100644 --- a/mw_api/DB_bots/pymysql_bot.py +++ b/mw_api/DB_bots/pymysql_bot.py @@ -1,28 +1,21 @@ """ -from mw_api import pymysql_bot # result = pymysql_bot.sql_connect_pymysql(query, return_dict=False, values=None, main_args={}, credentials={}, conversions=None) """ import copy +import 
logging import pymysql import pymysql.cursors +logger = logging.getLogger(__name__) + def sql_connect_pymysql( - query, - return_dict=False, - values=None, - main_args={}, - credentials={}, - conversions=None, - many=False, - **kwargs + query, return_dict=False, values=None, main_args={}, credentials={}, conversions=None, many=False, **kwargs ): args = copy.deepcopy(main_args) - args["cursorclass"] = ( - pymysql.cursors.DictCursor if return_dict else pymysql.cursors.Cursor - ) + args["cursorclass"] = pymysql.cursors.DictCursor if return_dict else pymysql.cursors.Cursor if conversions: args["conv"] = conversions @@ -31,6 +24,7 @@ def sql_connect_pymysql( try: connection = pymysql.connect(**args, **credentials) except Exception as e: + logger.exception(e) return [] with connection as conn, conn.cursor() as cursor: @@ -42,11 +36,14 @@ def sql_connect_pymysql( cursor.execute(query, params) except Exception as e: + logger.exception(e) return [] try: results = cursor.fetchall() except Exception as e: + logger.exception(e) + logger.exception("Exception during fetchall", exc_info=True) return [] return results diff --git a/mw_api/__init__.py b/mw_api/__init__.py index 75fa0d6..a4b365c 100644 --- a/mw_api/__init__.py +++ b/mw_api/__init__.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- """ """ + from pathlib import Path from .all_apis import ALL_APIS -from .api_utils import botEdit, printe, txtlib, wd_sparql +from .api_utils import botEdit, txtlib, wd_sparql from .DB_bots import db_bot, pymysql_bot from .logging_config import setup_logging -from .super.login_wrap import LoginWrap setup_logging(Path(__name__).parent.name) @@ -17,6 +16,4 @@ "pymysql_bot", "db_bot", "botEdit", - "printe", - "LoginWrap", ] diff --git a/mw_api/accounts/__init__.py b/mw_api/accounts/__init__.py index d437c80..6398230 100644 --- a/mw_api/accounts/__init__.py +++ b/mw_api/accounts/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ TODO: should be removed, user can new version like: from mw_api 
import ALL_APIS diff --git a/mw_api/all_apis.py b/mw_api/all_apis.py index 3260036..e7e000c 100644 --- a/mw_api/all_apis.py +++ b/mw_api/all_apis.py @@ -1,6 +1,8 @@ """ """ -from .pages_bots.all_apis import ALL_APIS +from .pages_bots.all_apis import ( + ALL_APIS, +) __all__ = [ "ALL_APIS", diff --git a/mw_api/api/__init__.py b/mw_api/api/__init__.py index 810ca32..6fda529 100644 --- a/mw_api/api/__init__.py +++ b/mw_api/api/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ API package for mw_api. diff --git a/mw_api/api/client.py b/mw_api/api/client.py index 4cfa3b6..cbc5495 100644 --- a/mw_api/api/client.py +++ b/mw_api/api/client.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ MediaWiki API Client. @@ -6,6 +5,7 @@ abstracting HTTP requests and response parsing. """ +import functools import logging from typing import Any, Dict, List, Optional @@ -17,6 +17,15 @@ logger = logging.getLogger(__name__) +@functools.lru_cache(maxsize=1024) +def get_session(lang, family) -> requests.session: + """ + function args used to load cached sessions + """ + session = requests.session() + return session + + class MediaWikiApiClient: """ Low-level MediaWiki API client. @@ -33,7 +42,7 @@ class MediaWikiApiClient: def __init__( self, endpoint: str, - session: Optional[requests.Session] = None, + session: Optional[requests.session] = None, user_agent: str = "mw_api Python client", ) -> None: """ @@ -41,18 +50,16 @@ def __init__( Args: endpoint: The API endpoint URL (e.g., 'https://en.wikipedia.org/w/api.php'). - session: Optional requests.Session for connection reuse. + session: Optional requests.session for connection reuse. user_agent: The User-Agent string for requests. 
""" self.endpoint = endpoint - self.session = session or requests.Session() + self.session = session or get_session(self.endpoint, self.endpoint) self.user_agent = user_agent self._csrf_token: str = "" self._headers = {"User-Agent": self.user_agent} - def post( - self, params: Dict[str, Any], config: Optional[RequestConfig] = None - ) -> Dict[str, Any]: + def post(self, params: Dict[str, Any], config: Optional[RequestConfig] = None) -> Dict[str, Any]: """ Make a POST request to the API. @@ -91,9 +98,7 @@ def post( logger.warning(str(e)) return {} - def get( - self, params: Dict[str, Any], config: Optional[RequestConfig] = None - ) -> Dict[str, Any]: + def get(self, params: Dict[str, Any], config: Optional[RequestConfig] = None) -> Dict[str, Any]: """ Make a GET request to the API. @@ -122,9 +127,7 @@ def get( logger.warning(str(e)) return {} - def request( - self, params: Dict[str, Any], config: Optional[RequestConfig] = None - ) -> Dict[str, Any]: + def request(self, params: Dict[str, Any], config: Optional[RequestConfig] = None) -> Dict[str, Any]: """ Make a request using the configured method. @@ -141,9 +144,7 @@ def request( return self.get(params, config) return self.post(params, config) - def query( - self, params: Dict[str, Any], continue_key: str = "continue" - ) -> List[Dict[str, Any]]: + def query(self, params: Dict[str, Any], continue_key: str = "continue") -> List[Dict[str, Any]]: """ Make a query with automatic continuation. diff --git a/mw_api/api/token_manager.py b/mw_api/api/token_manager.py index b58961c..e984f5b 100644 --- a/mw_api/api/token_manager.py +++ b/mw_api/api/token_manager.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Token Manager for CSRF and login tokens. 
@@ -28,7 +27,7 @@ class TokenManager: def __init__( self, endpoint: str, - session: requests.Session, + session: requests.session, headers: Optional[Dict[str, str]] = None, ) -> None: """ @@ -86,9 +85,7 @@ def _fetch_token(self, token_type: str) -> str: } try: - response = self._session.post( - self._endpoint, data=params, headers=self._headers - ) + response = self._session.post(self._endpoint, data=params, headers=self._headers) data = response.json() token_key = f"{token_type}token" return data.get("query", {}).get("tokens", {}).get(token_key, "") diff --git a/mw_api/api_utils/__init__.py b/mw_api/api_utils/__init__.py index 40a96af..e69de29 100644 --- a/mw_api/api_utils/__init__.py +++ b/mw_api/api_utils/__init__.py @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/mw_api/api_utils/ask_bot.py b/mw_api/api_utils/ask_bot.py index 3cd4c8e..c89fc08 100644 --- a/mw_api/api_utils/ask_bot.py +++ b/mw_api/api_utils/ask_bot.py @@ -6,6 +6,8 @@ import difflib import logging +import pywikibot + from ..core.config import get_default_config logger = logging.getLogger(__name__) @@ -14,11 +16,8 @@ Save_or_Ask = {} -def showDiff(text, newtext): - logger.info("Showing diff between current and new text...") - diff = difflib.unified_diff(text.splitlines(), newtext.splitlines(), lineterm="") - for line in diff: - logger.info(line) +def showDiff(text, newtext) -> None: + pywikibot.showDiff(text, newtext) class ASK_BOT: @@ -59,9 +58,7 @@ def ask_put( logger.info("showDiff error..") # --- logger.info(f"diference in bytes: {len(newtext) - len(text):,}") - logger.info( - f"len of text: {len(text):,}, len of newtext: {len(newtext):,}" - ) + logger.info(f"len of text: {len(text):,}, len of newtext: {len(newtext):,}") # --- if summary: logger.info(f"-Edit summary: {summary}") diff --git a/mw_api/api_utils/botEdit.py b/mw_api/api_utils/botEdit.py index 89b042a..c602731 100644 --- a/mw_api/api_utils/botEdit.py +++ b/mw_api/api_utils/botEdit.py @@ -1,8 +1,10 @@ """ TODO: use 
EditPermissionChecker instead of this function """ + import logging from typing import Optional + from ..core.config import BotConfig from .bot_edit.bot_edit_by_templates import is_bot_edit_allowed from .bot_edit.bot_edit_by_time import check_create_time, check_last_edit_time @@ -35,11 +37,7 @@ def bot_May_Edit( Returns: True if the bot is allowed to edit the page; False otherwise. """ - check_it = is_bot_edit_allowed( - text=text, - title_page=title_page, - botjob=botjob, config=config - ) + check_it = is_bot_edit_allowed(text=text, title_page=title_page, botjob=botjob, config=config) # --- if page and check_it: # --- diff --git a/mw_api/api_utils/bot_edit/__init__.py b/mw_api/api_utils/bot_edit/__init__.py index b3156d1..d9bd0e5 100644 --- a/mw_api/api_utils/bot_edit/__init__.py +++ b/mw_api/api_utils/bot_edit/__init__.py @@ -1,8 +1,9 @@ -""" -""" +""" """ + import datetime import logging from typing import Optional + from ...core.config import BotConfig, get_default_config from .. import txtlib @@ -33,9 +34,7 @@ class EditPermissionChecker: bot_username: The bot's username for template checks. """ - def __init__( - self, config: Optional[BotConfig] = None, bot_username: str = "Mr.Ibrahembot" - ) -> None: + def __init__(self, config: Optional[BotConfig] = None, bot_username: str = "Mr.Ibrahembot") -> None: """ Initialize the EditPermissionChecker. @@ -110,9 +109,7 @@ def _check_templates(self, text: str, title_page: str, botjob: str) -> bool: restrictions = stop_edit_temps.get(botjob, []) if namestrip in restrictions or namestrip in all_stop: - logger.info( - f"<> botEdit.py: the page has temp:({namestrip}), botjob:{botjob} skipp." 
- ) + logger.info(f"<> botEdit.py: the page has temp:({namestrip}), botjob:{botjob} skipp.") self._bot_cache[botjob][title_page] = False return False @@ -132,18 +129,14 @@ def _check_templates(self, text: str, title_page: str, botjob: str) -> bool: def _handle_nobots(self, params: dict, title_page: str, botjob: str) -> bool: """Handle nobots template.""" if not params: - logger.info( - f"<> botEdit.py: the page has temp:(nobots), botjob:{botjob} skipp." - ) + logger.info(f"<> botEdit.py: the page has temp:(nobots), botjob:{botjob} skipp.") return False param1 = params.get("1", "") if param1: exclude_list = [x.strip() for x in param1.split(",")] if "all" in exclude_list or self._bot_username in exclude_list: - logger.info( - f"<> botEdit.py: the page has temp:(nobots), botjob:{botjob} skipp." - ) + logger.info(f"<> botEdit.py: the page has temp:(nobots), botjob:{botjob} skipp.") return False return True @@ -158,9 +151,7 @@ def _handle_bots(self, params: dict, title_page: str, botjob: str) -> bool: allow_list = [x.strip() for x in allow.split(",")] result = "all" in allow_list or self._bot_username in allow_list if not result: - logger.info( - f"<>botEdit.py Template:(bots) has |allow={','.join(allow_list)}." - ) + logger.info(f"<>botEdit.py Template:(bots) has |allow={','.join(allow_list)}.") return result deny = params.get("deny") @@ -168,9 +159,7 @@ def _handle_bots(self, params: dict, title_page: str, botjob: str) -> bool: deny_list = [x.strip() for x in deny.split(",")] result = "all" not in deny_list and self._bot_username not in deny_list if not result: - logger.info( - f"<>botEdit.py Template:(bots) has |deny={','.join(deny_list)}." 
- ) + logger.info(f"<>botEdit.py Template:(bots) has |deny={','.join(deny_list)}.") return result return True @@ -192,18 +181,16 @@ def _check_create_time(self, page, title_page: str) -> bool: if create_data.get("timestamp"): create_time = create_data["timestamp"] - ts_c_time = datetime.datetime.strptime( - create_time, "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=datetime.timezone.utc) + ts_c_time = datetime.datetime.strptime(create_time, "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=datetime.timezone.utc + ) diff = (now - ts_c_time).total_seconds() / (60 * 60) if diff < delay_hours: user = create_data.get("user", "") wait_time = delay_hours - diff logger.info(f"<>Page:{title_page} create at ({create_time}).") - logger.info( - f"<>Page Created before {diff:.2f} hours by: {user}, wait {wait_time:.2f}H." - ) + logger.info(f"<>Page Created before {diff:.2f} hours by: {user}, wait {wait_time:.2f}H.") self._created_cache[title_page] = False return False @@ -220,9 +207,7 @@ def _check_last_edit_time(self, page, title_page: str, delay: int) -> bool: now = datetime.datetime.now(datetime.timezone.utc) if timestamp: - ts_time = datetime.datetime.strptime( - timestamp, "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=datetime.timezone.utc) + ts_time = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=datetime.timezone.utc) diff_minutes = (now - ts_time).total_seconds() / 60 if diff_minutes < delay: diff --git a/mw_api/api_utils/bot_edit/bot_edit_by_templates.py b/mw_api/api_utils/bot_edit/bot_edit_by_templates.py index 9e4c158..0cc54d9 100644 --- a/mw_api/api_utils/bot_edit/bot_edit_by_templates.py +++ b/mw_api/api_utils/bot_edit/bot_edit_by_templates.py @@ -1,8 +1,10 @@ -""" -""" +""" """ + import logging -import wikitextparser as wtp from typing import Optional + +import wikitextparser as wtp + from ...core.config import BotConfig, get_default_config logger = logging.getLogger(__name__) @@ -132,7 +134,7 @@ def is_bot_edit_allowed( title = 
str(template.normal_name()).strip() # --- params = { - str(param.name).strip() : str(param.value).strip() + str(param.name).strip(): str(param.value).strip() for param in template.arguments if str(param.value).strip() } @@ -146,7 +148,7 @@ def is_bot_edit_allowed( Bot_Cache[botjob][title_page] = False return False # --- - logger.debug('<>botEdit.py title:(%s), params:(%s).' % (title, str(params))) + # logger.debug("<>botEdit.py title:(%s), params:(%s)." % (title, str(params))) # --- if title.lower() == "nobots": return _handle_nobots_template(params, title_page, botjob, _template) diff --git a/mw_api/api_utils/bot_edit/bot_edit_by_time.py b/mw_api/api_utils/bot_edit/bot_edit_by_time.py index 04f0a77..d8a613e 100644 --- a/mw_api/api_utils/bot_edit/bot_edit_by_time.py +++ b/mw_api/api_utils/bot_edit/bot_edit_by_time.py @@ -1,7 +1,8 @@ -""" -""" -import logging +""" """ + import datetime +import logging + Created_Cache = {} logger = logging.getLogger(__name__) @@ -82,7 +83,9 @@ def check_last_edit_time(page, title_page, delay): # --- if diff_minutes < delay: logger.info(f"<>Page:{title_page} last edit ({timestamp}).") - logger.info(f"<>Page Last edit before {delay} minutes, Wait {wait_time:.2f} minutes. title:{title_page}") + logger.info( + f"<>Page Last edit before {delay} minutes, Wait {wait_time:.2f} minutes. 
title:{title_page}" + ) return False # --- return True diff --git a/mw_api/api_utils/printe.py b/mw_api/api_utils/printe.py index 96c0ef4..710ef8f 100644 --- a/mw_api/api_utils/printe.py +++ b/mw_api/api_utils/printe.py @@ -1,757 +1,47 @@ """ """ -import difflib -import functools import logging -import re - -# --- import sys -from collections import abc -from collections.abc import Iterable, Sequence -from difflib import _format_range_unified as format_range_unified -from itertools import zip_longest - -from ..core.config import get_default_config - - -def _configure_logging() -> None: - """Configure logging based on BotConfig settings.""" - config = get_default_config() - if config.debug_logging: - logging.basicConfig(level=logging.DEBUG) - elif config.warning_logging: - logging.basicConfig(level=logging.WARNING) - - -_configure_logging() - -log = logging.getLogger(__name__) -_category_cf = frozenset( - [ - "\xad", - "\u0600", - "\u0601", - "\u0602", - "\u0603", - "\u0604", - "\u0605", - "\u061c", - "\u06dd", - "\u070f", - "\u0890", - "\u0891", - "\u08e2", - "\u180e", - "\u200b", - "\u200c", - "\u200d", - "\u200e", - "\u200f", - "\u202a", - "\u202b", - "\u202c", - "\u202d", - "\u202e", - "\u2060", - "\u2061", - "\u2062", - "\u2063", - "\u2064", - "\u2066", - "\u2067", - "\u2068", - "\u2069", - "\u206a", - "\u206b", - "\u206c", - "\u206d", - "\u206e", - "\u206f", - "\ufeff", - "\ufff9", - "\ufffa", - "\ufffb", - "\U000110bd", - "\U000110cd", - "\U00013430", - "\U00013431", - "\U00013432", - "\U00013433", - "\U00013434", - "\U00013435", - "\U00013436", - "\U00013437", - "\U00013438", - "\U0001bca0", - "\U0001bca1", - "\U0001bca2", - "\U0001bca3", - "\U0001d173", - "\U0001d174", - "\U0001d175", - "\U0001d176", - "\U0001d177", - "\U0001d178", - "\U0001d179", - "\U0001d17a", - "\U000e0001", - "\U000e0020", - "\U000e0021", - "\U000e0022", - "\U000e0023", - "\U000e0024", - "\U000e0025", - "\U000e0026", - "\U000e0027", - "\U000e0028", - "\U000e0029", - "\U000e002a", - 
"\U000e002b", - "\U000e002c", - "\U000e002d", - "\U000e002e", - "\U000e002f", - "\U000e0030", - "\U000e0031", - "\U000e0032", - "\U000e0033", - "\U000e0034", - "\U000e0035", - "\U000e0036", - "\U000e0037", - "\U000e0038", - "\U000e0039", - "\U000e003a", - "\U000e003b", - "\U000e003c", - "\U000e003d", - "\U000e003e", - "\U000e003f", - "\U000e0040", - "\U000e0041", - "\U000e0042", - "\U000e0043", - "\U000e0044", - "\U000e0045", - "\U000e0046", - "\U000e0047", - "\U000e0048", - "\U000e0049", - "\U000e004a", - "\U000e004b", - "\U000e004c", - "\U000e004d", - "\U000e004e", - "\U000e004f", - "\U000e0050", - "\U000e0051", - "\U000e0052", - "\U000e0053", - "\U000e0054", - "\U000e0055", - "\U000e0056", - "\U000e0057", - "\U000e0058", - "\U000e0059", - "\U000e005a", - "\U000e005b", - "\U000e005c", - "\U000e005d", - "\U000e005e", - "\U000e005f", - "\U000e0060", - "\U000e0061", - "\U000e0062", - "\U000e0063", - "\U000e0064", - "\U000e0065", - "\U000e0066", - "\U000e0067", - "\U000e0068", - "\U000e0069", - "\U000e006a", - "\U000e006b", - "\U000e006c", - "\U000e006d", - "\U000e006e", - "\U000e006f", - "\U000e0070", - "\U000e0071", - "\U000e0072", - "\U000e0073", - "\U000e0074", - "\U000e0075", - "\U000e0076", - "\U000e0077", - "\U000e0078", - "\U000e0079", - "\U000e007a", - "\U000e007b", - "\U000e007c", - "\U000e007d", - "\U000e007e", - "\U000e007f", - ] -) - -_invisible_chars = _category_cf -INVISIBLE_REGEX = re.compile(f"[{''.join(_invisible_chars)}]") - - -@functools.lru_cache(maxsize=1) -def get_color_table(): - # new Define the color codes for different colors - color_numbers = { - # 'lightred': 101, - # 'lightgreen': 102, - # 'lightpurple': 105, - # 'lightyellow': 103, - # 'lightblue': 104, - # 'lightcyan': 106, - # 'aqua': 106, - # 'lightaqua': 107, - # 'lightwhite': 107, - # 'lightgray': 107, - "red": 91, - "green": 92, - "yellow": 93, - "blue": 94, - "purple": 95, - "cyan": 96, - "white": 97, - "black": 98, - "grey": 99, - "gray": 100, - "underline": 4, - "invert": 7, - 
"blink": 5, - "lightblack": 108, - "bold": 1, - } - color_table = {x: f"\033[{v}m%s\033[00m" for x, v in color_numbers.items()} - - # Add light versions of the colors to the color table - for color in ["purple", "yellow", "blue", "red", "green", "cyan", "gray"]: - color_table[f"light{color}"] = color_table.get(color, 0) - - # Add some additional color names to the color table - color_table["aqua"] = color_table.get("cyan", 0) - color_table["lightaqua"] = color_table.get("cyan", 0) - color_table["lightgrey"] = color_table.get("gray", 0) - color_table["grey"] = color_table.get("gray", 0) - color_table["lightwhite"] = color_table.get("gray", 0) - color_table["light"] = 0 - - return color_table - - -color_table = get_color_table() - - -def make_str(textm): - """ - Converts the given text with color formatting to a printable string. - - The text can contain color tags like '<>' where 'color' is the name of the color. - The color will be applied to the text that follows the tag, until the end of the string or until a '<>' tag is found. - - :param textm: The text to convert. Can contain color tags. 
- """ - # Define a pattern for color tags - _color_pat = r"((:?\w+|previous);?(:?\w+|previous)?)" - # Compile a regex for color tags - colorTagR = re.compile(rf"(?:\03{{|<<){_color_pat}(?:}}|>>)") - - # Initialize a stack for color tags - color_stack = ["default"] - - # If the input is not a string, print it as is and return - if not isinstance(textm, str): - return textm - - # If the text does not contain any color tags, print it as is and return - if textm.find("\03") == -1 and textm.find("<<") == -1: - return textm - - # Split the text into parts based on the color tags - text_parts = colorTagR.split(textm) + ["default"] - - # Enumerate the parts for processing - enu = enumerate(zip(text_parts[::4], text_parts[1::4])) - - # Initialize the string to be printed - toprint = "" - - # Process each part of the text - for _, (text, next_color) in enu: - # Get the current color from the color stack - # print(f"i: {index}, text: {text}, next_color: {next_color}") - # --- - current_color = color_stack[-1] - - # If the next color is 'previous', pop the color stack to get the previous color - if next_color == "previous": - if len(color_stack) > 1: # keep the last element in the stack - color_stack.pop() - next_color = color_stack[-1] - else: - # If the next color is not 'previous', add it to the color stack - color_stack.append(next_color) - - # Get the color code for the current color - cc = color_table.get(current_color, "") - - # If the color code is not empty, apply it to the text - if cc: - text = cc % text - - # Add the colored text to the string to be printed - toprint += text - - # Print the final colored text - return toprint - - -def replace_invisible(text): - """Replace invisible characters by ''.""" - - def replace(match): - match = match.group() - if sys.maxunicode < 0x10FFFF and len(match) == 2: - mask = (1 << 10) - 1 - assert ord(match[0]) & ~mask == 0xD800 - assert ord(match[1]) & ~mask == 0xDC00 - codepoint = (ord(match[0]) & mask) << 10 | (ord(match[1]) & 
mask) - else: - codepoint = ord(match) - return f"<{codepoint:x}>" - - return INVISIBLE_REGEX.sub(replace, text) - - -class Hunk: - """One change hunk between a and b. - - Note: parts of this code are taken from by difflib.get_grouped_opcodes(). - - """ - - APPR = 1 - NOT_APPR = -1 - PENDING = 0 - - def __init__( - self, - a: str | Sequence[str], - b: str | Sequence[str], - grouped_opcode: Sequence[tuple[str, int, int, int, int]], - ) -> None: - """ - Initializer. - - :param a: sequence of lines - :param b: sequence of lines - :param grouped_opcode: list of 5-tuples describing how to turn a into - b. It has the same format as returned by difflib.get_opcodes(). - """ - self.a = a - self.b = b - self.group = grouped_opcode - self.colors = { - "+": "lightgreen", - "-": "lightred", - } - self.bg_colors = { - "+": "lightgreen", - "-": "lightred", - } - - self.diff = list(self.create_diff()) - self.diff_plain_text = "".join(self.diff) - self.diff_text = "".join(self.format_diff()) - - first, last = self.group[0], self.group[-1] - self.a_rng = (first[1], last[2]) - self.b_rng = (first[3], last[4]) - - self.header = self.get_header() - self.diff_plain_text = f"{self.header}\n{self.diff_plain_text}" - self.diff_text = self.diff_text - - self.reviewed = self.PENDING - - self.pre_context = 0 - self.post_context = 0 +import pywikibot - def get_header(self) -> str: - """Provide header of unified diff.""" - return f"{self.get_header_text(self.a_rng, self.b_rng)}\n" - - @staticmethod - def get_header_text( - a_rng: tuple[int, int], b_rng: tuple[int, int], affix: str = "@@" - ) -> str: - """Provide header for any ranges.""" - a_rng = format_range_unified(*a_rng) - b_rng = format_range_unified(*b_rng) - return f"{affix} -{a_rng} +{b_rng} {affix}" - - def create_diff(self) -> Iterable[str]: - """Generator of diff text for this hunk, without formatting.""" - - # make sure each line ends with '\n' to prevent - # behaviour like https://bugs.python.org/issue2142 - def check_line(line: 
str) -> str: - return line if line.endswith("\n") else f"{line}\n" - - for tag, i1, i2, j1, j2 in self.group: - # equal/delete/insert add additional space after the sign as it's - # what difflib.ndiff does do too. - if tag == "equal": - for line in self.a[i1:i2]: - yield f" {check_line(line)}" - elif tag == "delete": - for line in self.a[i1:i2]: - yield f"- {check_line(line)}" - elif tag == "insert": - for line in self.b[j1:j2]: - yield f"+ {check_line(line)}" - elif tag == "replace": - for line in difflib.ndiff(self.a[i1:i2], self.b[j1:j2]): - yield check_line(line) - - def format_diff(self) -> Iterable[str]: - """Color diff lines.""" - diff = iter(self.diff) - - fmt = "" - line1, line2 = "", next(diff) - for line in diff: - fmt, line1, line2 = line1, line2, line - # do not show lines starting with '?'. - if line1.startswith("?"): - continue - if line2.startswith("?"): - yield self.color_line(line1, line2) - # do not try to reuse line2 as format at next iteration - # if already used for an added line. - if line1.startswith("+"): - line2 = "" - continue - if line1.startswith("-"): - # Color whole line to be removed. - yield self.color_line(line1) - elif line1.startswith("+"): - # Reuse last available fmt as diff line, if possible, - # or color whole line to be added. - fmt = fmt if fmt.startswith("?") else "" - fmt = fmt[: min(len(fmt), len(line1))] - fmt = fmt if fmt else None - yield self.color_line(line1, fmt) - - # handle last line - # If line line2 is removed, color the whole line. - # If line line2 is added, check if line1 is a '?-type' line, to prevent - # the entire line line2 to be colored (see T130572). - # The case where line2 start with '?' has been covered already. - if line2.startswith("-"): - # Color whole line to be removed. - yield self.color_line(line2) - elif line2.startswith("+"): - # Reuse last available line1 as diff line, if possible, - # or color whole line to be added. 
- fmt = line1 if line1.startswith("?") else "" - fmt = fmt[: min(len(fmt), len(line2))] - fmt = fmt if fmt else None - yield self.color_line(line2, fmt) - - def color_line(self, line: str, line_ref: str | None = None) -> str: - """Color line characters. - - If line_ref is None, the whole line is colored. - If line_ref[i] is not blank, line[i] is colored. - Color depends if line starts with +/-. - - line_ref: string. - """ - color = line[0] - - if line_ref is None: - if color in self.colors: - # colored_line = color_format('{color}{0}{default}',line, color=self.colors[color]) - colored_line = f"<<{self.colors[color]}>>" - colored_line += f"{line}<>" - return colored_line - return line - - colored_line = "" - color_closed = True - for char, char_ref in zip_longest(line, line_ref.strip(), fillvalue=" "): - char_tagged = char - if color_closed: - if char_ref != " ": - apply_color = ( - self.colors[color] - if char != " " - else f"default;{self.bg_colors[color]}" - ) - # char_tagged = color_format('{color}{0}', char, color=apply_color) - char_tagged = f"<<{apply_color}>>" - char_tagged += char - color_closed = False - elif char_ref == " ": - # char_tagged = color_format('{default}{0}', char) - char_tagged = f"<>{char}" - color_closed = True - colored_line += char_tagged - - if not color_closed: - # colored_line += color_format('{default}') - colored_line += "<>" - - return colored_line - - def __str__(self) -> str: - """Return the diff as plain text.""" - return "".join(self.diff_plain_text) - - def __repr__(self) -> str: - """Return a reconstructable representation.""" - # TODO - return f"{self.__class__.__name__}(a, b, {self.group})" - - -class _Superhunk(abc.Sequence): - def __init__(self, hunks: Sequence[Hunk]) -> None: - self._hunks = hunks - self.a_rng = (self._hunks[0].a_rng[0], self._hunks[-1].a_rng[1]) - self.b_rng = (self._hunks[0].b_rng[0], self._hunks[-1].b_rng[1]) - self.pre_context = self._hunks[0].pre_context - self.post_context = 
self._hunks[0].post_context - - def __getitem__(self, idx: int) -> Hunk: - return self._hunks[idx] - - def __len__(self) -> int: - return len(self._hunks) - - -def get_header_text( - a_rng: tuple[int, int], b_rng: tuple[int, int], affix: str = "@@" -) -> str: - """Provide header for any ranges.""" - a_rng = format_range_unified(*a_rng) - b_rng = format_range_unified(*b_rng) - return f"{affix} -{a_rng} +{b_rng} {affix}" - - -class PatchManager: - def __init__( - self, - text_a: str, - text_b: str, - context: int = 0, - by_letter: bool = False, - replace_invisible: bool = False, - ) -> None: - self.a = text_a.splitlines(True) - self.b = text_b.splitlines(True) - - # groups and hunk have same order (one hunk correspond to one group). - s = difflib.SequenceMatcher(None, self.a, self.b) - self.groups = list(s.get_grouped_opcodes(0)) - self.hunks = [] - previous_hunk = None - for group in self.groups: - hunk = Hunk(self.a, self.b, group) - self.hunks.append(hunk) - hunk.pre_context = hunk.a_rng[0] - if previous_hunk: - hunk.pre_context -= previous_hunk.a_rng[1] - previous_hunk.post_context = hunk.pre_context - previous_hunk = hunk - if self.hunks: - self.hunks[-1].post_context = len(self.a) - self.hunks[-1].a_rng[1] - # blocks are a superset of hunk, as include also parts not - # included in any hunk. - self.blocks = self.get_blocks() - self.context = context - self._super_hunks = self._generate_super_hunks() - self._replace_invisible = replace_invisible - - def get_blocks(self) -> list[tuple[int, tuple[int, int], tuple[int, int]]]: - """Return list with blocks of indexes. - - Format of each block:: - - [-1, (i1, i2), (-1, -1)] -> block a[i1:i2] does not change from - a to b then is there is no corresponding hunk. 
- [hunk index, (i1, i2), (j1, j2)] -> block a[i1:i2] becomes b[j1:j2] - """ - blocks = [] - i2 = 0 - for hunk_idx, group in enumerate(self.groups): - first, last = group[0], group[-1] - i1, prev_i2, i2 = first[1], i2, last[2] - - # there is a section of unchanged text before this hunk. - if prev_i2 < i1: - rng = (-1, (prev_i2, i1), (-1, -1)) - blocks.append(rng) - - rng = (hunk_idx, (first[1], last[2]), (first[3], last[4])) - blocks.append(rng) - - # there is a section of unchanged text at the end of a, b. - if i2 < len(self.a): - rng = (-1, (i2, len(self.a)), (-1, -1)) - blocks.append(rng) - - return blocks - - def print_hunks(self) -> None: - """Print the headers and diff texts of all hunks to the output.""" - if self.hunks: - output( - "\n".join( - self._generate_diff(super_hunk) for super_hunk in self._super_hunks - ) - ) - - def _generate_super_hunks( - self, hunks: Iterable[Hunk] | None = None - ) -> list[_Superhunk]: - if hunks is None: - hunks = self.hunks - - if not hunks: - return [] - - if self.context: - # Determine if two hunks are connected by self.context - super_hunk = [] - super_hunks = [super_hunk] - for hunk in hunks: - # self.context * 2, because if self.context is 2 the hunks - # would be directly adjacent when 4 lines in between and for - # anything below 4 they share lines. 
- # not super_hunk == first hunk as any other super_hunk is - # created with one hunk - if not super_hunk or hunk.pre_context <= self.context * 2: - # previous hunk has shared/adjacent self.context lines - super_hunk += [hunk] - else: - super_hunk = [hunk] - super_hunks += [super_hunk] - else: - super_hunks = [[hunk] for hunk in hunks] - return [_Superhunk(sh) for sh in super_hunks] - - def _get_context_range( - self, super_hunk: _Superhunk - ) -> tuple[tuple[int, int], tuple[int, int]]: - """Dynamically determine context range for a super hunk.""" - a0, a1 = super_hunk.a_rng - b0, b1 = super_hunk.b_rng - return ( - ( - a0 - min(super_hunk.pre_context, self.context), - a1 + min(super_hunk.post_context, self.context), - ), - ( - b0 - min(super_hunk.pre_context, self.context), - b1 + min(super_hunk.post_context, self.context), - ), - ) - - def _generate_diff(self, hunks: _Superhunk) -> str: - """Generate a diff text for the given hunks.""" - - def extend_context(start: int, end: int) -> str: - """Add context lines.""" - return "".join(f" {line.rstrip()}\n" for line in self.a[start:end]) - - context_range = self._get_context_range(hunks) - a11 = get_header_text(*context_range) - a22 = extend_context(context_range[0][0], hunks[0].a_rng[0]) - # OutPut = color_format('{aqua}{0}{default}\n{1}',a11,a22) - OutPut = f"<>{a11}<>\n{a22}" - previous_hunk = None - for hunk in hunks: - if previous_hunk: - OutPut += extend_context(previous_hunk.a_rng[1], hunk.a_rng[0]) - previous_hunk = hunk - OutPut += hunk.diff_text - OutPut += extend_context(hunks[-1].a_rng[1], context_range[0][1]) - if self._replace_invisible: - OutPut = replace_invisible(OutPut) - return OutPut +logger = logging.getLogger(__name__) def showDiff(text_a: str, text_b: str, context: int = 0) -> None: - """ - Output a string showing the differences between text_a and text_b. - - The differences are highlighted (only on compatible systems) to show which - changes were made. 
- """ - config = get_default_config() - if config.no_diff: + if "nodiff" in sys.argv: return - PatchManager(text_a, text_b, context=context).print_hunks() + pywikibot.showDiff(text_a, text_b) -def output(textm, *uargs, **kwargs): - """ - Prints the given text with color formatting. - - The text can contain color tags like '<>' where 'color' is the name of the color. - The color will be applied to the text that follows the tag, until the end of the string or until a '<>' tag is found. - - If config.no_print is True, the function will return without printing anything. - - :param textm: The text to print. Can contain color tags. - """ - config = get_default_config() - if config.no_print and not kwargs.get("p", False): +def output(textm, *args, **kwargs): + if "noprint" in sys.argv and not kwargs.get("p", False): return - end = kwargs.get("end", "\n") - toprint = make_str(textm) + logger.info(textm) - print(toprint, end=end) - -def error(text): +def error(text, *args, **kwargs): text = f"<> {str(text)} <>" - new_text = make_str(text) - log.error(new_text) + logger.error(text) -def debug(text): - new_text = make_str(text) - log.debug(new_text) +def info(text, *args, **kwargs): + logger.info(text) -def info(text): - new_text = make_str(text) - log.info(new_text) +def warn(text, *args, **kwargs): + logger.warning(text) -def warn(text): - new_text = make_str(text) - log.warning(new_text) +def warning(text, *args, **kwargs): + logger.warning(text) -def warning(text): - new_text = make_str(text) - log.warning(new_text) +def debug(text, *args, **kwargs): + logger.debug(text) -def test_print(s): - config = get_default_config() - if config.test_print: - output(s) +def test_print(text, *args, **kwargs): + logger.debug(text) __all__ = [ @@ -763,16 +53,3 @@ def test_print(s): "info", "test_print", ] - -if __name__ == "__main__": - line = "" - numb = 0 - for co, cac in color_table.items(): - if cac: - numb += 1 - line += f" {co.ljust(15)} <<{co}>> test.<>" - line += "\n" - # if 
numb % 5 == 0: line += "\n" - # --- - output(line) - showDiff(line, f"{line}3434s") diff --git a/mw_api/api_utils/txtlib.py b/mw_api/api_utils/txtlib.py index e9348de..725967f 100644 --- a/mw_api/api_utils/txtlib.py +++ b/mw_api/api_utils/txtlib.py @@ -1,7 +1,5 @@ #!/usr/bin/python3 -""" - -""" +""" """ import logging from functools import lru_cache @@ -51,9 +49,7 @@ def extract_templates_and_params(text): return result -def get_one_temp_params( - text, tempname="", templates=[], lowers=False, get_all_temps=False -): +def get_one_temp_params(text, tempname="", templates=[], lowers=False, get_all_temps=False): ingr = extract_templates_and_params(text) # --- temps = templates @@ -61,10 +57,7 @@ def get_one_temp_params( if tempname: temps.append(tempname) # --- - temps = [ - x.replace("قالب:", "").replace("Template:", "").replace("_", " ").strip() - for x in temps - ] + temps = [x.replace("قالب:", "").replace("Template:", "").replace("_", " ").strip() for x in temps] # --- if lowers: temps = [x.lower() for x in temps] @@ -99,45 +92,6 @@ def get_all_temps_params(text, templates=None, lowers=False): if templates is None: templates = [] # --- - tab = get_one_temp_params( - text, templates=templates, lowers=lowers, get_all_temps=True - ) + tab = get_one_temp_params(text, templates=templates, lowers=lowers, get_all_temps=True) # --- return tab - - -# --- -test_text = """ -{{ص.م/صورة مضاعفة ويكي بيانات|معرف ويكي بيانات={{{معرف ويكي بيانات|}}} -| صورة1 ={{{علم|{{{flag|{{{صورة علم|}}}}}}}}} -| تعليق1 ={{#لو:{{قيمة ويكي بيانات|معرف ويكي بيانات={{{معرف ويكي بيانات|}}}|{{{وصف العلم|{{{flagcaption|}}}}}}|خاصية=P163|rank=best}}|{{قيمة ويكي بيانات|معرف ويكي بيانات={{{معرف ويكي بيانات|}}}|{{{وصف العلم|{{{flagcaption|}}}}}}|خاصية=P163|rank=best}}|{{فصع}}}} -| عرض1 ={{{عرض العلم|{{{flagsize|125}}}}}} -| صورة2 ={{{motto|{{{شعار|}}}}}} -| تعليق2 ={{#لو:{{قيمة ويكي بيانات|معرف ويكي بيانات={{{معرف ويكي بيانات|}}}|{{{تعليق الشعار|{{{وصف الشعار|}}}}}}|خاصية=P237|rank=best}}|{{قيمة ويكي 
بيانات|معرف ويكي بيانات={{{معرف ويكي بيانات|}}}|{{{تعليق الشعار|{{{وصف الشعار|}}}}}}|خاصية=P237|rank=best}}|{{فصع}}}} -| عرض2 = {{{عرض الشعار|125}}} -| خاصية1 =P41 -| خاصية2 ={{#لو:{{#خاصية:P94}}|P94|P154}} -|خلفية={{{خلفية|}}} -}} - -{{ourworldindatamirror|https://owidm.wmcloud.org/grapher/cancer-death-rates?tab=map {{Webarchive}}}} -""" -# --- -if __name__ == "__main__": - # --- - # --- - ingr = extract_templates_and_params(test_text) - for temp in ingr: - # --- - name, namestrip, params, template = ( - temp["name"], - temp["namestrip"], - temp["params"], - temp["item"], - ) - # --- - print("-----------------------------") - print(f"name: {name}") - print(f"namestrip: {namestrip}") - print(f"params: {params}") - print(f"template: {template}") diff --git a/mw_api/api_utils/wd_sparql.py b/mw_api/api_utils/wd_sparql.py index 189a5b4..c500bfb 100644 --- a/mw_api/api_utils/wd_sparql.py +++ b/mw_api/api_utils/wd_sparql.py @@ -1,16 +1,20 @@ """ -from mw_api.api_utils import wd_sparql get_query_result = wd_sparql.get_query_result get_query_data = wd_sparql.get_query_data """ +import json +import logging import sys +from urllib.error import HTTPError, URLError from SPARQLWrapper import JSON, SPARQLWrapper +logger = logging.getLogger(__name__) + def get_query_data(query): """Retrieve query data from the Wikidata SPARQL endpoint. 
@@ -34,24 +38,20 @@ def get_query_data(query): # endpoint_url = "https://query-main.wikidata.org/sparql" endpoint_url = "https://query.wikidata.org/sparql" # --- - user_agent = "WDQS-example Python/%s.%s" % ( - sys.version_info[0], - sys.version_info[1], - ) + user_agent = f"WDQS-example Python/{sys.version_info[0]}.{sys.version_info[1]}" # --- sparql = SPARQLWrapper(endpoint_url, agent=user_agent) # --- sparql.setQuery(query) sparql.setReturnFormat(JSON) + sparql.setTimeout(30) # --- data = {} # --- try: data = sparql.query().convert() - except Exception as e: - # logger.warning(e, text=f"API/tools.py quoteurl: Exception: {e}") - print("API/tools.py get_query_data: Exception: e:") - print(e) + except (HTTPError, URLError, TimeoutError, ValueError, json.JSONDecodeError): + logger.exception("wd_helps.get_query_data failed") # --- return data diff --git a/mw_api/auth/__init__.py b/mw_api/auth/__init__.py index 7680504..719ec7f 100644 --- a/mw_api/auth/__init__.py +++ b/mw_api/auth/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Authentication package for mw_api. diff --git a/mw_api/auth/authenticator.py b/mw_api/auth/authenticator.py index ac4d030..f4ed30e 100644 --- a/mw_api/auth/authenticator.py +++ b/mw_api/auth/authenticator.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Authenticator for MediaWiki login. 
@@ -31,7 +30,7 @@ class Authenticator: def __init__( self, endpoint: str, - session: requests.Session, + session: requests.Session, headers: Optional[Dict[str, str]] = None, ) -> None: """ @@ -77,9 +76,7 @@ def login(self, username: str, password: str) -> bool: } try: - response = self._session.post( - self._endpoint, data=params, headers=self._headers - ) + response = self._session.post(self._endpoint, data=params, headers=self._headers) data = response.json() except Exception as e: @@ -106,9 +103,7 @@ def _get_login_token(self) -> str: } try: - response = self._session.post( - self._endpoint, data=params, headers=self._headers - ) + response = self._session.post(self._endpoint, data=params, headers=self._headers) data = response.json() return data.get("query", {}).get("tokens", {}).get("logintoken", "") @@ -129,9 +124,7 @@ def is_authenticated(self) -> bool: } try: - response = self._session.post( - self._endpoint, data=params, headers=self._headers - ) + response = self._session.post(self._endpoint, data=params, headers=self._headers) data = response.json() userinfo = data.get("query", {}).get("userinfo", {}) return "anon" not in userinfo diff --git a/mw_api/auth/token_provider.py b/mw_api/auth/token_provider.py index a0fb53d..e2744fe 100644 --- a/mw_api/auth/token_provider.py +++ b/mw_api/auth/token_provider.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Token Provider for authentication tokens. 
@@ -28,7 +27,7 @@ class TokenProvider: def __init__( self, endpoint: str, - session: requests.Session, + session: requests.Session, headers: Optional[Dict[str, str]] = None, ) -> None: """ @@ -93,9 +92,7 @@ def _fetch_token(self, token_type: str) -> str: } try: - response = self._session.post( - self._endpoint, data=params, headers=self._headers - ) + response = self._session.post(self._endpoint, data=params, headers=self._headers) data = response.json() token_key = f"{token_type}token" return data.get("query", {}).get("tokens", {}).get(token_key, "") diff --git a/mw_api/core/__init__.py b/mw_api/core/__init__.py index a70ef35..b9e012b 100644 --- a/mw_api/core/__init__.py +++ b/mw_api/core/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Core module for mw_api. diff --git a/mw_api/core/config.py b/mw_api/core/config.py index 11aff5d..03cc81f 100644 --- a/mw_api/core/config.py +++ b/mw_api/core/config.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Configuration module for mw_api. diff --git a/mw_api/core/container.py b/mw_api/core/container.py index 2f3358d..7d59f13 100644 --- a/mw_api/core/container.py +++ b/mw_api/core/container.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Dependency Injection Container for mw_api. @@ -19,18 +18,18 @@ class SessionManager: with an encapsulated, thread-aware session management system. Attributes: - _sessions: Dictionary mapping session keys to requests.Session instances. + _sessions: Dictionary mapping session keys to requests.Session instances. _users: Dictionary mapping language codes to logged-in usernames. _login_counts: Dictionary tracking login attempt counts. 
""" def __init__(self) -> None: """Initialize the SessionManager with empty state.""" - self._sessions: Dict[str, requests.Session] = {} + self._sessions: Dict[str, requests.Session] = {} self._users: Dict[str, str] = {} self._total_logins: int = 0 - def get_session(self, key: str) -> Optional[requests.Session]: + def get_session(self, key: str) -> Optional[requests.Session]: """ Get a session by its key. @@ -38,17 +37,17 @@ def get_session(self, key: str) -> Optional[requests.Session]: key: The session key (typically in format 'lang-family-username'). Returns: - The requests.Session if it exists, None otherwise. + The requests.Session if it exists, None otherwise. """ return self._sessions.get(key) - def set_session(self, key: str, session: requests.Session) -> None: + def set_session(self, key: str, session: requests.Session) -> None: """ Store a session with the given key. Args: key: The session key (typically in format 'lang-family-username'). - session: The requests.Session instance to store. + session: The requests.Session instance to store. """ self._sessions[key] = session diff --git a/mw_api/core/exceptions.py b/mw_api/core/exceptions.py index 87714f4..3a19959 100644 --- a/mw_api/core/exceptions.py +++ b/mw_api/core/exceptions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Exception hierarchy for mw_api. 
@@ -170,9 +169,7 @@ def __init__( message: str = "Invalid CSRF token.", raw_error: Optional[Dict[str, Any]] = None, ) -> None: - super().__init__( - code="badtoken", message=message, is_retryable=True, raw_error=raw_error - ) + super().__init__(code="badtoken", message=message, is_retryable=True, raw_error=raw_error) class AuthenticationError(NewApiException): @@ -215,9 +212,7 @@ def parse_api_error(error_dict: Dict[str, Any]) -> Optional[ApiError]: abusefilter = error_dict.get("abusefilter", {}) description = abusefilter.get("description", "") filter_id = str(abusefilter.get("id", "")) - return AbuseFilterError( - description=description, filter_id=filter_id, raw_error=error_dict - ) + return AbuseFilterError(description=description, filter_id=filter_id, raw_error=error_dict) if code == "maxlag": lag = int(error_dict.get("lag", 0)) diff --git a/mw_api/core/namespace.py b/mw_api/core/namespace.py index d8f6a09..8628375 100644 --- a/mw_api/core/namespace.py +++ b/mw_api/core/namespace.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Namespace registry for MediaWiki namespaces. diff --git a/mw_api/core/page.py b/mw_api/core/page.py index c2eebe0..fca85ea 100644 --- a/mw_api/core/page.py +++ b/mw_api/core/page.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Core page entities for mw_api. diff --git a/mw_api/core/protocols.py b/mw_api/core/protocols.py index 01de5ab..c3c560d 100644 --- a/mw_api/core/protocols.py +++ b/mw_api/core/protocols.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Protocols for mw_api. @@ -52,7 +51,7 @@ class SessionProtocol(Protocol): """ Protocol defining the interface for HTTP session objects. - Compatible with requests.Session and similar implementations. + Compatible with requests.session and similar implementations. 
""" def request(self, method: str, url: str, **kwargs: Any) -> Any: diff --git a/mw_api/core/request_config.py b/mw_api/core/request_config.py index 7e9f527..779603a 100644 --- a/mw_api/core/request_config.py +++ b/mw_api/core/request_config.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Request configuration for API calls. diff --git a/mw_api/logging_config.py b/mw_api/logging_config.py index 2c302e7..16724e8 100644 --- a/mw_api/logging_config.py +++ b/mw_api/logging_config.py @@ -124,19 +124,26 @@ def wrapper(record): return wrapper -def prepare_log_file(log_file, project_logger): - log_file = Path(log_file).expanduser() +def prepare_log_file(log_file: str | None, project_logger: logging.Logger) -> Path | None: + """ + Prepare the log file path and create parent directories if needed. + """ + if not log_file: + return None + log_file_path = os.path.expandvars(str(log_file)) + log_file_path = Path(log_file_path).expanduser() + try: - log_file.parent.mkdir(parents=True, exist_ok=True) + log_file_path.parent.mkdir(parents=True, exist_ok=True) except Exception as e: project_logger.error(f"Failed to create log directory: {e}") - log_file = None - return log_file + log_file_path = None + return log_file_path def setup_logging( - level: str = "WARNING", name: str = "mw_api", + level: str = "WARNING", log_file: str | None = None, ) -> None: """ @@ -152,7 +159,7 @@ def setup_logging( project_logger.propagate = False formatter = colorlog.ColoredFormatter( - fmt="%(filename)s:%(lineno)s %(funcName)s() - %(log_color)s%(levelname)-s %(reset)s%(message)s", + fmt="%(name)s:%(lineno)s %(funcName)s() - %(log_color)s%(levelname)-s %(reset)s%(message)s", log_colors={ "DEBUG": "cyan", "INFO": "green", @@ -174,22 +181,19 @@ def setup_logging( if log_file: log_file = prepare_log_file(log_file, project_logger) - file_formatter = logging.Formatter( - fmt="%(asctime)s - %(name)s - %(levelname)-8s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - ) - file_handler = 
logging.FileHandler(log_file, mode="a", encoding="utf-8") - file_handler.setFormatter(file_formatter) - file_handler.setLevel(numeric_level) - project_logger.addHandler(file_handler) + file_logger(log_file, project_logger, numeric_level) # Separate error log file - log_file2 = log_file.with_suffix(".err") - file_formatter = logging.Formatter( - fmt="%(asctime)s - %(name)s - %(levelname)-8s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - ) - file_handler = logging.FileHandler(log_file2, mode="a", encoding="utf-8") - file_handler.setFormatter(file_formatter) - file_handler.setLevel(logging.WARNING) - project_logger.addHandler(file_handler) + log_file_2 = log_file.with_suffix(".err") if log_file else None + file_logger(log_file_2, project_logger, logging.WARNING) + + +def file_logger(log_file, project_logger, numeric_level): + if not log_file: + return + fmt_str = "%(asctime)s - %(name)s - %(levelname)-8s - %(message)s" + file_formatter = logging.Formatter(fmt=fmt_str, datefmt="%Y-%m-%d %H:%M:%S") + file_handler = logging.FileHandler(log_file, mode="a", encoding="utf-8") + file_handler.setFormatter(file_formatter) + file_handler.setLevel(numeric_level) + project_logger.addHandler(file_handler) diff --git a/mw_api/pages_bots/__init__.py b/mw_api/pages_bots/__init__.py index 92e9d14..2f44a00 100644 --- a/mw_api/pages_bots/__init__.py +++ b/mw_api/pages_bots/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import os if not os.getenv("BOTNAME"): diff --git a/mw_api/pages_bots/all_apis.py b/mw_api/pages_bots/all_apis.py index 2e8f476..67581ef 100644 --- a/mw_api/pages_bots/all_apis.py +++ b/mw_api/pages_bots/all_apis.py @@ -1,7 +1,4 @@ """ -from mw_api.all_apis import ALL_APIS - -from mw_api import ALL_APIS main_api = ALL_APIS(lang='en', family='wikipedia', username='your_username', password='your_password') page = main_api.MainPage('Main Page Title') @@ -11,19 +8,26 @@ import functools import logging -from typing import Any from ..super.S_API import bot_api from ..super.S_Category import catdepth_new from ..super.S_Page import 
super_page - -# --- from ..super.super_login import Login logger = logging.getLogger(__name__) -class ALL_APIS: +@functools.lru_cache(maxsize=1024) +def _login(lang, family, username) -> Login: + # --- + login_bot = Login(lang, family=family) + # --- + logger.info(f"### <> _login make new bot for ({lang}.{family}.org|{username})") + # --- + return login_bot + + +class ALL_APIS: # noqa: N801 """ A class that provides access to various API functionalities. Usage: @@ -46,23 +50,14 @@ def MainPage(self, title, *args, **kwargs) -> super_page.MainPage: def CatDepth(self, title, sitecode="", family="", *args, **kwargs): # cat_members = CatDepth("RTTNEURO", sitecode="www", family="mdwiki", depth=3, ns="0") - return catdepth_new.subcatquery( - self.login_bot, title, sitecode=self.lang, family=self.family, **kwargs - ) + return catdepth_new.subcatquery(self.login_bot, title, sitecode=self.lang, family=self.family, **kwargs) def NEW_API(self, *args, **kwargs) -> bot_api.NEW_API: # --- return bot_api.NEW_API(self.login_bot, lang=self.lang, family=self.family) - @functools.lru_cache(maxsize=1) def _login(self) -> Login: - # --- - login_bot = Login(self.lang, family=self.family) - # --- - logger.info( - f"### <> LoginWrap make new bot for ({self.lang}.{self.family}.org|{self.username})" - ) - # --- + bot = _login(self.lang, self.family, self.username) user_tables = { self.family: { "username": self.username, @@ -70,9 +65,9 @@ def _login(self) -> Login: } } # --- - login_bot.add_users(user_tables, lang=self.lang) + bot.add_users(user_tables, lang=self.lang) # --- - return login_bot + return bot __all__ = [ diff --git a/mw_api/printe.py b/mw_api/printe.py deleted file mode 100644 index a6cab36..0000000 --- a/mw_api/printe.py +++ /dev/null @@ -1,13 +0,0 @@ -""" """ - -from .api_utils.printe import debug, error, info, output, showDiff, test_print, warn - -__all__ = [ - "showDiff", - "output", - "debug", - "warn", - "error", - "info", - "test_print", -] diff --git 
a/mw_api/repositories/__init__.py b/mw_api/repositories/__init__.py index 7a948dd..eacdb5b 100644 --- a/mw_api/repositories/__init__.py +++ b/mw_api/repositories/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Repositories package for mw_api. diff --git a/mw_api/repositories/page_repository.py b/mw_api/repositories/page_repository.py index 9ba4c40..6462e33 100644 --- a/mw_api/repositories/page_repository.py +++ b/mw_api/repositories/page_repository.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Page Repository for data access operations. @@ -110,9 +109,7 @@ def get_page_info(self, page: Page) -> PageMetadata: # Protection info protection = page_data.get("protection", []) if protection: - metadata.protection = { - p.get("type"): p.get("level") for p in protection - } + metadata.protection = {p.get("type"): p.get("level") for p in protection} return metadata diff --git a/mw_api/services/__init__.py b/mw_api/services/__init__.py index 072a78b..3d39d6d 100644 --- a/mw_api/services/__init__.py +++ b/mw_api/services/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Services package for mw_api. diff --git a/mw_api/services/edit_validator.py b/mw_api/services/edit_validator.py index 76ac42e..77bbfe7 100644 --- a/mw_api/services/edit_validator.py +++ b/mw_api/services/edit_validator.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Edit Validator service for validating bot edit permissions. @@ -8,6 +7,7 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional + from ..api_utils import txtlib from ..core.config import BotConfig, get_default_config @@ -63,9 +63,7 @@ class EditValidator: "portal": ["لا لربط البوابات المعادل", "لا لصيانة البوابات"], } - def __init__( - self, config: Optional[BotConfig] = None, bot_username: str = "Mr.Ibrahembot" - ) -> None: + def __init__(self, config: Optional[BotConfig] = None, bot_username: str = "Mr.Ibrahembot") -> None: """ Initialize the EditValidator. 
@@ -124,9 +122,7 @@ def can_edit( return result - def _check_templates( - self, templates: List[Dict[str, Any]], title: str, job: str - ) -> EditCheckResult: + def _check_templates(self, templates: List[Dict[str, Any]], title: str, job: str) -> EditCheckResult: """Check templates for edit restrictions.""" all_stop = self.DEFAULT_STOP_TEMPLATES["all"] job_stop = self.DEFAULT_STOP_TEMPLATES.get(job, []) @@ -137,9 +133,7 @@ def _check_templates( # Check stop templates if name in job_stop or name in all_stop: - return EditCheckResult.denied( - f"Page has blocking template: {name}", template=name - ) + return EditCheckResult.denied(f"Page has blocking template: {name}", template=name) # Handle {{nobots}} template if name.lower() == "nobots": @@ -155,9 +149,7 @@ def _handle_nobots(self, params: Dict[str, str], title: str) -> EditCheckResult: """Handle the nobots template logic.""" # {{nobots}} with no params blocks all bots if not params: - return EditCheckResult.denied( - "Page has {{nobots}} template", template="nobots" - ) + return EditCheckResult.denied("Page has {{nobots}} template", template="nobots") # Check if bot is in the exclusion list param1 = params.get("1", "") @@ -180,18 +172,14 @@ def _handle_bots(self, params: Dict[str, str], title: str) -> EditCheckResult: allow_list = [x.strip() for x in allow.split(",")] if "all" in allow_list or self._bot_username in allow_list: return EditCheckResult.success() - return EditCheckResult.denied( - f"Bot not in allow list: {allow}", template="bots" - ) + return EditCheckResult.denied(f"Bot not in allow list: {allow}", template="bots") # Check deny parameter deny = params.get("deny", "") if deny: deny_list = [x.strip() for x in deny.split(",")] if "all" in deny_list or self._bot_username in deny_list: - return EditCheckResult.denied( - f"Bot in deny list: {deny}", template="bots" - ) + return EditCheckResult.denied(f"Bot in deny list: {deny}", template="bots") return EditCheckResult.success() diff --git 
a/mw_api/services/template_service.py b/mw_api/services/template_service.py index 00840b3..723dd88 100644 --- a/mw_api/services/template_service.py +++ b/mw_api/services/template_service.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Template Service for parsing and manipulating wiki templates. diff --git a/mw_api/super/S_API/bot.py b/mw_api/super/S_API/bot.py index a76ca5c..dc527be 100644 --- a/mw_api/super/S_API/bot.py +++ b/mw_api/super/S_API/bot.py @@ -263,9 +263,7 @@ def Parse_Text(self, line, title): # --- return textnew - def upload_by_file( - self, file_name, text, file_path, comment="", ignorewarnings=False - ): + def upload_by_file(self, file_name, text, file_path, comment="", ignorewarnings=False): # --- logger.info(f"<> def . {file_name=}") # --- @@ -296,11 +294,7 @@ def upload_by_file( success = upload_result.get("result") == "Success" _error = data.get("error", {}) # --- - duplicate = ( - upload_result.get("warnings", {}) - .get("duplicate", [""])[0] - .replace("_", " ") - ) + duplicate = upload_result.get("warnings", {}).get("duplicate", [""])[0].replace("_", " ") # --- if success: logger.info(f"<> ** upload true .. 
[[File:{file_name}]] ") diff --git a/mw_api/super/S_API/bot_api.py b/mw_api/super/S_API/bot_api.py index 7f5911c..74b9b8b 100644 --- a/mw_api/super/S_API/bot_api.py +++ b/mw_api/super/S_API/bot_api.py @@ -1,6 +1,5 @@ """ """ -# --- import datetime import logging import time @@ -155,9 +154,7 @@ def Find_pages_exists_or_not(self, liste, get_redirect=False, noprint=False): exists += 1 # --- if not noprint: - logger.info( - f"Find_pages_exists_or_not : missing:{missing}, exists: {exists}, redirects: {redirects}" - ) + logger.info(f"Find_pages_exists_or_not : missing:{missing}, exists: {exists}, redirects: {redirects}") # --- return table @@ -230,9 +227,7 @@ def Find_pages_exists_or_not_with_qids( continue # --- # { "user_input": title, "redirect_to": "", "normalized_to": "", "real_title": title, } - title_tab = self.get_title_redirect_normalize( - title_x, redirects_table, normalized_table - ) + title_tab = self.get_title_redirect_normalize(title_x, redirects_table, normalized_table) # --- if use_user_input_title and title_tab.get("user_input"): title_x = title_tab["user_input"] @@ -257,9 +252,7 @@ def Find_pages_exists_or_not_with_qids( exists += 1 # --- if not noprint: - logger.info( - f"Find_pages_exists_or_not : missing:{missing}, exists: {exists}, redirects: {redirects}" - ) + logger.info(f"Find_pages_exists_or_not : missing:{missing}, exists: {exists}, redirects: {redirects}") # --- if return_all_jsons: return table, all_jsons @@ -303,9 +296,7 @@ def Get_All_pages( if start: params["apfrom"] = start # --- - newp = self.post_continue( - params, "query", _p_="allpages", p_empty=[], Max=limit_all - ) + newp = self.post_continue(params, "query", _p_="allpages", p_empty=[], Max=limit_all) # --- logger.debug(f"<> --- : find {len(newp)} pages.") # --- @@ -364,9 +355,7 @@ def PrefixSearch(self, pssearch="", ns="0", pslimit="max", limit_all=100000): if pslimit.isdigit(): params["pslimit"] = pslimit # --- - newp = self.post_continue( - params, "query", _p_="prefixsearch", 
p_empty=[], Max=limit_all - ) + newp = self.post_continue(params, "query", _p_="prefixsearch", p_empty=[], Max=limit_all) # --- logger.debug(f"<> --- : find {len(newp)} pages.") # --- @@ -416,21 +405,15 @@ def Get_All_pages_generator( if start: params["gapfrom"] = start # --- - newp = self.post_continue( - params, "query", _p_="pages", p_empty=[], Max=limit_all - ) + newp = self.post_continue(params, "query", _p_="pages", p_empty=[], Max=limit_all) # --- - logger.debug( - f"<> --- Get_All_pages_generator : find {len(newp)} pages." - ) + logger.debug(f"<> --- Get_All_pages_generator : find {len(newp)} pages.") # --- Main_table = {x["title"]: x for x in newp} # --- logger.debug(f"len of Main_table {len(Main_table)}.") # --- - logger.info( - f"bot_api.py Get_All_pages_generator : find {len(Main_table)} pages." - ) + logger.info(f"bot_api.py Get_All_pages_generator : find {len(Main_table)} pages.") # --- return Main_table @@ -523,9 +506,7 @@ def Get_Newpages( else: limit = 5000 - json1 = self.post_continue( - params, "query", _p_="recentchanges", p_empty=[], Max=limit - ) + json1 = self.post_continue(params, "query", _p_="recentchanges", p_empty=[], Max=limit) Main_table = [x["title"] for x in json1] @@ -552,9 +533,7 @@ def UserContribs(self, user, limit=5000, namespace="*", ucshow=""): if ucshow: params["ucshow"] = ucshow # --- - results = self.post_continue( - params, "query", _p_="usercontribs", p_empty=[], Max=limit - ) + results = self.post_continue(params, "query", _p_="usercontribs", p_empty=[], Max=limit) # --- results = [x["title"] for x in results] # --- @@ -572,9 +551,7 @@ def chunk_titles(self, titles, chunk_size=50, noprint=False): result = [titles[i : i + chunk_size] for i in range(0, len(titles), chunk_size)] # --- if not noprint: - result = tqdm.tqdm( - result, desc=f"chunk_titles {len(titles)} split to {len(result)} chunks" - ) + result = tqdm.tqdm(result, desc=f"chunk_titles {len(titles)} split to {len(result)} chunks") # --- return result @@ -601,9 
+578,7 @@ def Get_langlinks_for_list(self, titles, targtsitecode="", numbes=40): """ # --- - logger.debug( - f'bot_api.Get_langlinks_for_list for "{len(titles)} pages". in wiki:{self.lang}' - ) + logger.debug(f'bot_api.Get_langlinks_for_list for "{len(titles)} pages". in wiki:{self.lang}') # --- if targtsitecode.endswith("wiki"): targtsitecode = targtsitecode[:-4] @@ -635,7 +610,7 @@ def Get_langlinks_for_list(self, titles, targtsitecode="", numbes=40): for title_chunk in self.chunk_titles(titles, chunk_size=numbes): params["titles"] = "|".join(title_chunk) # --- - # logger.debug(f'bot_api. work for {len(group)} pages') + # logger.debug(f'work for {len(group)} pages') # --- json1 = self.post_params(params) # --- @@ -705,9 +680,7 @@ def get_extlinks(self, title): "formatversion": 2, } # --- - results = self.post_continue( - params, "query", "pages", [], first=True, _p_2="extlinks", _p_2_empty=[] - ) + results = self.post_continue(params, "query", "pages", [], first=True, _p_2="extlinks", _p_2_empty=[]) # --- links = [x["url"] for x in results] # --- @@ -739,9 +712,7 @@ def get_pageassessments(self, titles): # --- return results - def get_revisions( - self, title, rvprop="comment|timestamp|user|content|ids", options=None - ): + def get_revisions(self, title, rvprop="comment|timestamp|user|content|ids", options=None): # --- params = { "action": "query", @@ -824,9 +795,7 @@ def querypage_list(self, qppage="Wantedcategories", qplimit=None, Max=None): if qppage not in qppage_values: logger.info(f"<> qppage {qppage} not in qppage_values.") # --- - results = self.post_continue( - params, "query", _p_="querypage", p_empty=[], Max=Max - ) + results = self.post_continue(params, "query", _p_="querypage", p_empty=[], Max=Max) # --- logger.debug(f" len(results) = {len(results)}") # --- @@ -834,9 +803,7 @@ def querypage_list(self, qppage="Wantedcategories", qplimit=None, Max=None): def Get_template_pages(self, title, namespace="*", Max=10000): # --- - logger.debug( - 
f'Get_template_pages for template:"{title}", limit:"{Max}",namespace:"{namespace}"' - ) + logger.debug(f'Get_template_pages for template:"{title}", limit:"{Max}",namespace:"{namespace}"') # --- params = { "action": "query", @@ -933,9 +900,7 @@ def pageswithprop(self, pwppropname="unlinkedwikibase_id", pwplimit=None, Max=No if pwppropname != "": params["pwppropname"] = pwppropname # --- - results = self.post_continue( - params, "query", _p_="pageswithprop", p_empty=[], Max=Max - ) + results = self.post_continue(params, "query", _p_="pageswithprop", p_empty=[], Max=Max) # --- logger.debug(f" len(results) = {len(results)}") # --- @@ -1007,7 +972,7 @@ def users_infos(self, ususers=[]): "ususers": "Mr.Ibrahembot", } # --- - all_usprops = [ + _all_usprops = [ "groups", "implicitgroups", "cancreate", diff --git a/mw_api/super/S_Category/bot.py b/mw_api/super/S_Category/bot.py index 5208a5c..2456c1c 100644 --- a/mw_api/super/S_Category/bot.py +++ b/mw_api/super/S_Category/bot.py @@ -10,16 +10,6 @@ import tqdm -from ...core.namespace import NamespaceRegistry - -# Use NamespaceRegistry for localized namespace names -_ar_registry = NamespaceRegistry("ar") - -# Legacy ns_list for backward compatibility -ns_list: Dict[str, str] = { - str(ns_id): ns.local_name for ns_id, ns in _ar_registry.get_all_namespaces().items() -} - logger = logging.getLogger(__name__) @@ -30,7 +20,12 @@ class CategoryDepth: Provides methods for recursively querying category members. 
""" - def __init__(self, login_bot: Any, title: str, **kwargs: Any) -> None: + def __init__( + self, + login_bot: Any, + title: str, + **kwargs: Any, + ) -> None: # --- self.login_bot = login_bot # --- @@ -232,10 +227,7 @@ def pages_table_work(self, table, pages): # --- tablese["templates"] = list(set(tablese["templates"])) # --- - langlinks = { - fo["lang"]: fo.get("title") or fo.get("*") or "" - for fo in caca.get("langlinks", []) - } + langlinks = {fo["lang"]: fo.get("title") or fo.get("*") or "" for fo in caca.get("langlinks", [])} # --- if langlinks: if tablese.get("langlinks"): @@ -291,9 +283,7 @@ def get_cat_new(self, cac): d += 1 # --- if self.limit > 0 and len(results) >= self.limit: - logger.info( - f"<> limit:{self.limit} reached, len of results: {len(results)} break .." - ) + logger.info(f"<> limit:{self.limit} reached, len of results: {len(results)} break ..") break # --- if continue_params: @@ -359,9 +349,7 @@ def subcatquery_(self, **kwargs): new_tab2 = [] # --- if self.limit > 0 and len(self.result_table) >= self.limit: - logger.info( - f"<> limit:{self.limit} reached, len of results: {len(self.result_table)} break .." 
- ) + logger.info(f"<> limit:{self.limit} reached, len of results: {len(self.result_table)} break ..") break # --- depth_done += 1 @@ -388,6 +376,6 @@ def subcatquery_(self, **kwargs): key=lambda item: self.timestamps.get(item[0], 0), reverse=True, ) - self.result_table = {k: v for k, v in soro} + self.result_table = dict(soro) # --- return self.result_table diff --git a/mw_api/super/S_Category/catdepth_new.py b/mw_api/super/S_Category/catdepth_new.py index f1c4878..02ffe82 100644 --- a/mw_api/super/S_Category/catdepth_new.py +++ b/mw_api/super/S_Category/catdepth_new.py @@ -1,6 +1,4 @@ -""" - -""" +""" """ import logging import time diff --git a/mw_api/super/S_Page/ar_err.py b/mw_api/super/S_Page/ar_err.py index c4f5d82..2c680c3 100644 --- a/mw_api/super/S_Page/ar_err.py +++ b/mw_api/super/S_Page/ar_err.py @@ -40,8 +40,3 @@ def test_find_edit_error(): print(f"Test case 3: Result = {result}") print("All test cases pass!") - - -if __name__ == "__main__": - # Run the test - test_find_edit_error() diff --git a/mw_api/super/S_Page/data.py b/mw_api/super/S_Page/data.py index 4477781..88950d8 100644 --- a/mw_api/super/S_Page/data.py +++ b/mw_api/super/S_Page/data.py @@ -8,6 +8,7 @@ from dataclasses import dataclass, field + @dataclass class Content: text_html: str = "" @@ -15,6 +16,7 @@ class Content: words: int = 0 length: int = 0 + @dataclass class Meta: is_Disambig: bool = False @@ -29,6 +31,7 @@ class Meta: flagged: str = "" wikibase_item: str = "" + @dataclass class RevisionsData: revid: str = "" @@ -38,6 +41,7 @@ class RevisionsData: revisions: list = field(default_factory=list) touched: str = "" + @dataclass class LinksData: back_links: list = field(default_factory=list) @@ -47,12 +51,14 @@ class LinksData: links: list = field(default_factory=list) links2: list = field(default_factory=list) + @dataclass class CategoriesData: categories: dict = field(default_factory=dict) hidden_categories: dict = field(default_factory=dict) all_categories_with_hidden: dict = 
field(default_factory=dict) + @dataclass class TemplateData: templates: dict = field(default_factory=dict) diff --git a/mw_api/super/S_Page/super_page.py b/mw_api/super/S_Page/super_page.py index a113c31..f3a7ee4 100644 --- a/mw_api/super/S_Page/super_page.py +++ b/mw_api/super/S_Page/super_page.py @@ -1,12 +1,11 @@ """ """ -# import os import logging from typing import Any, Dict, Optional, Union import wikitextparser as wtp -from ...api_utils import botEdit, printe, txtlib +from ...api_utils import botEdit, txtlib from ...api_utils.ask_bot import ASK_BOT from ...api_utils.lang_codes import change_codes from ...core.config import BotConfig, get_default_config @@ -109,7 +108,7 @@ def false_edit(self) -> bool: if len(self.newtext) < 0.1 * len(self.text): text_err = f"Edit will remove 90% of the text. {len(self.newtext)} < 0.1 * {len(self.text)}" text_err += f"title: {self.title}, summary: {self.content.summary}" - logger.warning("", text=text_err) + logger.warning(text_err) return True # --- if self.lang == "ar" and self.ns == 0: @@ -230,9 +229,7 @@ def get_text(self, redirects=False): # title = v["title"] # --- pageprops = v.get("pageprops", {}) - self.meta.wikibase_item = ( - pageprops.get("wikibase_item") or self.meta.wikibase_item - ) + self.meta.wikibase_item = pageprops.get("wikibase_item") or self.meta.wikibase_item # --- # "flagged": { "stable_revid": 61366100, "level": 0, "level_text": "stable"} self.meta.flagged = v.get("flagged", False) is not False @@ -243,13 +240,9 @@ def get_text(self, redirects=False): # --- self.text = page_data.get("slots", {}).get("main", {}).get("*", "") self.user = page_data.get("user") or self.user - self.revisions_data.revid = ( - page_data.get("revid") or self.revisions_data.revid - ) + self.revisions_data.revid = page_data.get("revid") or self.revisions_data.revid # --- - self.revisions_data.timestamp = ( - page_data.get("timestamp") or self.revisions_data.timestamp - ) + self.revisions_data.timestamp = 
page_data.get("timestamp") or self.revisions_data.timestamp # --- if "parentid" in page_data and page_data["parentid"] == 0: self.meta.create_data = { @@ -327,32 +320,27 @@ def get_infos(self): if "sortkey" in cat: del cat["sortkey"] # --- - tit = cat["title"] + category_title = cat["title"] # --- - self.categories_data.all_categories_with_hidden[tit] = cat + self.categories_data.all_categories_with_hidden[category_title] = cat # --- if cat.get("hidden") is True: - self.categories_data.hidden_categories[tit] = cat + self.categories_data.hidden_categories[category_title] = cat else: del cat["hidden"] - self.categories_data.categories[tit] = cat + self.categories_data.categories[category_title] = cat # --- if ta.get("langlinks", []) != []: # --- # {"lang": "ca", "*": "UCI World Tour 2023"} or {'lang': 'bh', 'title': 'टेम्पलेट:AWB'} # --- - self.langlinks = { - ta["lang"]: ta.get("*") or ta.get("title") - for ta in ta.get("langlinks", []) - } + self.langlinks = {ta["lang"]: ta.get("*") or ta.get("title") for ta in ta.get("langlinks", [])} # --- if ta.get("templates", []) != []: # --- # 'templates': [{'ns': 10, 'title': 'قالب:No redirect'}], # --- - self.template_data.templates_API = [ - ta["title"] for ta in ta.get("templates", []) - ] + self.template_data.templates_API = [ta["title"] for ta in ta.get("templates", [])] # --- # "linkshere": [{"pageid": 189150,"ns": 0,"title": "طواف فرنسا"}, {"pageid": 308641,"ns": 10,"title": "قالب:AWB","redirect": ""}] self.links_data.links_here = ta.get("linkshere", []) @@ -498,9 +486,7 @@ def isRedirect(self): def isDisambiguation(self): # --- # if the title ends with '(توضيح)' or '(disambiguation)' - self.meta.is_Disambig = self.title.endswith("(توضيح)") or self.title.endswith( - "(disambiguation)" - ) + self.meta.is_Disambig = self.title.endswith("(توضيح)") or self.title.endswith("(disambiguation)") # --- if self.meta.is_Disambig: logger.info(f'<> page "{self.title}" is Disambiguation / توضيح') @@ -520,10 +506,7 @@ def 
get_categories(self, with_hidden=False): def get_hidden_categories(self): # --- - if ( - self.categories_data.categories == {} - and self.categories_data.hidden_categories == {} - ): + if self.categories_data.categories == {} and self.categories_data.hidden_categories == {}: self.get_infos() # --- return self.categories_data.hidden_categories @@ -654,7 +637,7 @@ def exists(self): if not self.meta.Exists: self.get_text() if not self.meta.Exists: - logger.info(f'page "{self.title}" not in {self.lang}:{self.family}') + logger.info(f'page "{self.title}" not exists in {self.lang}:{self.family}') return self.meta.Exists def namespace(self): @@ -667,7 +650,7 @@ def get_user(self): self.get_text() return self.user - def get_templates(self): + def get_templates(self) -> dict: if not self.text: self.text = self.get_text() self.template_data.templates = txtlib.extract_templates_and_params(self.text) @@ -760,29 +743,17 @@ def save( if result.lower() == "success": self.text = newtext self.user = "" - logger.info( - f"<> ** true .. [[{self.lang}:{self.family}:{self.title}]] " - ) + logger.info(f"<> ** true .. 
[[{self.lang}:{self.family}:{self.title}]] ") # logger.info('Done True...') # --- if self._config.print_pop: print(pop) # --- - self.revisions_data.pageid = ( - edit.get("pageid") or self.revisions_data.pageid - ) - self.revisions_data.revid = ( - edit.get("newrevid") or self.revisions_data.revid - ) - self.revisions_data.newrevid = ( - edit.get("newrevid") or self.revisions_data.newrevid - ) - self.revisions_data.touched = ( - edit.get("touched") or self.revisions_data.touched - ) - self.revisions_data.timestamp = ( - edit.get("newtimestamp") or self.revisions_data.timestamp - ) + self.revisions_data.pageid = edit.get("pageid") or self.revisions_data.pageid + self.revisions_data.revid = edit.get("newrevid") or self.revisions_data.revid + self.revisions_data.newrevid = edit.get("newrevid") or self.revisions_data.newrevid + self.revisions_data.touched = edit.get("touched") or self.revisions_data.touched + self.revisions_data.timestamp = edit.get("newtimestamp") or self.revisions_data.timestamp # --- return True # --- @@ -806,7 +777,7 @@ def purge(self): data = self.post_params(params, addtoken=True) # --- if not data: - logger.info("<> ** error. ") + logger.info("<> ** purge error. ") return False # --- title2 = self.title @@ -825,11 +796,17 @@ def purge(self): if title2 == ti and "purged" in t: return True if "missing" in t: - logger.info(f'page "{t["title"]}" missing') + logger.info(f'page "{ti}" missing') return "missing" return False - def create(self, text="", summary="", nodiff="", noask=False) -> bool: + def create( + self, + text="", + summary="", + nodiff="", + noask=False, + ) -> bool: # --- """ Creates a new page with the specified text and summary. @@ -894,26 +871,14 @@ def create(self, text="", summary="", nodiff="", noask=False) -> bool: # --- self.text = text # --- - logger.info( - f"<> ** true .. [[{self.lang}:{self.family}:{self.title}]] " - ) + logger.info(f"<> ** true .. [[{self.lang}:{self.family}:{self.title}]] ") # logger.info('Done True... 
time.sleep() ') # --- - self.revisions_data.pageid = ( - edit.get("pageid") or self.revisions_data.pageid - ) - self.revisions_data.revid = ( - edit.get("newrevid") or self.revisions_data.revid - ) - self.revisions_data.touched = ( - edit.get("touched") or self.revisions_data.touched - ) - self.revisions_data.newrevid = ( - edit.get("newrevid") or self.revisions_data.newrevid - ) - self.revisions_data.timestamp = ( - edit.get("newtimestamp") or self.revisions_data.timestamp - ) + self.revisions_data.pageid = edit.get("pageid") or self.revisions_data.pageid + self.revisions_data.revid = edit.get("newrevid") or self.revisions_data.revid + self.revisions_data.touched = edit.get("touched") or self.revisions_data.touched + self.revisions_data.newrevid = edit.get("newrevid") or self.revisions_data.newrevid + self.revisions_data.timestamp = edit.get("newtimestamp") or self.revisions_data.timestamp # --- return True # --- @@ -925,7 +890,13 @@ def create(self, text="", summary="", nodiff="", noask=False) -> bool: # --- return False - def Create(self, text="", summary="", nodiff="", noask=False) -> bool: + def Create( + self, + text="", + summary="", + nodiff="", + noask=False, + ) -> bool: return self.create(text=text, summary=summary, nodiff=nodiff, noask=noask) def page_backlinks(self, ns=0): @@ -956,7 +927,17 @@ def page_backlinks(self, ns=0): # --- return self.links_data.back_links - def page_links(self): + def page_links(self) -> list: + """ + Get the links on the page. + Return: + list: A list of links on the page, where each link is represented as a dictionary containing its namespace, title, and existence status. 
+ Example of returned data: + [ + {'ns': 14, 'title': 'تصنيف:مقالات بحاجة لشريط بوابات', 'exists': True}, + {'ns': 14, 'title': 'تصنيف:مقالات بحاجة لصندوق معلومات', 'exists': False} + ] + """ params = { "action": "parse", "prop": "links", @@ -966,7 +947,7 @@ def page_links(self): # data = self.post_params(params) # data = data.get('parse', {}).get('links', []) # --- - data = self.post_continue(params, "parse", _p_="links", p_empty=[]) + data: list = self.post_continue(params, "parse", _p_="links", p_empty=[]) # --- # [{'ns': 14, 'title': 'تصنيف:مقالات بحاجة لشريط بوابات', 'exists': True}, {'ns': 14, 'title': 'تصنيف:مقالات بحاجة لصندوق معلومات', 'exists': False}] # --- @@ -1033,3 +1014,9 @@ def get_revisions(self, rvprops=[]): self.revisions_data.revisions = revisions # --- return revisions + + def __getitem__(self, key): + if key == "q": + return self.get_qid() + else: + raise diff --git a/mw_api/super/__init__.py b/mw_api/super/__init__.py index e53d885..655160e 100644 --- a/mw_api/super/__init__.py +++ b/mw_api/super/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ """ from . 
import super_login diff --git a/mw_api/super/bot.py b/mw_api/super/bot.py index 21f72c4..f0c0b15 100644 --- a/mw_api/super/bot.py +++ b/mw_api/super/bot.py @@ -7,6 +7,7 @@ """ +import functools import logging import os from http.cookiejar import MozillaCookieJar @@ -18,7 +19,6 @@ from ..core.config import BotConfig, get_default_config from ..core.container import SessionManager, get_default_session_manager from .cookies_bot import del_cookies_file, get_file_name - from .params_help import PARAMS_HELPS logger = logging.getLogger(__name__) @@ -26,6 +26,16 @@ botname = "mw_api" +@functools.lru_cache(maxsize=1024) +def get_session(lang, family) -> requests.session: + """ + function args used to load cached sessions + """ + session = requests.session() + session.headers.update({"User-Agent": default_user_agent()}) + return session + + class LOGIN_HELPS(PARAMS_HELPS): """ Helper class for login and session management. @@ -38,30 +48,24 @@ def __init__( session_manager: Optional[SessionManager] = None, config: Optional[BotConfig] = None, ) -> None: - # print("class LOGIN_HELPS:") self.cookie_jar = False - self.session = requests.Session() - # --- - # Get session manager and config - use provided or default - self._session_manager = ( - session_manager - if session_manager is not None - else get_default_session_manager() - ) - self._config = config if config is not None else get_default_config() # --- # check if self has username before writeself.username = "" self.username = getattr(self, "username", "") self.family = getattr(self, "family", "") self.lang = getattr(self, "lang", "") # --- - self.endpoint = getattr( - self, "endpoint", f"https://{self.lang}.{self.family}.org/w/api.php" - ) + self.endpoint = getattr(self, "endpoint", f"https://{self.lang}.{self.family}.org/w/api.php") # --- if self.endpoint == "https://www.mdwiki.org/w/api.php": self.endpoint = "https://mdwiki.org/w/api.php" # --- + self.session = get_session(self.endpoint, self.endpoint) + # --- + # Get 
session manager and config - use provided or default + self._session_manager = session_manager if session_manager is not None else get_default_session_manager() + self._config = config if config is not None else get_default_config() + # --- self.connection = None # --- self.password = "" @@ -76,7 +80,9 @@ def __init__( super().__init__() def log_error(self, result, action, params=None) -> None: - logger.error(f"Error occurred during {action}: {result}") + good_result = ["200", "success"] + if str(result).lower() not in good_result: + logger.error(f"Error occurred: {result}, Action: {action}, Params: {params}") def add_User_tables(self, family, table, lang="") -> None: # --- @@ -87,13 +93,11 @@ def add_User_tables(self, family, table, lang="") -> None: langx = lang # --- if table["username"].find("bot") == -1 and family == "wikipedia": - print(f"add_User_tables: {family=}, {table['username']=}") + logger.info(f"add_User_tables: {family=}, {table['username']=}") # --- if family != "" and table["username"] != "" and table["password"] != "": # --- - if self.family == family or ( - langx == "ar" and self.family.startswith("wik") - ): # wiktionary + if self.family == family or (langx == "ar" and self.family.startswith("wik")): # wiktionary self.user_table_done = True # --- self.username = table["username"] @@ -130,12 +134,8 @@ def log_in(self) -> bool: Bot_passwords = self.password.find("@") != -1 login_count = self._session_manager.increment_login_count() - logger.info( - f"<<{color}>> {botname}/page.py: Log_to_wiki {self.endpoint} count:{login_count}" - ) - logger.info( - f"{botname}/page.py: log to {self.lang}.{self.family}.org user:{self.username}, ({Bot_passwords=})" - ) + logger.info(f"<<{color}>> {botname}/page.py: Log_to_wiki {self.endpoint} count:{login_count}") + logger.info(f"{botname}/page.py: log to {self.lang}.{self.family}.org user:{self.username}, ({Bot_passwords=})") logintoken = self.get_logintoken() @@ -165,19 +165,15 @@ def get_logintoken(self) -> 
str: if session is None: logger.error(f"<> No session found for key: {self.sea_key}") return "" - r11 = session.request( - "POST", self.endpoint, data=r1_params, headers=self.headers - ) + r11 = session.request("POST", self.endpoint, data=r1_params, headers=self.headers) # --- self.log_error(r11.status_code, "logintoken") # --- if not str(r11.status_code).startswith("2"): - logger.info( - f"<> {botname} {r11.status_code} Server Error: Server Hangup for url: {self.endpoint}" - ) + logger.info(f"<> {botname} {r11.status_code} Server Error: Server Hangup for url: {self.endpoint}") # --- except Exception as e: - logger.exception('Exception:', exc_info=True) + logger.error(f"Failed to get login token: {str(e)}") return "" jsson1 = {} @@ -185,8 +181,8 @@ def get_logintoken(self) -> str: try: jsson1 = r11.json() except Exception as e: - print(r11.text) - logger.exception('Exception:', exc_info=True) + logger.info(r11.text) + logger.error(f"Failed to get login token: {str(e)}") return "" return jsson1.get("query", {}).get("tokens", {}).get("logintoken") or "" @@ -211,11 +207,9 @@ def get_login_result(self, logintoken) -> bool: if session is None: logger.error(f"<> No session found for key: {self.sea_key}") return False - req = session.request( - "POST", self.endpoint, data=r2_params, headers=self.headers - ) + req = session.request("POST", self.endpoint, data=r2_params, headers=self.headers) except Exception as e: - logger.exception('Exception:', exc_info=True) + logger.error(f"Failed to get login token: {str(e)}") return False # --- r22 = {} @@ -224,8 +218,8 @@ def get_login_result(self, logintoken) -> bool: try: r22 = req.json() except Exception as e: - logger.exception('Exception:', exc_info=True) - print(req.text) + logger.error(f"Failed to get login token: {str(e)}") + logger.info(req.text) return False # --- login_result = r22.get("login", {}).get("result", "") @@ -240,7 +234,7 @@ def get_login_result(self, logintoken) -> bool: # --- reason = r22.get("login", 
{}).get("reason", "") # --- - # logger.warning(r22) + # logger.exception(r22) # --- if reason == "Incorrect username or password entered. Please try again.": logger.info(f"user:{self.username}, pass:******") @@ -266,11 +260,9 @@ def loged_in(self) -> bool: logger.error(f"<> No session found for key: {self.sea_key}") self.log_error("failed", "userinfo") return False - req = session.request( - "POST", self.endpoint, data=params, headers=self.headers - ) + req = session.request("POST", self.endpoint, data=params, headers=self.headers) except Exception as e: - logger.exception('Exception:', exc_info=True) + logger.error(f"Failed to get login token: {str(e)}") self.log_error("failed", "userinfo") return False # --- @@ -279,8 +271,8 @@ def loged_in(self) -> bool: try: json1 = req.json() except Exception as e: - logger.exception('Exception:', exc_info=True) - print(req.text) + logger.error(f"Failed to get login token: {str(e)}") + logger.info(req.text) return False # --- userinfo = json1.get("query", {}).get("userinfo", {}) @@ -289,7 +281,7 @@ def loged_in(self) -> bool: # --- self.log_error(result_x, "userinfo") # --- - # print(json1) + # logger.info(json1) # --- if "anon" in userinfo or "temp" in userinfo: return False @@ -303,7 +295,8 @@ def make_new_session(self) -> None: # --- logger.info(f":({self.lang}, {self.family}, {self.username})") # --- - new_session = requests.Session() + new_session = get_session(self.lang, self.family) + # --- self._session_manager.set_session(self.sea_key, new_session) # --- self.cookies_file = get_file_name(self.lang, self.family, self.username) @@ -311,13 +304,13 @@ def make_new_session(self) -> None: self.cookie_jar = MozillaCookieJar(self.cookies_file) # --- if os.path.exists(self.cookies_file) and self.family != "mdwiki": - print("Load cookies from file, including session cookies") + logger.info("Load cookies from file, including session cookies") try: self.cookie_jar.load(ignore_discard=True, ignore_expires=True) - print("We have %d 
cookies" % len(self.cookie_jar)) + logger.info(f"We have {len(self.cookie_jar)} cookies") # --- except Exception as e: - print(e) + logger.error(f"Failed to load cookies file: {str(e)}") # --- session = self._session_manager.get_session(self.sea_key) if session: @@ -328,9 +321,7 @@ def make_new_session(self) -> None: if len(self.cookie_jar) > 0: if self.loged_in(): loged_t = True - logger.info( - f"<>Cookie Already logged in with user:{self.username_in}" - ) + logger.info(f"<>Cookie Already logged in with user:{self.username_in}") else: loged_t = self.log_in() # --- @@ -395,7 +386,7 @@ def raw_request(self, params, files=None, timeout=30): except Exception as e: self.log_error("Exception", u_action, params=params) - logger.exception('Exception:', exc_info=True) + logger.error(f"Failed to get login token: {str(e)}") # --- self._handle_server_error(req0, u_action, params=params) # --- @@ -423,7 +414,7 @@ def post_it(self, params, files=None, timeout=30): # --- if req0.headers and req0.headers.get("x-database-lag"): logger.info("<> x-database-lag.. 
") - print(req0.headers) + logger.info(req0.headers) # raise # --- return req0 @@ -446,11 +437,14 @@ def post_it_parse_data(self, params, files=None, timeout=30, relogin=False) -> d # --- if code == "assertnameduserfailed": # --- - print("assertnameduserfailed" * 10) + logger.info("assertnameduserfailed" * 10) # --- del_cookies_file(self.cookies_file) # --- self.username_in = "" + # --- + get_session.cache_clear() + # --- self.make_new_session() # --- return self.post_it_parse_data(params, files, timeout, relogin=True) @@ -459,7 +453,7 @@ def post_it_parse_data(self, params, files=None, timeout=30, relogin=False) -> d def get_rest_result(self, url) -> dict: # --- - print("get_rest_result:") + logger.info("get_rest_result:") # --- if not self._session_manager.has_session(self.sea_key): self.make_new_session() @@ -477,7 +471,7 @@ def get_rest_result(self, url) -> dict: ) # --- except Exception as e: - logger.exception('Exception:', exc_info=True) + logger.error(f"Failed to request REST API: {str(e)}") return {} # --- result = {} @@ -485,8 +479,8 @@ def get_rest_result(self, url) -> dict: try: result = req0.json() except Exception as e: - print(req0.text) - logger.exception('Exception:', exc_info=True) + logger.info(req0.text) + logger.error(f"Failed to get login token: {str(e)}") return {} # --- return result diff --git a/mw_api/super/bot_new.py b/mw_api/super/bot_new.py index a61905e..6e67f22 100644 --- a/mw_api/super/bot_new.py +++ b/mw_api/super/bot_new.py @@ -7,8 +7,10 @@ """ import copy +import functools import logging import os +import sys from http.cookiejar import MozillaCookieJar import requests @@ -24,6 +26,16 @@ logins_count = {1: 0} +@functools.lru_cache(maxsize=1024) +def get_session(lang, family) -> requests.session: + """ + function args used to load cached sessions + """ + session = requests.session() + session.headers.update({"User-Agent": default_user_agent()}) + return session + + class MwClientSite: def __init__(self, lang, family, config: 
BotConfig = None): self.lang = lang @@ -44,7 +56,9 @@ def __init__(self, lang, family, config: BotConfig = None): # self._start_() def log_error(self, result, action, params=None) -> None: - logger.error(f"Error occurred during {action}: {result}") + good_result = ["200", "success"] + if str(result).lower() not in good_result: + logger.error(f"Error occurred: {result}, Action: {action}, Params: {params}") def _start_(self, username, password): self.username = username @@ -59,18 +73,14 @@ def __initialize_connection(self): self.jar_cookie = MozillaCookieJar(cookies_file) - self.connection = requests.Session() - - self.connection.headers["User-Agent"] = default_user_agent() + self.connection = get_session(self.lang, self.family) # Get a cached session for the given lang and family. # --- if os.path.exists(cookies_file) and self.family != "mdwiki": # logger.info("<>loading cookies") try: # Load cookies from file, including session cookies self.jar_cookie.load(ignore_discard=True, ignore_expires=True) - self.connection.cookies = ( - self.jar_cookie - ) # Tell Requests session to use the cookiejar. + self.connection.cookies = self.jar_cookie # Tell Requests session to use the cookiejar. 
except Exception as e: logger.info("Could not load cookies: %s" % e) @@ -107,14 +117,10 @@ def do_login(self): if not self.site_mwclient.logged_in: logins_count[1] += 1 - logger.info( - f"<>logging in to ({self.domain}) count:{logins_count[1]}, user: {self.username}" - ) + logger.info(f"<>logging in to ({self.domain}) count:{logins_count[1]}, user: {self.username}") # --- try: - login_result = self.site_mwclient.login( - username=self.username, password=self.password - ) + login_result = self.site_mwclient.login(username=self.username, password=self.password) self.log_error(login_result, "login") self.login_done = True @@ -123,9 +129,7 @@ def do_login(self): logger.info(f"Could not login to ({self.domain}): %s" % e) if self.site_mwclient.logged_in: - logger.info( - f"<>logged in as {self.site_mwclient.username} to ({self.domain})" - ) + logger.info(f"<>logged in as {self.site_mwclient.username} to ({self.domain})") # Save cookies to file, including session cookies if self.jar_cookie: @@ -165,7 +169,7 @@ def do_request(self, params=None, method="POST"): if "text" in params: params["text"] = params["text"][:100] # --- - logger.warning(e, text=params) + logger.exception(f"params: {str(params)}") # --- return {} @@ -206,9 +210,7 @@ def add_User_tables(self, family, table, lang="") -> None: # --- if family != "" and table["username"] != "" and table["password"] != "": # --- - if self.family == family or ( - langx == "ar" and self.family.startswith("wik") - ): # wiktionary + if self.family == family or (langx == "ar" and self.family.startswith("wik")): # wiktionary self.user_table_done = True # --- self.username = table["username"] @@ -267,6 +269,8 @@ def post_it_parse_data(self, params, files=None, timeout=30, relogin=False) -> d code = error.get("code", "") # --- if code == "assertnameduserfailed": + # --- + get_session.cache_clear() # --- del_cookies_file(self.cookies_file) # --- @@ -288,7 +292,7 @@ def get_rest_result(self, url) -> dict: result = req0.json() except 
Exception as e: - logger.exception('Exception:', exc_info=True) + logger.exception("Exception:", exc_info=True) # --- return result diff --git a/mw_api/super/cookies_bot.py b/mw_api/super/cookies_bot.py index 8d6733d..51cf544 100644 --- a/mw_api/super/cookies_bot.py +++ b/mw_api/super/cookies_bot.py @@ -1,37 +1,42 @@ """ -from .super.cookies_bot import get_cookies # cookies = get_cookies(lang, family, username) """ +import functools import logging import os import stat +import sys from datetime import datetime, timedelta -from functools import lru_cache from pathlib import Path from ..core.config import get_default_config logger = logging.getLogger(__name__) - statgroup = stat.S_IRWXU | stat.S_IRWXG -tool = os.getenv("HOME") -# --- -if not tool: - tool = Path(__file__).parent -else: - tool = Path(tool) -# --- -ta_dir = tool / "cookies" -# --- -if not ta_dir.exists(): - ta_dir.mkdir() - logger.info("<> mkdir:") - logger.info(f"ta_dir:{ta_dir}") - logger.info("<> mkdir:") - os.chmod(ta_dir, statgroup) + + +@functools.lru_cache(maxsize=1) +def get_ta_dir() -> Path: + tool = os.getenv("HOME") + + if not tool: + tool = Path(__file__).parent + else: + tool = Path(tool) + + ta_dir = tool / "cookies" + + if not ta_dir.exists(): + ta_dir.mkdir() + logger.info("<> mkdir:") + logger.info(f"ta_dir:{ta_dir}") + logger.info("<> mkdir:") + os.chmod(ta_dir, statgroup) + + return ta_dir def del_cookies_file(file_path): @@ -47,7 +52,9 @@ def del_cookies_file(file_path): def get_file_name(lang, family, username) -> Path: - # --- + + ta_dir = get_ta_dir() + config = get_default_config() if config.no_cookies: randome = os.urandom(8).hex() @@ -70,7 +77,7 @@ def get_file_name(lang, family, username) -> Path: del_cookies_file(file) elif datetime.now() - file_time > timedelta(days=3): del_cookies_file(file) - # --- + # --- return file @@ -102,15 +109,13 @@ def from_folder(lang, family, username): return cookies -@lru_cache(maxsize=128) +@functools.lru_cache(maxsize=128) def 
get_cookies(lang, family, username): # --- cookies = from_folder(lang, family, username) # --- if not cookies: - logger.info( - f" <> get_cookies: <> [[{lang}:{family}]] user:{username} <> not found" - ) + logger.info(f" <> get_cookies: <> [[{lang}:{family}]] user:{username} <> not found") return "make_new" # --- return cookies diff --git a/mw_api/super/handel_errors.py b/mw_api/super/handel_errors.py index e0d2b06..4033533 100644 --- a/mw_api/super/handel_errors.py +++ b/mw_api/super/handel_errors.py @@ -4,6 +4,7 @@ """ import logging +import sys from typing import Any, Dict, Optional, Union from ..core.config import BotConfig, get_default_config @@ -122,9 +123,7 @@ def handel_err( # Generic error handling if do_error: safe_params = {k: v for k, v in params.items() if k not in ("data", "text")} - logger.error( - f"<>{function} ERROR: <>info: {err_info}, params={safe_params}" - ) + logger.error(f"<>{function} ERROR: <>info: {err_info}, params={safe_params}") if self._config.raise_on_error: raise api_error diff --git a/mw_api/super/login_wrap.py b/mw_api/super/login_wrap.py deleted file mode 100644 index 41837f1..0000000 --- a/mw_api/super/login_wrap.py +++ /dev/null @@ -1,125 +0,0 @@ -""" - -from mw_api import LoginWrap - -from .super.login_wrap import LoginWrap - -# login_bot, catbots_login2 = LoginWrap(sitecode, family, bots_login_cache, User_tables) -# bots_login_cache.update(catbots_login2) - -""" - -import logging -from typing import Any, Dict, Optional, Tuple - -from ..core.config import BotConfig -from .super_login import Login - -logger = logging.getLogger(__name__) - - -class LoginWrapState: - """ - Encapsulates the state for LoginWrap function. - - Replaces global `hases` dictionary with an instance-based state management. 
- """ - - def __init__(self) -> None: - self._access_counts: Dict[Tuple, int] = {} - - def get_access_count(self, cache_key: Tuple) -> int: - """Get the access count for a cache key.""" - return self._access_counts.get(cache_key, 0) - - def increment_access_count(self, cache_key: Tuple) -> int: - """Increment and return the access count for a cache key.""" - self._access_counts.setdefault(cache_key, 0) - self._access_counts[cache_key] += 1 - return self._access_counts[cache_key] - - def clear(self) -> None: - """Clear all access counts.""" - self._access_counts.clear() - - -# Default state instance - can be replaced for testing -_default_state: Optional[LoginWrapState] = None - - -def get_default_state() -> LoginWrapState: - """Get the default LoginWrapState instance.""" - global _default_state - if _default_state is None: - _default_state = LoginWrapState() - return _default_state - - -def set_default_state(state: LoginWrapState) -> None: - """Set the default LoginWrapState instance.""" - global _default_state - _default_state = state - - -def reset_default_state() -> None: - """Reset the default state to None.""" - global _default_state - _default_state = None - - -def LoginWrap( - sitecode: str, - family: str, - bots_login_cache: Dict[Tuple, Login], - User_tables: Dict[str, Any], - config: Optional[BotConfig] = None, - state: Optional[LoginWrapState] = None, -) -> Tuple[Login, Dict[Tuple, Login]]: - """ - Get or create a Login instance for the given site and credentials. - - Args: - sitecode: The site code (language code). - family: The wiki family (e.g., 'wikipedia', 'wikidata'). - bots_login_cache: Cache of existing Login instances. - User_tables: User credentials dictionary with 'username' and 'password'. - config: Optional BotConfig instance for the Login. - state: Optional LoginWrapState for tracking access counts. - - Returns: - Tuple of (Login instance, updated bots_login_cache). 
- """ - if state is None: - state = get_default_state() - - # --- - cache_key: Tuple = (sitecode, family) # Consider adding relevant kwargs to key - # --- - username = User_tables.get("username") - # --- - if username: - cache_key = (sitecode, family, username) - # --- - if bots_login_cache.get(cache_key): - login_bot = bots_login_cache[cache_key] - # --- - access_count = state.increment_access_count(cache_key) - # --- - if access_count % 100 == 0: - logger.info( - f"### <> LoginWrap has bot for ({sitecode}.{family}.org|{username}) count: {access_count}" - ) - else: - login_bot = Login(sitecode, family=family, config=config) - # --- - logger.info( - f"### <> LoginWrap make new bot for ({sitecode}.{family}.org|{username})" - ) - # --- - login_bot.add_users({family: User_tables}, lang=sitecode) - # --- - bots_login_cache[cache_key] = login_bot - # --- - state.increment_access_count(cache_key) - # --- - return login_bot, bots_login_cache diff --git a/mw_api/super/login_wrap_new.py b/mw_api/super/login_wrap_new.py deleted file mode 100644 index 252af62..0000000 --- a/mw_api/super/login_wrap_new.py +++ /dev/null @@ -1,59 +0,0 @@ -""" - -from mw_api import LoginWrap - -from .super.login_wrap import LoginWrap - -# login_bot = LoginWrap(sitecode, family, User_tables) - -""" - -import logging -from functools import lru_cache - -from .super_login import Login - -logger = logging.getLogger(__name__) - - -@lru_cache(maxsize=128) -def _create_login_bot(sitecode, family, username, password): - """ - Create and cache a login bot instance. - - Note: The log message inside this function will only appear on cache misses - (i.e., when a new bot is created). This is intentional - on cache hits, - the same bot instance is returned without logging. 
- """ - logger.info( - f"### <> LoginWrap make new bot for ({sitecode}.{family}.org|{username})" - ) - # --- - login_bot = Login(sitecode, family=family) - # --- - User_tables = {"username": username, "password": password} - login_bot.add_users({family: User_tables}, lang=sitecode) - # --- - return login_bot - - -def LoginWrap(sitecode, family, bots_login_cache, User_tables): - """ - Get or create a cached login bot. - - Note: bots_login_cache parameter is kept for backward compatibility but is no longer used. - The caching is now handled by functools.lru_cache. - """ - username = User_tables.get("username", "") - password = User_tables.get("password", "") - # --- - login_bot = _create_login_bot(sitecode, family, username, password) - # --- - cache_info = _create_login_bot.cache_info() - if cache_info.hits > 0 and cache_info.hits % 100 == 0: - logger.info( - f"### <> LoginWrap has bot for ({sitecode}.{family}.org|{username}) count: {cache_info.hits}" - ) - # --- - # Return bots_login_cache for backward compatibility - return login_bot, bots_login_cache diff --git a/mw_api/super/params_help.py b/mw_api/super/params_help.py index a5a9741..c18cc4f 100644 --- a/mw_api/super/params_help.py +++ b/mw_api/super/params_help.py @@ -66,7 +66,7 @@ def parse_data(self, req0) -> dict: return data except Exception as e: - logger.warn(e) + logger.exception(e) text = str(req0.text).strip() valid_text = text.startswith("{") and text.endswith("}") @@ -78,6 +78,7 @@ def parse_data(self, req0) -> dict: data = json.loads(text) return data except Exception as e: - logger.warn(e, self.url_o_print) + logger.exception(e) + logger.exception(self.url_o_print) return {} diff --git a/mw_api/super/super_login.py b/mw_api/super/super_login.py index 6d435e5..17b8b9f 100644 --- a/mw_api/super/super_login.py +++ b/mw_api/super/super_login.py @@ -51,7 +51,10 @@ class Login(DEFAULT_LOGIN_HELPS, HANDEL_ERRORS): """ def __init__( - self, lang: str, family: str = "wikipedia", config: Optional[BotConfig] 
= None + self, + lang: str, + family: str = "wikipedia", + config: Optional[BotConfig] = None, ): # print(f"class Login:{lang=}") # --- @@ -91,20 +94,12 @@ def p_url(self, params) -> None: no_remove = ["titles", "title"] # --- pams2 = { - k: ( - v[:100] - if isinstance(v, str) and len(v) > 100 and k not in no_remove - else v - ) + k: (v[:100] if isinstance(v, str) and len(v) > 100 and k not in no_remove else v) for k, v in params.items() if k not in no_url } # --- - self.url_o_print = ( - f"{self.endpoint}?{urllib.parse.urlencode(pams2)}".replace( - "&format=json", "" - ) - ) + self.url_o_print = f"{self.endpoint}?{urllib.parse.urlencode(pams2)}".replace("&format=json", "") # --- if self.url_o_print not in urls_prints: urls_prints[self.url_o_print] = 0 @@ -112,9 +107,7 @@ def p_url(self, params) -> None: urls_prints[self.url_o_print] += 1 urls_prints["all"] += 1 # --- - logger.info( - f"c: {urls_prints[self.url_o_print]}/{urls_prints['all']}\t {self.url_o_print}" - ) + logger.info(f"c: {urls_prints[self.url_o_print]}/{urls_prints['all']}\t {self.url_o_print}") def make_response(self, params, files=None, timeout=30, do_error=True): """ @@ -167,9 +160,7 @@ def filter_params(self, params) -> dict: return params def post(self, params, Type="get", addtoken=False, CSRF=True, files=None): - return self.post_params( - params, Type=Type, addtoken=addtoken, GET_CSRF=CSRF, files=files - ) + return self.post_params(params, Type=Type, addtoken=addtoken, GET_CSRF=CSRF, files=files) def post_params( self, @@ -202,11 +193,7 @@ def post_params( # --- action = params["action"] # --- - to_add_action = ( - action in wb_actions - or action.startswith("wbcreate") - or action.startswith("wbset") - ) + to_add_action = action in wb_actions or action.startswith(("wbcreate", "wbset")) # --- if self.family == "wikidata" and to_add_action: params["maxlag"] = ar_lag[1] @@ -216,7 +203,7 @@ def post_params( self.r3_token = self.make_new_r3_token() if not self.r3_token: - logger.warning("<> 
super_login(post): no r3_token. return {}.") + logger.warning('<> self.r3_token == "" ') params["token"] = self.r3_token @@ -241,16 +228,12 @@ def post_params( logger.error(f"<> super_login(post): error: {error}") # --- if Invalid == "Invalid CSRF token.": - logger.info( - f'<> ** error "Invalid CSRF token.".\n{self.r3_token} ' - ) + logger.info(f'<> ** error "Invalid CSRF token.".\n{self.r3_token} ') if GET_CSRF: # --- self.r3_token = self.make_new_r3_token() # --- - return self.post_params( - params, Type=Type, addtoken=addtoken, GET_CSRF=False - ) + return self.post_params(params, Type=Type, addtoken=addtoken, GET_CSRF=False) # --- error_code = error.get("code", "") # --- @@ -259,9 +242,7 @@ def post_params( # --- logger.debug(params) # --- - logger.info( - f"<>post_params: <> {lage=} {max_retry=}, sleep: {lage + 1}" - ) + logger.info(f"<>post_params: <> {lage=} {max_retry=}, sleep: {lage + 1}") # --- time.sleep(lage + 1) # --- @@ -269,9 +250,7 @@ def post_params( # --- params["maxlag"] = ar_lag[1] # --- - return self.post_params( - params, Type=Type, addtoken=addtoken, max_retry=max_retry + 1 - ) + return self.post_params(params, Type=Type, addtoken=addtoken, max_retry=max_retry + 1) # --- if self.config.print_data: logger.info(data) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..cb836ea --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,147 @@ + +# ============================================ +# BLACK +# ============================================ +[tool.black] +line-length = 120 +target-version = ["py313"] +include = '\.pyi?$' +exclude = ''' + /( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + )/ +''' + +# ============================================ +# ISORT +# ============================================ +[tool.isort] +profile = "black" +line_length = 120 + +# Keep vertical formatting (multi-line import style) +multi_line_output = 3 +use_parentheses = true +force_grid_wrap = 0 
+include_trailing_comma = true + +# OPTIONAL: You can add this to ensure isort does NOT collapse into one line +ensure_newline_before_comments = true + +# Project paths +src_paths = "ArWikiCats" +known_first_party = "ArWikiCats" +skip = [ + ".env", + "env", + ".venv", + "venv", + "build", + "dist", + "__pycache__", + ".mypy_cache", + ".pytest_cache", + ".git", +] + +# ============================================ +# RUFF +# ============================================ +[tool.ruff] +exclude = [ + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "old", + "build", + "dist", + "site-packages", + "venv", +] +line-length = 120 +target-version = "py313" +fix = true + +[tool.ruff.format] +# Allow Ruff/Black to maintain vertical list formatting +skip-magic-trailing-comma = false + +# Keep Black-like behavior +quote-style = "double" +indent-style = "space" +line-ending = "auto" + +# Format code blocks inside docstrings (excellent for README-like docstrings) +docstring-code-format = true +docstring-code-line-length = 120 + +[tool.ruff.lint] +# "E402", "E225", "E226", "E227", "E228", "E252", "F841", "E224", "E203", "F401", +ignore = ["E501", "UP035", "UP006", "N806", "N999", "UP015", "I001", "N802", "UP007", "UP045"] +select = [ + "E", # pycodestyle (error) + "F", # pyflakes + "W", # pycodestyle (warning) + "B", # flake8-bugbear + "I", # isort + "N", # pep8-naming + "PIE", # flake8-pie + "PLE", # pylint error + "RUF100", # Unused noqa comments + "PGH004", # blanket noqa comments + "UP", # pyupgrade + "C4", # flake8-comprehensions + "SIM101", # merge duplicate isinstance calls + "SIM201", "SIM202", "SIM222", "SIM223", # flake8-simplify + "FURB168", # Prefer is operator over isinstance for None checks + "FURB169", # Do not use is comparison with type(None). 
Use None + "FURB187", # avoid list reverse copy + "FURB188", # use str.remove(pre|suf)fix + "ISC001", # implicitly concatenated string + "RET501", "RET502", # better return None handling +] +# ============================================ +# Flynt +# ============================================ +[tool.flynt] +# Keep it aligned with Black/Ruff +line_length = 120 + +[tool.mypy] +python_version = "3.13" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = false +ignore_missing_imports = true +line-length = 120 + +[tool.pylint.messages_control] +max-line-length = 120 +disable = [ + "C0111", # missing-docstring + "C0103", # invalid-name + "R0913", # too-many-arguments +] diff --git a/pytest.ini b/pytest.ini index ec115b3..49477ee 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,9 +1,28 @@ -[tool:pytest] -testpaths = tests -python_files = test_*.py *Test.py Test*.py +[pytest] +# Pytest configuration file +pythonpath = mw_api +# Test discovery patterns +python_files = test*.py *Test.py Test*.py python_classes = Test* -python_functions = test_* -addopts = -v --tb=short --strict-markers +python_functions = test* + +# Test paths +testpaths = tests + +# Output options +addopts = + -v + --strict-markers + --tb=short + -m "not network" + --durations=10 + ; --cov=mw_api + --cov-report=term-missing + --cov-report=xml + --cov-branch + --maxfail=25 + +# Markers for organizing tests markers = slow: marks tests as slow (deselect with '-m "not slow"') integration: marks tests as integration tests @@ -13,3 +32,20 @@ markers = filterwarnings = ignore::DeprecationWarning ignore::PendingDeprecationWarning + +# Coverage options +[coverage:run] +source = src +omit = + */tests/* + */test_* + */__pycache__/* + */venv/* + +[coverage:report] +precision = 2 +show_missing = True +skip_covered = False + +; python -m cProfile -o profile_slow.prof -m pytest -m slow +; snakeviz profile_slow.prof diff --git a/requirements.in b/requirements.in index 48da4e3..b807fbc 100644 --- 
a/requirements.in +++ b/requirements.in @@ -7,3 +7,5 @@ mwclient ratelimiter SPARQLWrapper colorlog +python-dotenv +pytest-mock diff --git a/sweep.yaml b/sweep.yaml new file mode 100644 index 0000000..89e1d02 --- /dev/null +++ b/sweep.yaml @@ -0,0 +1,27 @@ +# Sweep AI turns bugs & feature requests into code changes (https://sweep.dev) +# For details on our config file, check out our docs at https://docs.sweep.dev/usage/config + +# This setting contains a list of rules that Sweep will check for. If any of these rules are broken in a new commit, Sweep will create an pull request to fix the broken rule. +rules: + - "All new business logic should have corresponding unit tests." + - "Refactor large functions to be more modular." + - "Add docstrings to all functions and file headers." + +# This is the branch that Sweep will develop from and make pull requests to. Most people use 'main' or 'master' but some users also use 'dev' or 'staging'. +branch: 'main' + +# By default Sweep will read the logs and outputs from your existing Github Actions. To disable this, set this to false. +gha_enabled: True + +# This is the description of your project. It will be used by sweep when creating PRs. You can tell Sweep what's unique about your project, what frameworks you use, or anything else you want. +# +# Example: +# +# description: sweepai/sweep is a python project. The main api endpoints are in sweepai/api.py. Write code that adheres to PEP8. +description: '' + +# This sets whether to create pull requests as drafts. If this is set to True, then all pull requests will be created as drafts and GitHub Actions will not be triggered. +draft: False + +# This is a list of directories that Sweep will not be able to edit. 
+blocked_dirs: [] diff --git a/tests/conftest.py b/tests/conftest.py index cf2994d..73cfb47 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,8 +5,43 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from unittest.mock import MagicMock + + +@pytest.fixture(autouse=True) +def mock_get_session(mocker): + """ + Directly mocks get_session to return a controlled session object + for all tests automatically. + """ + # 1. Create a mock session object + mock_session = MagicMock() + + # 2. Create a mock response object + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"status": "success"} + + # 3. Link the session methods to return the mock response + # This covers cases like: session.get(), session.post(), or session.request() + mock_session.get.return_value = mock_response + mock_session.post.return_value = mock_response + mock_session.request.return_value = mock_response + + # 4. Patch the get_session function in the target module + # Note: Replace 'mw_api.super.bot' with the actual import path + mocker.patch("mw_api.super.bot.get_session", return_value=mock_session) + mocker.patch("mw_api.super.bot.get_session", return_value=mock_session) + mocker.patch("mw_api.api.client.get_session", return_value=mock_session) + + return mock_session + @pytest.fixture def temp_test_page(): - """صفحة اختبار مؤقتة""" return "User:TestBot/pytest_sandbox" + + +@pytest.fixture +def user_credentials(): + return {"username": "username", "password": "password"} diff --git a/tests/test_ALL_APIS.py b/tests/test_ALL_APIS.py index d81c975..c02d9b1 100644 --- a/tests/test_ALL_APIS.py +++ b/tests/test_ALL_APIS.py @@ -1,19 +1,26 @@ from unittest.mock import MagicMock, patch import pytest - from mw_api import ALL_APIS +@pytest.fixture(autouse=True) +def clear_login_cache(): + from mw_api.pages_bots import all_apis + + all_apis._login.cache_clear() + yield + all_apis._login.cache_clear() + + @pytest.fixture def 
mock_dependencies(): - with patch("mw_api.pages_bots.all_apis.Login") as mock_login, patch( - "mw_api.pages_bots.all_apis.super_page.MainPage" - ) as mock_main_page, patch( - "mw_api.pages_bots.all_apis.catdepth_new.subcatquery" - ) as mock_subcatquery, patch( - "mw_api.pages_bots.all_apis.bot_api.NEW_API" - ) as mock_new_api: + with ( + patch("mw_api.pages_bots.all_apis.Login") as mock_login, + patch("mw_api.pages_bots.all_apis.super_page.MainPage") as mock_main_page, + patch("mw_api.pages_bots.all_apis.catdepth_new.subcatquery") as mock_subcatquery, + patch("mw_api.pages_bots.all_apis.bot_api.NEW_API") as mock_new_api, + ): # Setup mock login instance mock_login_instance = MagicMock() mock_login.return_value = mock_login_instance diff --git a/tests/test_LiteDB.py b/tests/test_LiteDB.py index e22bcfd..30ddb49 100644 --- a/tests/test_LiteDB.py +++ b/tests/test_LiteDB.py @@ -2,7 +2,6 @@ import tempfile import pytest - from mw_api.DB_bots.db_bot import LiteDB diff --git a/tests/test_MainPage.py b/tests/test_MainPage.py index 5004e9e..da3f030 100644 --- a/tests/test_MainPage.py +++ b/tests/test_MainPage.py @@ -1,5 +1,4 @@ import pytest - from mw_api import ALL_APIS diff --git a/tests/test_api_modules.py b/tests/test_api_modules.py index 466f902..47dfe63 100644 --- a/tests/test_api_modules.py +++ b/tests/test_api_modules.py @@ -10,7 +10,6 @@ import pytest import requests - from mw_api.api.client import MediaWikiApiClient from mw_api.api.token_manager import TokenManager from mw_api.core.request_config import RequestConfig @@ -58,9 +57,7 @@ def test_init_custom_session(self, mock_session): def test_set_csrf_token(self, mock_session): """Test setting and getting CSRF token.""" - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) assert client.get_csrf_token() == "" client.set_csrf_token("test_token_123") @@ -71,9 +68,7 @@ def 
test_post_success(self, mock_session, mock_response): mock_response.json.return_value = {"query": {"pages": {}}} mock_session.post.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) result = client.post({"action": "query"}) assert result == {"query": {"pages": {}}} @@ -84,9 +79,7 @@ def test_post_with_token(self, mock_session, mock_response): mock_response.json.return_value = {"edit": {"result": "Success"}} mock_session.post.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) client.set_csrf_token("csrf_token_123") config = RequestConfig(add_token=True) @@ -100,9 +93,7 @@ def test_post_failure_returns_empty(self, mock_session): """Test POST request failure returns empty dict.""" mock_session.post.side_effect = requests.RequestException("Network error") - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) result = client.post({"action": "query"}) assert result == {} @@ -112,9 +103,7 @@ def test_get_success(self, mock_session, mock_response): mock_response.json.return_value = {"query": {"userinfo": {}}} mock_session.get.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) result = client.get({"action": "query", "meta": "userinfo"}) assert result == {"query": {"userinfo": {}}} @@ -123,9 +112,7 @@ def test_get_failure_returns_empty(self, mock_session): """Test GET request failure returns empty dict.""" mock_session.get.side_effect = 
requests.RequestException("Timeout") - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) result = client.get({"action": "query"}) assert result == {} @@ -135,9 +122,7 @@ def test_request_method_selection(self, mock_session, mock_response): mock_session.post.return_value = mock_response mock_session.get.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) # POST request config_post = RequestConfig(method="post") @@ -160,23 +145,17 @@ def test_query_with_continuation(self, mock_session): mock_response.json = Mock(side_effect=responses) mock_session.post.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) results = client.query({"prop": "info"}) assert len(results) == 2 def test_fetch_csrf_token(self, mock_session, mock_response): """Test fetching CSRF token from API.""" - mock_response.json.return_value = { - "query": {"tokens": {"csrftoken": "new_token_456"}} - } + mock_response.json.return_value = {"query": {"tokens": {"csrftoken": "new_token_456"}}} mock_session.post.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) token = client.fetch_csrf_token() assert token == "new_token_456" @@ -184,14 +163,10 @@ def test_fetch_csrf_token(self, mock_session, mock_response): def test_is_authenticated_true(self, mock_session, mock_response): """Test is_authenticated returns True for logged-in user.""" - mock_response.json.return_value = { - "query": {"userinfo": 
{"id": 12345, "name": "TestUser"}} - } + mock_response.json.return_value = {"query": {"userinfo": {"id": 12345, "name": "TestUser"}}} mock_session.post.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) assert client.is_authenticated() is True def test_is_authenticated_false(self, mock_session, mock_response): @@ -199,9 +174,7 @@ def test_is_authenticated_false(self, mock_session, mock_response): mock_response.json.return_value = {"query": {"userinfo": {"anon": True}}} mock_session.post.return_value = mock_response - client = MediaWikiApiClient( - "https://en.wikipedia.org/w/api.php", session=mock_session - ) + client = MediaWikiApiClient("https://en.wikipedia.org/w/api.php", session=mock_session) assert client.is_authenticated() is False @@ -220,9 +193,7 @@ def test_init(self, mock_session): def test_get_csrf_token_fetch(self, mock_session, mock_response): """Test fetching CSRF token.""" - mock_response.json.return_value = { - "query": {"tokens": {"csrftoken": "csrf_123"}} - } + mock_response.json.return_value = {"query": {"tokens": {"csrftoken": "csrf_123"}}} mock_session.post.return_value = mock_response manager = TokenManager("https://en.wikipedia.org/w/api.php", mock_session) @@ -232,9 +203,7 @@ def test_get_csrf_token_fetch(self, mock_session, mock_response): def test_get_csrf_token_cached(self, mock_session, mock_response): """Test CSRF token is cached.""" - mock_response.json.return_value = { - "query": {"tokens": {"csrftoken": "csrf_456"}} - } + mock_response.json.return_value = {"query": {"tokens": {"csrftoken": "csrf_456"}}} mock_session.post.return_value = mock_response manager = TokenManager("https://en.wikipedia.org/w/api.php", mock_session) @@ -267,9 +236,7 @@ def test_get_csrf_token_force_refresh(self, mock_session): def test_get_login_token(self, mock_session, mock_response): """Test 
fetching login token.""" - mock_response.json.return_value = { - "query": {"tokens": {"logintoken": "login_789"}} - } + mock_response.json.return_value = {"query": {"tokens": {"logintoken": "login_789"}}} mock_session.post.return_value = mock_response manager = TokenManager("https://en.wikipedia.org/w/api.php", mock_session) diff --git a/tests/test_auth_modules.py b/tests/test_auth_modules.py index 6262d0a..9ce66f8 100644 --- a/tests/test_auth_modules.py +++ b/tests/test_auth_modules.py @@ -10,7 +10,6 @@ import pytest import requests - from mw_api.auth.authenticator import Authenticator from mw_api.auth.token_provider import TokenProvider from mw_api.core.exceptions import AuthenticationError @@ -104,9 +103,7 @@ def test_is_authenticated_not_logged_in(self, mock_session): def test_is_authenticated_logged_in(self, mock_session, mock_response): """Test is_authenticated returns True when logged in.""" - mock_response.json.return_value = { - "query": {"userinfo": {"id": 123, "name": "TestUser"}} - } + mock_response.json.return_value = {"query": {"userinfo": {"id": 123, "name": "TestUser"}}} mock_session.post.return_value = mock_response auth = Authenticator("https://en.wikipedia.org/w/api.php", mock_session) @@ -175,9 +172,7 @@ def test_init(self, mock_session): def test_get_csrf_token(self, mock_session, mock_response): """Test getting CSRF token.""" - mock_response.json.return_value = { - "query": {"tokens": {"csrftoken": "csrf_token_123"}} - } + mock_response.json.return_value = {"query": {"tokens": {"csrftoken": "csrf_token_123"}}} mock_session.post.return_value = mock_response provider = TokenProvider("https://en.wikipedia.org/w/api.php", mock_session) @@ -187,9 +182,7 @@ def test_get_csrf_token(self, mock_session, mock_response): def test_get_csrf_token_cached(self, mock_session, mock_response): """Test CSRF token caching.""" - mock_response.json.return_value = { - "query": {"tokens": {"csrftoken": "csrf_cached"}} - } + mock_response.json.return_value = {"query": 
{"tokens": {"csrftoken": "csrf_cached"}}} mock_session.post.return_value = mock_response provider = TokenProvider("https://en.wikipedia.org/w/api.php", mock_session) @@ -204,9 +197,7 @@ def test_get_csrf_token_cached(self, mock_session, mock_response): def test_get_login_token(self, mock_session, mock_response): """Test getting login token (always fetches fresh).""" - mock_response.json.return_value = { - "query": {"tokens": {"logintoken": "login_token_456"}} - } + mock_response.json.return_value = {"query": {"tokens": {"logintoken": "login_token_456"}}} mock_session.post.return_value = mock_response provider = TokenProvider("https://en.wikipedia.org/w/api.php", mock_session) @@ -216,9 +207,7 @@ def test_get_login_token(self, mock_session, mock_response): def test_get_watch_token(self, mock_session, mock_response): """Test getting watch token.""" - mock_response.json.return_value = { - "query": {"tokens": {"watchtoken": "watch_token_789"}} - } + mock_response.json.return_value = {"query": {"tokens": {"watchtoken": "watch_token_789"}}} mock_session.post.return_value = mock_response provider = TokenProvider("https://en.wikipedia.org/w/api.php", mock_session) diff --git a/tests/test_core_phase1.py b/tests/test_core_phase1.py index 6a29fac..cac1c34 100644 --- a/tests/test_core_phase1.py +++ b/tests/test_core_phase1.py @@ -10,7 +10,6 @@ import pytest import requests - from mw_api.core.config import ( BotConfig, get_default_config, diff --git a/tests/test_mdwiki_page.py b/tests/test_mdwiki_page.py index e73e735..c96b208 100644 --- a/tests/test_mdwiki_page.py +++ b/tests/test_mdwiki_page.py @@ -1,17 +1,15 @@ """ """ import pytest - from mw_api import ALL_APIS @pytest.fixture(scope="module", autouse=True) def load_api(): - return ALL_APIS( - lang="www", family="mdwiki", username="your_username", password="your_password" - ) + return ALL_APIS(lang="www", family="mdwiki", username="your_username", password="your_password") +@pytest.mark.skip(reason="Integration test - 
requires mdwiki credentials") def test_main_page(load_api): api = load_api page = api.MainPage("Category:RTT") @@ -21,6 +19,7 @@ def test_main_page(load_api): assert len(text) > 0 +@pytest.mark.skip(reason="Integration test - requires mdwiki credentials") def test_main_page2(load_api): api = load_api page = api.MainPage("Main_Page") @@ -28,6 +27,7 @@ def test_main_page2(load_api): assert exists is True +@pytest.mark.skip(reason="Integration test - requires mdwiki credentials") def test_cat_members(load_api): api = load_api cat_members = api.CatDepth("RTT", depth=3, ns="0") @@ -36,6 +36,7 @@ def test_cat_members(load_api): assert len(cat_members) > 0 +@pytest.mark.skip(reason="Integration test - requires mdwiki credentials") def test_cat_members2(load_api): api = load_api cat_members = api.CatDepth("RTTNEURO", depth=3, ns="0") diff --git a/tests/test_phase2_3.py b/tests/test_phase2_3.py index 38898d6..2142a7e 100644 --- a/tests/test_phase2_3.py +++ b/tests/test_phase2_3.py @@ -6,7 +6,6 @@ """ import pytest - from mw_api.core.exceptions import ( AbuseFilterError, ApiError, @@ -38,9 +37,7 @@ def test_default_values(self): assert meta.wikibase_item == "" def test_custom_values(self): - meta = PageMetadata( - exists=True, is_redirect=True, namespace=14, wikibase_item="Q12345" - ) + meta = PageMetadata(exists=True, is_redirect=True, namespace=14, wikibase_item="Q12345") assert meta.exists is True assert meta.is_redirect is True assert meta.namespace == 14 diff --git a/tests/test_protocols.py b/tests/test_protocols.py index d1b2db6..69ebda9 100644 --- a/tests/test_protocols.py +++ b/tests/test_protocols.py @@ -10,7 +10,6 @@ from unittest.mock import create_autospec import pytest - from mw_api.core.protocols import LoginBotProtocol, SessionProtocol diff --git a/tests/test_repositories.py b/tests/test_repositories.py index 29b8f3e..f693c23 100644 --- a/tests/test_repositories.py +++ b/tests/test_repositories.py @@ -8,7 +8,6 @@ from unittest.mock import MagicMock, Mock 
import pytest - from mw_api.core.page import Page, PageMetadata from mw_api.repositories.page_repository import PageRepository @@ -52,13 +51,7 @@ def test_get_text_page_exists(self): "12345": { "pageid": 12345, "title": "Test Page", - "revisions": [ - { - "slots": { - "main": {"content": "This is the page content."} - } - } - ], + "revisions": [{"slots": {"main": {"content": "This is the page content."}}}], } } } @@ -74,9 +67,7 @@ def test_get_text_page_exists(self): def test_get_text_page_not_exists(self): """Test getting text from non-existent page.""" mock_bot = MockLoginBot() - mock_bot.set_response( - {"query": {"pages": {"-1": {"title": "Missing Page", "missing": True}}}} - ) + mock_bot.set_response({"query": {"pages": {"-1": {"title": "Missing Page", "missing": True}}}}) repo = PageRepository(mock_bot) page = Page(title="Missing Page") @@ -88,17 +79,7 @@ def test_get_text_alternative_format(self): """Test getting text with alternative content key.""" mock_bot = MockLoginBot() mock_bot.set_response( - { - "query": { - "pages": { - "12345": { - "revisions": [ - {"slots": {"main": {"*": "Alternative content format"}}} - ] - } - } - } - } + {"query": {"pages": {"12345": {"revisions": [{"slots": {"main": {"*": "Alternative content format"}}}]}}}} ) repo = PageRepository(mock_bot) @@ -150,9 +131,7 @@ def test_get_page_info_exists(self): def test_get_page_info_not_exists(self): """Test getting page info for non-existent page.""" mock_bot = MockLoginBot() - mock_bot.set_response( - {"query": {"pages": {"-1": {"title": "Missing Page", "missing": True}}}} - ) + mock_bot.set_response({"query": {"pages": {"-1": {"title": "Missing Page", "missing": True}}}}) repo = PageRepository(mock_bot) page = Page(title="Missing Page") @@ -202,9 +181,7 @@ def test_save_page(self): def test_page_exists_true(self): """Test page_exists returns True for existing page.""" mock_bot = MockLoginBot() - mock_bot.set_response( - {"query": {"pages": {"12345": {"pageid": 12345, "title": 
"Existing Page"}}}} - ) + mock_bot.set_response({"query": {"pages": {"12345": {"pageid": 12345, "title": "Existing Page"}}}}) repo = PageRepository(mock_bot) @@ -213,9 +190,7 @@ def test_page_exists_true(self): def test_page_exists_false(self): """Test page_exists returns False for non-existent page.""" mock_bot = MockLoginBot() - mock_bot.set_response( - {"query": {"pages": {"-1": {"title": "Missing Page", "missing": True}}}} - ) + mock_bot.set_response({"query": {"pages": {"-1": {"title": "Missing Page", "missing": True}}}}) repo = PageRepository(mock_bot) diff --git a/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates.py b/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates.py index 8302736..f00d705 100644 --- a/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates.py +++ b/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates.py @@ -4,14 +4,15 @@ Tests cover all scenarios including template restrictions, caching behavior, and special template handling (nobots, bots). 
""" + import sys -import pytest +import pytest from mw_api.api_utils.bot_edit.bot_edit_by_templates import ( - is_bot_edit_allowed, Bot_Cache, - stop_edit_temps, edit_username, + is_bot_edit_allowed, + stop_edit_temps, ) @@ -38,7 +39,6 @@ def bot_username(): return edit_username.get(1, "Mr.Ibrahembot") - # Test bot job normalization class TestBotJobNormalization: """Test cases for bot job parameter normalization.""" @@ -370,10 +370,6 @@ def test_different_botjob_not_affected_by_specific_template(self, original_argv) sys.argv = ["script"] text = "{{لا للتعريب}}" # Should block for 'تعريب' botjob - assert not is_bot_edit_allowed( - text=text, title_page="Test", botjob="تعريب" - ) + assert not is_bot_edit_allowed(text=text, title_page="Test", botjob="تعريب") # Should allow for 'all' botjob (not in 'all' stop list) - assert is_bot_edit_allowed( - text=text, title_page="Test2", botjob="all" - ) + assert is_bot_edit_allowed(text=text, title_page="Test2", botjob="all") diff --git a/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates2.py b/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates2.py index 7de82b3..846ba1f 100644 --- a/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates2.py +++ b/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates2.py @@ -8,26 +8,26 @@ - Edge cases and special conditions """ -import pytest -from unittest.mock import patch, MagicMock import sys +from unittest.mock import MagicMock, patch +import pytest from mw_api.api_utils.bot_edit.bot_edit_by_templates import ( - is_bot_edit_allowed, Bot_Cache, + is_bot_edit_allowed, stop_edit_temps, ) - # ==================== Fixtures ==================== + @pytest.fixture(autouse=True) def reset_environment(): """Reset environment before and after each test.""" # Setup Bot_Cache.clear() original_argv = sys.argv.copy() - sys.argv = ['test'] + sys.argv = ["test"] yield @@ -39,13 +39,14 @@ def 
reset_environment(): @pytest.fixture def mock_wtp(): """Provide a mocked wikitextparser.""" - with patch('mw_api.api_utils.bot_edit.bot_edit_by_templates.wtp') as mock: + with patch("mw_api.api_utils.bot_edit.bot_edit_by_templates.wtp") as mock: yield mock @pytest.fixture def create_mock_template(): """Factory fixture for creating mock templates.""" + def _create_template(name, arguments=None): mock_template = MagicMock() mock_template.normal_name.return_value = name @@ -70,12 +71,13 @@ def _create_template(name, arguments=None): @pytest.fixture def setup_parser(mock_wtp, create_mock_template): """Factory fixture for setting up parser with templates.""" + def _setup(templates_config): templates = [] for config in templates_config: if isinstance(config, dict): - name = config.get('name') - arguments = config.get('arguments') + name = config.get("name") + arguments = config.get("arguments") templates.append(create_mock_template(name, arguments)) else: # If just a string, create template with that name @@ -92,6 +94,7 @@ def _setup(templates_config): # ==================== Basic Functionality Tests ==================== + class TestBasicFunctionality: """Test basic functionality of is_bot_edit_allowed.""" @@ -116,17 +119,14 @@ def test_default_botjob_parameter(self): def test_fixref_cat_stub_tempcat_portal_defaults_to_all(self): """Test that combined botjob string defaults to 'all'.""" text = "Plain text" - result = is_bot_edit_allowed( - text=text, - title_page="Test Page", - botjob="fixref|cat|stub|tempcat|portal" - ) + result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="fixref|cat|stub|tempcat|portal") assert result is True assert "all" in Bot_Cache # ==================== Cache Tests ==================== + class TestCacheBehavior: """Test cache behavior and management.""" @@ -186,6 +186,7 @@ def test_cache_persists_across_calls(self): assert first_cache_value == second_cache_value + # ==================== Nobots Template Tests ==================== 
@@ -194,7 +195,7 @@ class TestNobotsTemplate: def test_nobots_without_params_denies_edit(self, setup_parser): """Test that {{nobots}} without parameters denies editing.""" - setup_parser([{'name': 'nobots', 'arguments': None}]) + setup_parser([{"name": "nobots", "arguments": None}]) text = "{{nobots}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -202,7 +203,7 @@ def test_nobots_without_params_denies_edit(self, setup_parser): def test_nobots_with_all_denies_edit(self, setup_parser): """Test that {{nobots|1=all}} denies editing.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': 'all'}}]) + setup_parser([{"name": "nobots", "arguments": {"1": "all"}}]) text = "{{nobots|1=all}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -210,7 +211,7 @@ def test_nobots_with_all_denies_edit(self, setup_parser): def test_nobots_with_specific_bot_denies_edit(self, setup_parser): """Test that {{nobots|1=Mr.Ibrahembot}} denies editing.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': 'Mr.Ibrahembot'}}]) + setup_parser([{"name": "nobots", "arguments": {"1": "Mr.Ibrahembot"}}]) text = "{{nobots|1=Mr.Ibrahembot}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -218,7 +219,7 @@ def test_nobots_with_specific_bot_denies_edit(self, setup_parser): def test_nobots_with_bot_list_including_our_bot_denies(self, setup_parser): """Test that {{nobots|1=Bot1,Mr.Ibrahembot,Bot2}} denies editing.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': 'Bot1,Mr.Ibrahembot,Bot2'}}]) + setup_parser([{"name": "nobots", "arguments": {"1": "Bot1,Mr.Ibrahembot,Bot2"}}]) text = "{{nobots|1=Bot1,Mr.Ibrahembot,Bot2}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -226,7 +227,7 @@ def test_nobots_with_bot_list_including_our_bot_denies(self, setup_parser): def test_nobots_with_other_bots_allows_edit(self, setup_parser): """Test that {{nobots|1=OtherBot}} allows 
editing.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': 'OtherBot,AnotherBot'}}]) + setup_parser([{"name": "nobots", "arguments": {"1": "OtherBot,AnotherBot"}}]) text = "{{nobots|1=OtherBot,AnotherBot}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -234,7 +235,7 @@ def test_nobots_with_other_bots_allows_edit(self, setup_parser): def test_nobots_case_insensitive(self, setup_parser): """Test that nobots template matching is case insensitive.""" - setup_parser([{'name': 'NoBots', 'arguments': None}]) + setup_parser([{"name": "NoBots", "arguments": None}]) text = "{{NoBots}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -242,7 +243,7 @@ def test_nobots_case_insensitive(self, setup_parser): def test_nobots_with_whitespace_in_bot_names(self, setup_parser): """Test handling of whitespace in bot name lists.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': ' Bot1 , Mr.Ibrahembot , Bot2 '}}]) + setup_parser([{"name": "nobots", "arguments": {"1": " Bot1 , Mr.Ibrahembot , Bot2 "}}]) text = "{{nobots|1= Bot1 , Mr.Ibrahembot , Bot2 }}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -251,12 +252,13 @@ def test_nobots_with_whitespace_in_bot_names(self, setup_parser): # ==================== Bots Template Tests ==================== + class TestBotsTemplate: """Test bots template handling.""" def test_bots_without_params_denies_edit(self, setup_parser): """Test that {{bots}} without parameters denies editing.""" - setup_parser([{'name': 'bots', 'arguments': None}]) + setup_parser([{"name": "bots", "arguments": None}]) text = "{{bots}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -264,7 +266,7 @@ def test_bots_without_params_denies_edit(self, setup_parser): def test_bots_allow_all_allows_edit(self, setup_parser): """Test that {{bots|allow=all}} allows editing.""" - setup_parser([{'name': 'bots', 'arguments': {'allow': 
'all'}}]) + setup_parser([{"name": "bots", "arguments": {"allow": "all"}}]) text = "{{bots|allow=all}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -272,7 +274,7 @@ def test_bots_allow_all_allows_edit(self, setup_parser): def test_bots_allow_specific_bot_allows_edit(self, setup_parser): """Test that {{bots|allow=Mr.Ibrahembot}} allows editing.""" - setup_parser([{'name': 'bots', 'arguments': {'allow': 'Mr.Ibrahembot'}}]) + setup_parser([{"name": "bots", "arguments": {"allow": "Mr.Ibrahembot"}}]) text = "{{bots|allow=Mr.Ibrahembot}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -280,7 +282,7 @@ def test_bots_allow_specific_bot_allows_edit(self, setup_parser): def test_bots_allow_bot_list_including_our_bot_allows(self, setup_parser): """Test that {{bots|allow=Bot1,Mr.Ibrahembot,Bot2}} allows editing.""" - setup_parser([{'name': 'bots', 'arguments': {'allow': 'Bot1,Mr.Ibrahembot,Bot2'}}]) + setup_parser([{"name": "bots", "arguments": {"allow": "Bot1,Mr.Ibrahembot,Bot2"}}]) text = "{{bots|allow=Bot1,Mr.Ibrahembot,Bot2}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -288,7 +290,7 @@ def test_bots_allow_bot_list_including_our_bot_allows(self, setup_parser): def test_bots_allow_none_denies_edit(self, setup_parser): """Test that {{bots|allow=none}} denies editing.""" - setup_parser([{'name': 'bots', 'arguments': {'allow': 'none'}}]) + setup_parser([{"name": "bots", "arguments": {"allow": "none"}}]) text = "{{bots|allow=none}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -296,7 +298,7 @@ def test_bots_allow_none_denies_edit(self, setup_parser): def test_bots_allow_other_bots_denies_edit(self, setup_parser): """Test that {{bots|allow=OtherBot}} denies editing.""" - setup_parser([{'name': 'bots', 'arguments': {'allow': 'OtherBot,AnotherBot'}}]) + setup_parser([{"name": "bots", "arguments": {"allow": "OtherBot,AnotherBot"}}]) text = 
"{{bots|allow=OtherBot,AnotherBot}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -304,7 +306,7 @@ def test_bots_allow_other_bots_denies_edit(self, setup_parser): def test_bots_deny_all_denies_edit(self, setup_parser): """Test that {{bots|deny=all}} denies editing.""" - setup_parser([{'name': 'bots', 'arguments': {'deny': 'all'}}]) + setup_parser([{"name": "bots", "arguments": {"deny": "all"}}]) text = "{{bots|deny=all}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -312,7 +314,7 @@ def test_bots_deny_all_denies_edit(self, setup_parser): def test_bots_deny_specific_bot_denies_edit(self, setup_parser): """Test that {{bots|deny=Mr.Ibrahembot}} denies editing.""" - setup_parser([{'name': 'bots', 'arguments': {'deny': 'Mr.Ibrahembot'}}]) + setup_parser([{"name": "bots", "arguments": {"deny": "Mr.Ibrahembot"}}]) text = "{{bots|deny=Mr.Ibrahembot}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -320,7 +322,7 @@ def test_bots_deny_specific_bot_denies_edit(self, setup_parser): def test_bots_deny_other_bots_allows_edit(self, setup_parser): """Test that {{bots|deny=OtherBot}} allows editing.""" - setup_parser([{'name': 'bots', 'arguments': {'deny': 'OtherBot,AnotherBot'}}]) + setup_parser([{"name": "bots", "arguments": {"deny": "OtherBot,AnotherBot"}}]) text = "{{bots|deny=OtherBot,AnotherBot}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -328,7 +330,7 @@ def test_bots_deny_other_bots_allows_edit(self, setup_parser): def test_bots_case_insensitive(self, setup_parser): """Test that bots template matching is case insensitive.""" - setup_parser([{'name': 'Bots', 'arguments': {'allow': 'all'}}]) + setup_parser([{"name": "Bots", "arguments": {"allow": "all"}}]) text = "{{Bots|allow=all}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -337,13 +339,14 @@ def test_bots_case_insensitive(self, setup_parser): 
# ==================== Stop Edit Templates Tests ==================== + class TestStopEditTemplates: """Test stop edit templates handling.""" @pytest.mark.parametrize("template_name", stop_edit_temps["all"]) def test_global_stop_templates_deny_edit(self, template_name, setup_parser): """Test that global stop templates deny editing.""" - setup_parser([{'name': template_name, 'arguments': None}]) + setup_parser([{"name": template_name, "arguments": None}]) text = f"{{{{{template_name}}}}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -352,7 +355,7 @@ def test_global_stop_templates_deny_edit(self, template_name, setup_parser): def test_specific_botjob_stop_template_denies_edit(self, setup_parser): """Test that job-specific stop templates deny editing.""" # Test تعريب job with its specific template - setup_parser([{'name': 'لا للتعريب', 'arguments': None}]) + setup_parser([{"name": "لا للتعريب", "arguments": None}]) text = "{{لا للتعريب}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="تعريب") @@ -361,22 +364,20 @@ def test_specific_botjob_stop_template_denies_edit(self, setup_parser): def test_stop_template_for_different_botjob_allows_edit(self, setup_parser): """Test that stop templates for different bot jobs allow editing.""" # Template for تعريب job, but we're running cat job - setup_parser([{'name': 'لا للتعريب', 'arguments': None}]) + setup_parser([{"name": "لا للتعريب", "arguments": None}]) text = "{{لا للتعريب}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="cat") assert result is True - @pytest.mark.parametrize("botjob,template_list", [ - (job, templates) - for job, templates in stop_edit_temps.items() - if job != "all" - ]) + @pytest.mark.parametrize( + "botjob,template_list", [(job, templates) for job, templates in stop_edit_temps.items() if job != "all"] + ) def test_all_stop_templates_for_each_botjob(self, botjob, template_list, setup_parser): """Test all stop templates 
for each specific bot job.""" for template_name in template_list: Bot_Cache.clear() - setup_parser([{'name': template_name, 'arguments': None}]) + setup_parser([{"name": template_name, "arguments": None}]) text = f"{{{{{template_name}}}}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob=botjob) @@ -385,15 +386,13 @@ def test_all_stop_templates_for_each_botjob(self, botjob, template_list, setup_p # ==================== Multiple Templates Tests ==================== + class TestMultipleTemplates: """Test handling of multiple templates.""" def test_multiple_templates_first_restricting_denies(self, setup_parser): """Test that first restricting template denies editing.""" - setup_parser([ - {'name': 'nobots', 'arguments': None}, - {'name': 'some_other_template', 'arguments': None} - ]) + setup_parser([{"name": "nobots", "arguments": None}, {"name": "some_other_template", "arguments": None}]) text = "{{nobots}} {{some_other_template}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -401,10 +400,7 @@ def test_multiple_templates_first_restricting_denies(self, setup_parser): def test_multiple_non_restricting_templates_allows(self, setup_parser): """Test that multiple non-restricting templates allow editing.""" - setup_parser([ - {'name': 'infobox', 'arguments': None}, - {'name': 'citation', 'arguments': None} - ]) + setup_parser([{"name": "infobox", "arguments": None}, {"name": "citation", "arguments": None}]) text = "{{infobox}} {{citation}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -413,12 +409,13 @@ def test_multiple_non_restricting_templates_allows(self, setup_parser): # ==================== Edge Cases Tests ==================== + class TestEdgeCases: """Test edge cases and special conditions.""" def test_empty_template_parameters(self, setup_parser): """Test handling of templates with empty parameter values.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': ''}}]) + 
setup_parser([{"name": "nobots", "arguments": {"1": ""}}]) text = "{{nobots|1=}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -427,10 +424,7 @@ def test_empty_template_parameters(self, setup_parser): def test_template_with_multiple_parameters(self, setup_parser): """Test template with multiple parameters.""" - setup_parser([{ - 'name': 'bots', - 'arguments': {'allow': 'all', 'other_param': 'some_value'} - }]) + setup_parser([{"name": "bots", "arguments": {"allow": "all", "other_param": "some_value"}}]) text = "{{bots|allow=all|other_param=some_value}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -438,10 +432,7 @@ def test_template_with_multiple_parameters(self, setup_parser): def test_parameter_filtering_empty_values(self, setup_parser): """Test that parameters with empty values are filtered out.""" - setup_parser([{ - 'name': 'bots', - 'arguments': {'allow': 'all', 'empty_param': ''} - }]) + setup_parser([{"name": "bots", "arguments": {"allow": "all", "empty_param": ""}}]) text = "{{bots|allow=all|empty_param=}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") @@ -450,14 +441,18 @@ def test_parameter_filtering_empty_values(self, setup_parser): # ==================== Parametrized Test Collections ==================== + class TestParametrizedScenarios: """Parametrized tests for various scenarios.""" - @pytest.mark.parametrize("text,expected", [ - ("", True), - ("Plain text", True), - ("Some content without templates", True), - ]) + @pytest.mark.parametrize( + "text,expected", + [ + ("", True), + ("Plain text", True), + ("Some content without templates", True), + ], + ) def test_non_template_text_allows_editing(self, text, expected): """Test that non-template text allows editing.""" result = is_bot_edit_allowed(text=text, title_page="Test", botjob="all") @@ -470,44 +465,53 @@ def test_cache_initialized_for_different_botjobs(self, botjob): expected_job = "all" if 
botjob in ["", "fixref|cat|stub|tempcat|portal"] else botjob assert expected_job in Bot_Cache - @pytest.mark.parametrize("bot_list,should_allow", [ - ("OtherBot", True), - ("Bot1,Bot2,Bot3", True), - ("Mr.Ibrahembot", False), - ("Bot1,Mr.Ibrahembot", False), - ("all", False), - ]) + @pytest.mark.parametrize( + "bot_list,should_allow", + [ + ("OtherBot", True), + ("Bot1,Bot2,Bot3", True), + ("Mr.Ibrahembot", False), + ("Bot1,Mr.Ibrahembot", False), + ("all", False), + ], + ) def test_nobots_with_various_bot_lists(self, bot_list, should_allow, setup_parser): """Test nobots template with various bot lists.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': bot_list}}]) + setup_parser([{"name": "nobots", "arguments": {"1": bot_list}}]) result = is_bot_edit_allowed(text="{{nobots}}", title_page="Test", botjob="all") assert result == should_allow - @pytest.mark.parametrize("allow_list,should_allow", [ - ("all", True), - ("Mr.Ibrahembot", True), - ("Bot1,Mr.Ibrahembot,Bot2", True), - ("none", False), - ("OtherBot", False), - ]) + @pytest.mark.parametrize( + "allow_list,should_allow", + [ + ("all", True), + ("Mr.Ibrahembot", True), + ("Bot1,Mr.Ibrahembot,Bot2", True), + ("none", False), + ("OtherBot", False), + ], + ) def test_bots_allow_with_various_lists(self, allow_list, should_allow, setup_parser): """Test bots allow parameter with various lists.""" - setup_parser([{'name': 'bots', 'arguments': {'allow': allow_list}}]) + setup_parser([{"name": "bots", "arguments": {"allow": allow_list}}]) result = is_bot_edit_allowed(text="{{bots}}", title_page="Test", botjob="all") assert result == should_allow - @pytest.mark.parametrize("deny_list,should_allow", [ - ("all", False), - ("Mr.Ibrahembot", False), - ("Bot1,Mr.Ibrahembot,Bot2", False), - ("OtherBot", True), - ("Bot1,Bot2", True), - ]) + @pytest.mark.parametrize( + "deny_list,should_allow", + [ + ("all", False), + ("Mr.Ibrahembot", False), + ("Bot1,Mr.Ibrahembot,Bot2", False), + ("OtherBot", True), + ("Bot1,Bot2", 
True), + ], + ) def test_bots_deny_with_various_lists(self, deny_list, should_allow, setup_parser): """Test bots deny parameter with various lists.""" - setup_parser([{'name': 'bots', 'arguments': {'deny': deny_list}}]) + setup_parser([{"name": "bots", "arguments": {"deny": deny_list}}]) result = is_bot_edit_allowed(text="{{bots}}", title_page="Test", botjob="all") assert result == should_allow @@ -515,27 +519,20 @@ def test_bots_deny_with_various_lists(self, deny_list, should_allow, setup_parse # ==================== Integration Tests ==================== + class TestIntegration: """Integration tests for complete workflows.""" def test_complete_workflow_allowed(self, setup_parser): """Test complete workflow where editing is allowed.""" # Setup non-restricting template - setup_parser([{'name': 'infobox', 'arguments': None}]) + setup_parser([{"name": "infobox", "arguments": None}]) # First call - process and cache - result1 = is_bot_edit_allowed( - text="{{infobox}}", - title_page="Article", - botjob="all" - ) + result1 = is_bot_edit_allowed(text="{{infobox}}", title_page="Article", botjob="all") # Second call - use cache - result2 = is_bot_edit_allowed( - text="{{infobox}}", - title_page="Article", - botjob="all" - ) + result2 = is_bot_edit_allowed(text="{{infobox}}", title_page="Article", botjob="all") assert result1 is True assert result2 is True @@ -545,21 +542,13 @@ def test_complete_workflow_allowed(self, setup_parser): def test_complete_workflow_denied(self, setup_parser): """Test complete workflow where editing is denied.""" # Setup restricting template - setup_parser([{'name': 'nobots', 'arguments': None}]) + setup_parser([{"name": "nobots", "arguments": None}]) # First call - process and cache - result1 = is_bot_edit_allowed( - text="{{nobots}}", - title_page="Article", - botjob="all" - ) + result1 = is_bot_edit_allowed(text="{{nobots}}", title_page="Article", botjob="all") # Second call - use cache - result2 = is_bot_edit_allowed( - text="{{nobots}}", - 
title_page="Article", - botjob="all" - ) + result2 = is_bot_edit_allowed(text="{{nobots}}", title_page="Article", botjob="all") assert result1 is False assert result2 is False @@ -569,11 +558,11 @@ def test_complete_workflow_denied(self, setup_parser): def test_different_pages_different_results(self, setup_parser): """Test that different pages can have different results.""" # Page 1 - allowed - setup_parser([{'name': 'infobox', 'arguments': None}]) + setup_parser([{"name": "infobox", "arguments": None}]) result1 = is_bot_edit_allowed(text="{{infobox}}", title_page="Page1", botjob="all") # Page 2 - denied - setup_parser([{'name': 'nobots', 'arguments': None}]) + setup_parser([{"name": "nobots", "arguments": None}]) result2 = is_bot_edit_allowed(text="{{nobots}}", title_page="Page2", botjob="all") assert result1 is True @@ -584,29 +573,30 @@ def test_different_pages_different_results(self, setup_parser): # ==================== Performance and Special Cases ==================== + class TestSpecialCases: """Test special cases and boundary conditions.""" def test_very_long_bot_list(self, setup_parser): """Test handling of very long bot lists.""" long_list = ",".join([f"Bot{i}" for i in range(100)]) - setup_parser([{'name': 'nobots', 'arguments': {'1': long_list}}]) + setup_parser([{"name": "nobots", "arguments": {"1": long_list}}]) result = is_bot_edit_allowed(text="{{nobots}}", title_page="Test", botjob="all") assert result is True # Our bot not in the list def test_unicode_bot_names(self, setup_parser): """Test handling of unicode characters in bot names.""" - setup_parser([{'name': 'nobots', 'arguments': {'1': 'بوت1,بوت2'}}]) + setup_parser([{"name": "nobots", "arguments": {"1": "بوت1,بوت2"}}]) result = is_bot_edit_allowed(text="{{nobots}}", title_page="Test", botjob="all") assert result is True # Our bot not in the list def test_mixed_case_template_names(self, setup_parser): """Test that template name matching works with mixed case.""" - for name in ['nobots', 
'Nobots', 'NOBOTS', 'nObOtS']: + for name in ["nobots", "Nobots", "NOBOTS", "nObOtS"]: Bot_Cache.clear() - setup_parser([{'name': name, 'arguments': None}]) + setup_parser([{"name": name, "arguments": None}]) result = is_bot_edit_allowed(text=f"{{{{{name}}}}}", title_page="Test", botjob="all") assert result is False, f"Should deny for template name: {name}" @@ -614,6 +604,7 @@ def test_mixed_case_template_names(self, setup_parser): # ==================== Pytest Marks and Markers ==================== + class TestSmokeTests: """Quick smoke tests for CI/CD.""" @@ -623,7 +614,7 @@ def test_basic_allow(self): def test_basic_deny(self, setup_parser): """Basic deny scenario.""" - setup_parser([{'name': 'nobots', 'arguments': None}]) + setup_parser([{"name": "nobots", "arguments": None}]) assert is_bot_edit_allowed("{{nobots}}", "Test", "all") is False def test_cache_works(self): diff --git a/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates_pypass.py b/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates_pypass.py index a6aad14..c891ca2 100644 --- a/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates_pypass.py +++ b/tests/unit/api_utils/bot_edit/bot_edit_by_templates/test_bot_edit_by_templates_pypass.py @@ -1,13 +1,12 @@ -""" -""" +""" """ -import pytest import sys -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch +import pytest from mw_api.api_utils.bot_edit.bot_edit_by_templates import ( - is_bot_edit_allowed, Bot_Cache, + is_bot_edit_allowed, ) # ==================== Fixtures ==================== @@ -19,7 +18,7 @@ def reset_environment(): # Setup Bot_Cache.clear() original_argv = sys.argv.copy() - sys.argv = ['test'] + sys.argv = ["test"] yield @@ -31,13 +30,14 @@ def reset_environment(): @pytest.fixture def mock_wtp(): """Provide a mocked wikitextparser.""" - with patch('mw_api.api_utils.bot_edit.bot_edit_by_templates.wtp') as mock: + with 
patch("mw_api.api_utils.bot_edit.bot_edit_by_templates.wtp") as mock: yield mock @pytest.fixture def create_mock_template(): """Factory fixture for creating mock templates.""" + def _create_template(name, arguments=None): mock_template = MagicMock() mock_template.normal_name.return_value = name @@ -62,12 +62,13 @@ def _create_template(name, arguments=None): @pytest.fixture def setup_parser(mock_wtp, create_mock_template): """Factory fixture for setting up parser with templates.""" + def _setup(templates_config): templates = [] for config in templates_config: if isinstance(config, dict): - name = config.get('name') - arguments = config.get('arguments') + name = config.get("name") + arguments = config.get("arguments") templates.append(create_mock_template(name, arguments)) else: # If just a string, create template with that name @@ -91,7 +92,7 @@ class TestCommandLineBypass: def test_argv_bypasses_all_checks(self, argv_value, setup_parser): """Test that specific argv values bypass all restrictions.""" sys.argv.append(argv_value) - setup_parser([{'name': 'nobots', 'arguments': None}]) + setup_parser([{"name": "nobots", "arguments": None}]) text = "{{nobots}}" result = is_bot_edit_allowed(text=text, title_page="Test Page", botjob="all") diff --git a/tests/unit/api_utils/bot_edit/bot_edit_by_time/test_bot_edit_by_time.py b/tests/unit/api_utils/bot_edit/bot_edit_by_time/test_bot_edit_by_time.py index 2464ca5..812aaea 100644 --- a/tests/unit/api_utils/bot_edit/bot_edit_by_time/test_bot_edit_by_time.py +++ b/tests/unit/api_utils/bot_edit/bot_edit_by_time/test_bot_edit_by_time.py @@ -1,9 +1,9 @@ -""" -""" +""" """ + import sys -import pytest -from newapi.api_utils.bot_edit.bot_edit_by_time import ( +import pytest +from mw_api.api_utils.bot_edit.bot_edit_by_time import ( check_create_time, check_last_edit_time, )