From fb55318a6c196be65633512f80d62f8f1e241e9d Mon Sep 17 00:00:00 2001 From: rly0nheart <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:15:19 +0200 Subject: [PATCH 1/5] Setting limit, sort and timeframe parameters for individual bulk data functions. Fix sort and limit bug in CLI. Major code fixes and refactoring --- README.md | 60 +++--- knewkarma/_cli.py | 220 ++++++++++++---------- knewkarma/_parser.py | 22 +-- knewkarma/_project.py | 6 +- knewkarma/{_coreutils.py => _utils.py} | 10 + knewkarma/api.py | 49 +++-- knewkarma/base.py | 242 +++++++++++++++---------- pyproject.toml | 4 +- tests/test_api.py | 5 +- 9 files changed, 360 insertions(+), 258 deletions(-) rename knewkarma/{_coreutils.py => _utils.py} (92%) diff --git a/README.md b/README.md index 79959a3..65b4bdc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ ![knewkarma](https://github.com/bellingcat/knewkarma/assets/74001397/45262d9d-6633-418d-9ace-7c3c88b5ca36) - A **Reddit** Data Analysis Toolkit. [![.Net](https://img.shields.io/badge/Visual%20Basic%20.NET-5C2D91?style=flat&logo=.net&logoColor=white)](https://github.com/search?q=repo%3Abellingcat%2Fknewkarma++language%3A%22Visual+Basic+.NET%22&type=code) [![Python](https://img.shields.io/badge/Python-3670A0?style=flat&logo=python&logoColor=ffdd54)](https://github.com/search?q=repo%3Abellingcat%2Fknewkarma++language%3APython&type=code) [![Docker](https://img.shields.io/badge/Dockefile-%230db7ed.svg?style=flat&logo=docker&logoColor=white)](https://github.com/search?q=repo%3Abellingcat%2Fknewkarma++language%3ADockerfile&type=code) [![PyPI - Version](https://img.shields.io/pypi/v/knewkarma?style=flat&logo=pypi&logoColor=ffdd54&label=PyPI&labelColor=3670A0&color=3670A0)](https://pypi.org/project/knewkarma) [![BuyMeACoffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-ffdd00?style=flat&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/_rly0nheart) @@ -36,30 +35,32 @@ from knewkarma import RedditUser # Define an asynchronous function to fetch User -async def async_user(username: str, data_timeframe: str, data_limit: int, data_sort: str): - # Initialize a RedditUser object with the specified username, data timeframe, limit, and sorting criteria - user = RedditUser(username=username, data_timeframe=data_timeframe, data_limit=data_limit, data_sort=data_sort) +async def async_user(username: str): + # Initialize RedditUser with the specified username + user = RedditUser(username=username) # Establish an asynchronous HTTP session async with aiohttp.ClientSession() as session: # Fetch user's profile profile = await user.profile(session=session) + # timeframes: ["hour", "day", "month", "year"]. Leave parameter unspecified to get from all timeframes. + # sorting: ["controversial", "new", "top", "best", "hot", "rising"]. Leave parameter unspecified to get from all sort criteria. + # Fetch user's posts - posts = await user.posts(session=session) + posts = await user.posts(limit=200, sort="top", timeframe="year", + session=session) # Fetch user's comments - comments = await user.comments(session=session) + comments = await user.comments(limit=200, sort="top", timeframe="year", + session=session) print(profile) print(posts) print(comments) -# Run the asynchronous function with a specified username, data limit, and sorting parameter -# timeframes: ["all", "hour", "day", "month", "year"] -# sorting: ["all", "controversial", "new", "top", "best", "hot", "rising"] -asyncio.run(async_user(username="automoderator", data_timeframe="year", data_limit=100, data_sort="all")) +asyncio.run(async_user(username="automoderator")) ``` ### Get Subreddit Data @@ -70,11 +71,10 @@ import aiohttp from knewkarma import RedditSub -async def async_subreddit(subreddit_name: str, data_timeframe: str, data_limit: int, data_sort: str): - # Initialize a RedditSub object with the specified subreddit, data timeframe, limit, and sorting criteria +async def async_subreddit(subreddit_name: str): + # Initialize RedditSub with the specified subreddit subreddit = RedditSub( - subreddit=subreddit_name, data_timeframe=data_timeframe, data_limit=data_limit, data_sort=data_sort - ) + subreddit=subreddit_name) # Create an asynchronous HTTP session async with aiohttp.ClientSession() as session: @@ -82,17 +82,16 @@ async def async_subreddit(subreddit_name: str, data_timeframe: str, data_limit: profile = await subreddit.profile(session=session) # Fetch subreddit's posts - posts = await subreddit.posts(session=session) + # timeframes: ["hour", "day", "month", "year"]. Leave parameter unspecified to get from all timeframes. + # sorting: ["controversial", "new", "top", "best", "hot", "rising"]. Leave parameter unspecified to get from all sort criteria. + posts = await subreddit.posts(limit=100, sort="top", timeframe="month", session=session) print(profile) print(posts) -# Run the asynchronous function with specified subreddit name, data limit, and sorting criteria -# timeframes: ["all", "hour", "day", "month", "year"] -# sorting: ["all", "controversial", "new", "top", "best", "hot", "rising"] asyncio.run( - async_subreddit(subreddit_name="MachineLearning", data_timeframe="year", data_limit=100, data_sort="top") + async_subreddit(subreddit_name="MachineLearning") ) ```` @@ -104,28 +103,31 @@ import aiohttp from knewkarma import RedditPosts -async def async_posts(timeframe: str, limit: int, sort: str): - # Initialize RedditPosts with the specified timeframe, limit and sorting criteria - posts = RedditPosts(timeframe=timeframe, limit=limit, sort=sort) +async def async_posts(): + # Initialize RedditPosts + posts = RedditPosts() # Create an asynchronous HTTP session async with aiohttp.ClientSession() as session: + # timeframes: ["hour", "day", "month", "year"]. Leave parameter unspecified to get from all timeframes. + # sorting: ["controversial", "new", "top", "best", "hot", "rising"]. Leave parameter unspecified to get from all sort criteria. + # Fetch front page posts - front_page_posts = await posts.front_page(session=session) + front_page_posts = await posts.front_page(limit=50, sort="top", timeframe="hour", session=session) + # Fetch posts from a specified listing ('best') - listing_posts = await posts.listing(listings_name="best", session=session) + listing_posts = await posts.listing(listings_name="best", limit=50, sort="best", timeframe="month", + session=session) + # Fetch posts that match the specified search query 'covid-19' - search_results = await posts.search(query="covid-19", session=session) + search_results = await posts.search(query="covid-19", limit=300, session=session) print(front_page_posts) print(listing_posts) print(search_results) -# Run the asynchronous function with a specified limit and sorting parameter -# timeframes: ["all", "hour", "day", "month", "year"] -# sorting: ["all", "controversial", "new", "top", "best", "hot", "rising"] -asyncio.run(async_posts(timeframe="year", limit=100, sort="all")) +asyncio.run(async_posts()) ``` diff --git a/knewkarma/_cli.py b/knewkarma/_cli.py index 8e3c24d..d01d3ca 100644 --- a/knewkarma/_cli.py +++ b/knewkarma/_cli.py @@ -8,9 +8,9 @@ import aiohttp from rich.pretty import pprint -from ._coreutils import log, save_data, pathfinder from ._parser import create_parser, version from ._project import PROGRAM_DIRECTORY +from ._utils import log, save_data, pathfinder from .api import get_updates from .base import RedditUser, RedditSub, RedditPosts @@ -18,37 +18,91 @@ # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # -async def stage_cli(arguments: argparse.Namespace): +async def execute(arguments: argparse.Namespace, function_mapping: dict): """ - Sets up the command-line interface and executes the specified actions. + Executes command-line arguments based on user-input. :param arguments: Argparse namespace object containing parsed command-line arguments. + :type arguments: argparse.Namespace + :param function_mapping: A dictionary mapping command-line arguments to their functions. + :type function_mapping: dict """ + + # -------------------------------------------------------------------- # + + async with aiohttp.ClientSession() as request_session: + await get_updates(session=request_session) + + mode_action = function_mapping.get(arguments.mode) + is_executed: bool = False + + for action, function in mode_action: + if getattr(arguments, action, False): + call_function = await function(session=request_session) + + pprint(call_function, expand_all=True) + is_executed = True + + # -------------------------------------------------------------------- # + + if arguments.csv or arguments.json: + target_directory: str = os.path.join( + PROGRAM_DIRECTORY, f"{arguments.mode}_{action}" + ) + pathfinder( + directories=[ + os.path.join(target_directory, "csv"), + os.path.join(target_directory, "json"), + ] + ) + save_data( + data=call_function, + save_to_dir=target_directory, + save_json=arguments.json, + save_csv=arguments.csv, + ) + + break + + # -------------------------------------------------------------------- # + + if not is_executed: + log.warning( + f"knewkarma {arguments.mode}: missing one or more expected argument(s)." + ) + + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # + + +def stage(): + """ + Main entrypoint for the Knew Karma command-line interface. + + Sets up the command-line interface and command-line arguments + """ + + # -------------------------------------------------------------------- # + + parser = create_parser() + arguments: argparse = parser.parse_args() + start_time: datetime = datetime.now() + # -------------------------------------------------------------------- # - data_timeframe: str = arguments.timeframe - data_sorting: str = arguments.limit - data_limit: int = arguments.limit + limit: int = arguments.limit + sort = arguments.sort + timeframe = arguments.timeframe # -------------------------------------------------------------------- # user = RedditUser( username=arguments.username if hasattr(arguments, "username") else None, - data_timeframe=data_timeframe, - data_sort=data_sorting, - data_limit=data_limit, ) subreddit = RedditSub( subreddit=arguments.subreddit if hasattr(arguments, "subreddit") else None, - data_timeframe=data_timeframe, - data_sort=data_sorting, - data_limit=data_limit, - ) - posts = RedditPosts( - timeframe=data_timeframe, - sort=data_sorting, - limit=data_limit, ) + posts = RedditPosts() # -------------------------------------------------------------------- # @@ -56,23 +110,53 @@ async def stage_cli(arguments: argparse.Namespace): function_mapping: dict = { "user": [ ("profile", lambda session: user.profile(session=session)), - ("posts", lambda session: user.posts(session=session)), - ("comments", lambda session: user.comments(session=session)), + ( + "posts", + lambda session: user.posts( + limit=limit, sort=sort, timeframe=timeframe, session=session + ), + ), + ( + "comments", + lambda session: user.comments( + limit=limit, sort=sort, timeframe=timeframe, session=session + ), + ), ], "subreddit": [ ("profile", lambda session: subreddit.profile(session=session)), - ("posts", lambda session: subreddit.posts(session=session)), + ( + "posts", + lambda session: subreddit.posts( + limit=limit, sort=sort, timeframe=timeframe, session=session + ), + ), ], "posts": [ - ("front_page", lambda session: posts.front_page(session=session)), + ( + "front_page", + lambda session: posts.front_page( + limit=limit, sort=sort, timeframe=timeframe, session=session + ), + ), ( "search", - lambda session: posts.search(query=arguments.search, session=session), + lambda session: posts.search( + query=arguments.search, + limit=limit, + sort=sort, + timeframe=timeframe, + session=session, + ), ), ( "listing", lambda session: posts.listing( - listings_name=arguments.listing, session=session + listings_name=arguments.listing, + limit=limit, + sort=sort, + timeframe=timeframe, + session=session, ), ), ], @@ -80,83 +164,27 @@ async def stage_cli(arguments: argparse.Namespace): # -------------------------------------------------------------------- # - if arguments.mode in function_mapping: - async with aiohttp.ClientSession() as request_session: - await get_updates(session=request_session) - - mode_action = function_mapping.get(arguments.mode) - is_executed: bool = False - - for action, function in mode_action: - if getattr(arguments, action, False): - call_function = await function(session=request_session) - - pprint(call_function, expand_all=True) - is_executed = True - - # -------------------------------------------------------------------- # - - if arguments.csv or arguments.json: - target_directory: str = os.path.join( - PROGRAM_DIRECTORY, f"{arguments.mode}_{action}" - ) - pathfinder( - directories=[ - os.path.join(target_directory, "csv"), - os.path.join(target_directory, "json"), - ] - ) - save_data( - data=call_function, - save_to_dir=target_directory, - save_json=arguments.json, - save_csv=arguments.csv, - ) - - break - - # -------------------------------------------------------------------- # - - if not is_executed: - log.warning( - f"knewkarma {arguments.mode}: missing one or more expected argument(s)." - ) - - -# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # - - -def run_cli(): - """Main entrypoint for the Knew Karma command-line interface.""" - - # -------------------------------------------------------------------- # - - arguments: argparse = create_parser().parse_args() - start_time: datetime = datetime.now() - - # -------------------------------------------------------------------- # - - print( - """ + if arguments.mode and arguments.mode in function_mapping: + print( + """ ┓┏┓ ┓┏┓ ┃┫ ┏┓┏┓┓┏┏ ┃┫ ┏┓┏┓┏┳┓┏┓ ┛┗┛┛┗┗ ┗┻┛ ┛┗┛┗┻┛ ┛┗┗┗┻""" - ) - - # -------------------------------------------------------------------- # - - try: - start_time: datetime = datetime.now() - - log.info( - f"[bold]Knew Karma CLI[/] {version} started at " - f"{start_time.strftime('%a %b %d %Y, %I:%M:%S%p')}..." ) - asyncio.run(stage_cli(arguments=arguments)) - except KeyboardInterrupt: - log.warning(f"User interruption detected ([yellow]Ctrl+C[/])") - finally: - log.info(f"Stopped in {datetime.now() - start_time} seconds.") + try: + start_time: datetime = datetime.now() + + log.info( + f"[bold]Knew Karma CLI[/] {version} started at " + f"{start_time.strftime('%a %b %d %Y, %I:%M:%S%p')}..." + ) + asyncio.run(execute(arguments=arguments, function_mapping=function_mapping)) + except KeyboardInterrupt: + log.warning(f"User interruption detected ([yellow]Ctrl+C[/])") + finally: + log.info(f"Stopped in {datetime.now() - start_time} seconds.") + else: + parser.print_usage() # -------------------------------------------------------------------- # diff --git a/knewkarma/_parser.py b/knewkarma/_parser.py index f69d68a..eb659bd 100644 --- a/knewkarma/_parser.py +++ b/knewkarma/_parser.py @@ -1,6 +1,7 @@ # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # import argparse +from typing import get_args from rich.markdown import Markdown from rich_argparse import RichHelpFormatter @@ -13,6 +14,9 @@ subreddit_examples, operations_description, version, + POSTS_LISTINGS, + DATA_TIMEFRAME, + DATA_SORT_CRITERION, ) @@ -24,6 +28,7 @@ def create_parser() -> argparse.ArgumentParser: Creates and configures an argument parser for the command line arguments. :return: A configured argparse.ArgumentParser object ready to parse the command line arguments. + :rtype: argparse.ArgumentParser """ # -------------------------------------------------------------------- # @@ -33,7 +38,8 @@ def create_parser() -> argparse.ArgumentParser: formatter_class=RichHelpFormatter, ) subparsers = parser.add_subparsers( - dest="mode", help="operation mode", required=True + dest="mode", + help="operation mode", ) parser.add_argument( "-l", @@ -47,7 +53,7 @@ def create_parser() -> argparse.ArgumentParser: "--timeframe", type=str, default="all", - choices=["all", "hour", "day", "week", "month", "year"], + choices=list(get_args(DATA_TIMEFRAME)), help="timeframe to get ([bold][green]bulk[/][/]) data from (default: %(default)s)", ) parser.add_argument( @@ -55,15 +61,7 @@ def create_parser() -> argparse.ArgumentParser: "--sort", type=str, default="all", - choices=[ - "all", - "best", - "controversial", - "hot", - "new", - "rising", - "top", - ], + choices=list(get_args(DATA_SORT_CRITERION)), help="([bold][green]bulk[/][/]) sort criterion (default: %(default)s)", ) @@ -178,7 +176,7 @@ def create_parser() -> argparse.ArgumentParser: "--listing", default="all", help="get posts from a specified listing", - choices=["best", "controversial", "popular", "rising"], + choices=list(get_args(POSTS_LISTINGS)), ) return parser diff --git a/knewkarma/_project.py b/knewkarma/_project.py index 27f4d58..285e4b1 100644 --- a/knewkarma/_project.py +++ b/knewkarma/_project.py @@ -7,7 +7,7 @@ author: str = "Richard Mwewa" about_author: str = "https://rly0nheart.github.io" -version: str = "3.4.0.0" +version: str = "3.5.0.0" # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # @@ -96,7 +96,7 @@ # Construct path to the program's directory PROGRAM_DIRECTORY: str = os.path.expanduser(os.path.join("~", "knewkarma-data")) -DATA_SORT_CRITERION: Literal[ +DATA_SORT_CRITERION = Literal[ "controversial", "new", "top", @@ -104,7 +104,7 @@ "hot", "rising", ] - +POSTS_LISTINGS = Literal["best", "controversial", "popular", "rising"] DATA_TIMEFRAME = Literal["hour", "day", "week", "month", "year"] # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # diff --git a/knewkarma/_coreutils.py b/knewkarma/_utils.py similarity index 92% rename from knewkarma/_coreutils.py rename to knewkarma/_utils.py index e448eee..1f979ac 100644 --- a/knewkarma/_coreutils.py +++ b/knewkarma/_utils.py @@ -19,7 +19,9 @@ def unix_timestamp_to_utc(timestamp: int) -> str: Converts a UNIX timestamp to a formatted datetime.utc string. :param timestamp: The UNIX timestamp to be converted. + :type timestamp: int :return: A formatted datetime.utc string in the format "dd MMMM yyyy, hh:mm:ssAM/PM" + :rtype: str """ utc_from_timestamp: datetime = datetime.utcfromtimestamp(timestamp) datetime_string: str = utc_from_timestamp.strftime("%d %B %Y, %I:%M:%S%p") @@ -37,6 +39,7 @@ def filename_timestamp() -> str: :return: The formatted timestamp as a string. The format is "%d-%B-%Y-%I-%M-%S%p" for Windows and "%d-%B-%Y-%I:%M:%S%p" for non-Windows systems. + :rtype: str Example ------- @@ -59,6 +62,7 @@ def pathfinder(directories: list[str]): Creates directories in knewkarma-data directory of the user's home folder. :param directories: A list of file directories to create. + :type directories: list[str] """ for directory in directories: os.makedirs(directory, exist_ok=True) @@ -77,9 +81,13 @@ def save_data( Save the given (Reddit) data to a JSON/CSV file based on the save_csv and save_json parameters. :param data: The data to be saved, which can be a dict or a list of dicts. + :type data: Union[User, Subreddit, List[Union[Post, Comment]]] :param save_to_dir: Directory to save data to. + :type save_to_dir: str :param save_json: Used to get the True value and the filename for the created JSON file if specified. + :type save_json: bool :param save_csv: Used to get the True value and the filename for the created CSV file if specified. + :type save_csv: bool """ # -------------------------------------------------------------------- # @@ -137,7 +145,9 @@ def setup_logging(debug_mode: bool) -> logging.getLogger: Configure and return a logging object with the specified log level. :param debug_mode: A boolean value indicating whether log level should be set to DEBUG. + :type debug_mode: bool :return: A logging object configured with the specified log level. + :rtype: logging.getLogger """ from rich.logging import RichHandler diff --git a/knewkarma/api.py b/knewkarma/api.py index 2474f95..9d1108c 100644 --- a/knewkarma/api.py +++ b/knewkarma/api.py @@ -4,8 +4,8 @@ import aiohttp -from ._coreutils import log from ._project import version, about_author, DATA_SORT_CRITERION, DATA_TIMEFRAME +from ._utils import log # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # @@ -20,11 +20,14 @@ async def get_data(session: aiohttp.ClientSession, endpoint: str) -> Union[dict, list]: """ - Fetches JSON data from a given API endpoint. + Asynchronously fetches JSON data from a given API endpoint. :param session: aiohttp session to use for the request. + :type session: aiohttp.ClientSession :param endpoint: The API endpoint to fetch data from. + :type endpoint: str :return: Returns JSON data as a dictionary or list. Returns an empty dict if fetching fails. + :rtype: Union[dict, list] """ from sys import version as python_version @@ -66,8 +69,11 @@ def process_response( If it's a list, it ensures the list is not empty. :param response_data: The API response data to validate, which should be a dictionary or list. + :type response_data: Union[dict, list] :param valid_key: The key to check for in the data if it's a dictionary. + :type valid_key: str :return: The original data if valid, or an empty dictionary or list if invalid. + :rtype: Union[dict, list] """ if isinstance(response_data, dict): if valid_key: @@ -87,11 +93,12 @@ def process_response( async def get_updates(session: aiohttp.ClientSession): """ - Gets and compares the current program version with the remote version. + Asynchronously gets and compares the current program version with the remote version. Assumes version format: major.minor.patch.prefix :param session: aiohttp session to use for the request. + :type session: aiohttp.ClientSession """ import rich from rich.markdown import Markdown @@ -159,16 +166,20 @@ async def get_updates(session: aiohttp.ClientSession): async def get_profile( - profile_source: str, session: aiohttp.ClientSession, + profile_source: str, profile_type: Literal["user_profile", "subreddit_profile"], ) -> dict: """ - Gets profile data from the specified profile_type and profile_source. + Asynchronously gets profile data from the specified profile_type and profile_source. :param profile_source: Source to get profile data from. - :param session: aiohttp session to use for the request. + :type profile_source: str :param profile_type: The type of profile that is to be fetched. + :type profile_type: str + :param session: aiohttp session to use for the request. + :return: A dictionary object containing profile data from a selected source. + :rtype: dict """ # Use a dictionary for direct mapping source_map: dict = { @@ -192,10 +203,7 @@ async def get_profile( async def get_posts( - limit: int, session: aiohttp.ClientSession, - timeframe: DATA_TIMEFRAME, - sort: DATA_SORT_CRITERION, posts_type: Literal[ "user_posts", "user_comments", @@ -204,17 +212,28 @@ async def get_posts( "listing_posts", "front_page_posts", ], + limit: int, posts_source: str = None, -) -> list: + timeframe: DATA_TIMEFRAME = "all", + sort: DATA_SORT_CRITERION = "all", +) -> list[dict]: """ - Gets a specified number of posts, with a specified sorting criterion, from the specified source. + Asynchronously gets a specified number of posts, with a specified sorting criterion, from the specified source. - :param timeframe: Timeframe from which to get posts. - :param session: aiohttp session to use for the request. + :param posts_type: Type of posts to be fetched. + :type posts_type: str + :param posts_source: Source from where posts will be fetched. + :type posts_source: str :param limit: Maximum number of posts to get. + :type limit: int :param sort: Posts' sort criterion. - :param posts_type: Type of posts to be fetched - :param posts_source: Source from where posts will be fetched. + :type sort: str + :param timeframe: Timeframe from which to get posts. + :type timeframe: str + :param session: aiohttp session to use for the request. + :type session: aiohttp.ClientSession + :return: A list of dictionaries, each containing data of a post. + :rtype: list[dict] """ source_map = { "user_posts": f"{BASE_REDDIT_ENDPOINT}/user/{posts_source}/" diff --git a/knewkarma/base.py b/knewkarma/base.py index 26100a4..e2eadc2 100644 --- a/knewkarma/base.py +++ b/knewkarma/base.py @@ -4,8 +4,8 @@ import aiohttp -from ._coreutils import unix_timestamp_to_utc -from ._project import DATA_TIMEFRAME, DATA_SORT_CRITERION +from ._project import DATA_TIMEFRAME, DATA_SORT_CRITERION, POSTS_LISTINGS +from ._utils import unix_timestamp_to_utc from .api import get_profile, get_posts from .data import User, Subreddit, Comment, Post @@ -21,33 +21,25 @@ class RedditUser: def __init__( self, username: str, - data_limit: int, - data_timeframe: DATA_TIMEFRAME = "all", - data_sort: DATA_SORT_CRITERION = "all", ): """ Initialises a RedditUser instance for getting profile, posts and comments data from the specified user. :param username: Username of the user to get data from. - :param data_timeframe: The timeframe from which to get posts/comments - (choices: 'all', 'hour', 'day', 'week', 'month', 'year'). - :param data_sort: Sort criterion for the retrieved posts/comments - (choices: 'all', 'best', 'controversial', 'hot', 'new', 'rising', 'top'). - :param data_limit: The maximum number of user posts/comments to retrieve. + :type username: str """ self._username = username - self._data_timeframe = data_timeframe - self._data_sort = data_sort - self._data_limit = data_limit # -------------------------------------------------------------- # async def profile(self, session: aiohttp.ClientSession) -> User: """ - Gets a user's profile data. + Returns a user's profile data. :param session: aiohttp session to use for the request. + :type session: aiohttp.ClientSession :return: A User object containing user profile data. + :rtype: User """ user_profile: dict = await get_profile( profile_type="user_profile", profile_source=self._username, session=session @@ -74,19 +66,33 @@ async def profile(self, session: aiohttp.ClientSession) -> User: # -------------------------------------------------------------- # - async def posts(self, session: aiohttp.ClientSession) -> List[Post]: + async def posts( + self, + session: aiohttp.ClientSession, + limit: int, + sort: DATA_SORT_CRITERION = "all", + timeframe: DATA_TIMEFRAME = "all", + ) -> list[Post]: """ - Gets a user's posts. - - :param session: aiohttp session to use for the request. - :return: A list of Post objects, each containing a post's data. + Returns a user's posts. + + :param session: Aiohttp session to use for the request. + :type session: aiohttp.ClientSession. + :param limit: Maximum number of posts to return. + :type limit: int + :param sort: Sort criterion for the posts. + :type sort: str + :param timeframe: Timeframe from which to get posts. + :type timeframe: str + :return: A list of Post objects, each containing data about a post. + :rtype: list[Post] """ user_posts: list = await get_posts( posts_type="user_posts", posts_source=self._username, - timeframe=self._data_timeframe, - sort=self._data_sort, - limit=self._data_limit, + limit=limit, + sort=sort, + timeframe=timeframe, session=session, ) @@ -94,20 +100,34 @@ async def posts(self, session: aiohttp.ClientSession) -> List[Post]: # -------------------------------------------------------------- # - async def comments(self, session: aiohttp.ClientSession) -> List[Comment]: + async def comments( + self, + session: aiohttp.ClientSession, + limit: int, + sort: DATA_SORT_CRITERION = "all", + timeframe: DATA_TIMEFRAME = "all", + ) -> list[Comment]: """ - Gets a user's comments. - - :param session: aiohttp session to use for the request. - :return:A list of Comment objects, each containing a comment's data. + Returns a user's comments. + + :param session: Aiohttp session to use for the request. + :type session: aiohttp.ClientSession. + :param limit: Maximum number of comments to return. + :type limit: int + :param sort: Sort criterion for the comments. + :type sort: str + :param timeframe: Timeframe from which to get comments. + :type timeframe: str + :return: A list of Comment objects, each containing data about a comment. + :rtype: list[Comment] """ comments_list: list = [] raw_comments: list = await get_posts( posts_type="user_comments", posts_source=self._username, - timeframe=self._data_timeframe, - sort=self._data_sort, - limit=self._data_limit, + limit=limit, + sort=sort, + timeframe=timeframe, session=session, ) @@ -155,33 +175,25 @@ class RedditSub: def __init__( self, subreddit: str, - data_limit: int, - data_timeframe: DATA_TIMEFRAME = "all", - data_sort: DATA_SORT_CRITERION = "all", ): """ Initialises a RedditSub instance for getting profile and posts from the specified subreddit. :param subreddit: Name of the subreddit to get data from. - :param data_timeframe: The timeframe from which to get posts - (choices: 'all', 'hour', 'day', 'week', 'month', 'year'). - :param data_sort: Sort criterion for the retrieved posts - (choices: 'all', 'best', 'controversial', 'hot', 'new', 'rising', 'top'). - :param data_limit: The maximum number of subreddit posts to retrieve. + :type subreddit: str """ self._subreddit = subreddit - self._data_timeframe = data_timeframe - self._data_sort = data_sort - self._data_limit = data_limit # -------------------------------------------------------------- # async def profile(self, session: aiohttp.ClientSession) -> Subreddit: """ - Gets a subreddit's profile data. + Returns a subreddit's profile data. :param session: aiohttp session to use for the request. + :type session: aiohttp.ClientSession :return: A Subreddit object containing subreddit profile data. + :rtype: Subreddit """ subreddit_profile: dict = await get_profile( profile_type="subreddit_profile", @@ -208,19 +220,33 @@ async def profile(self, session: aiohttp.ClientSession) -> Subreddit: # -------------------------------------------------------------- # - async def posts(self, session: aiohttp.ClientSession) -> List[Post]: + async def posts( + self, + session: aiohttp.ClientSession, + limit: int, + sort: DATA_SORT_CRITERION = "all", + timeframe: DATA_TIMEFRAME = "all", + ) -> List[Post]: """ - Gets a subreddit's posts. - - :param session: aiohttp session to use for the request. - :return: A list of Post objects, each containing a post's data. + Returns a subreddit's posts. + + :param session: Aiohttp session to use for the request. + :type session: aiohttp.ClientSession. + :param limit: Maximum number of posts to return. + :type limit: int + :param sort: Sort criterion for the posts. + :type sort: str + :param timeframe: Timeframe from which to get posts. + :type timeframe: str + :return: A list of Post objects, each containing data about a post. + :rtype: list[Post] """ subreddit_posts: list = await get_posts( posts_type="subreddit_posts", posts_source=self._subreddit, - timeframe=self._data_timeframe, - sort=self._data_sort, - limit=self._data_limit, + limit=limit, + sort=sort, + timeframe=timeframe, session=session, ) @@ -233,29 +259,6 @@ async def posts(self, session: aiohttp.ClientSession) -> List[Post]: class RedditPosts: """Represents Reddit posts and provides method for getting posts from various sources.""" - # -------------------------------------------------------------- # - - def __init__( - self, - limit: int, - timeframe: DATA_TIMEFRAME = "all", - sort: DATA_SORT_CRITERION = "all", - ): - """ - Initializes a RedditPosts instance for getting posts from various sources. - - :param timeframe: The timeframe from which to get posts - (choices: 'all', 'hour', 'day', 'week', 'month', 'year'). - :param sort: Sort criterion for the retrieved posts - (choices: 'all', 'best', 'controversial', 'hot', 'new', 'rising', 'top'). - :param limit: The maximum number of posts to retrieve. - """ - self._timeframe = timeframe - self._sort = sort - self._limit = limit - - # -------------------------------------------------------------- # - @staticmethod def process_posts(raw_posts: list) -> List[Post]: posts_list: list = [] @@ -297,20 +300,36 @@ def process_posts(raw_posts: list) -> List[Post]: # -------------------------------------------------------------- # - async def search(self, query: str, session: aiohttp.ClientSession) -> List[Post]: + @staticmethod + async def search( + session: aiohttp.ClientSession, + query: str, + limit: int, + sort: DATA_SORT_CRITERION = "all", + timeframe: DATA_TIMEFRAME = "all", + ) -> List[Post]: """ - Searches for posts on Reddit based on a search query. + Returns posts that match a specified query.. + :param session: Aiohttp session to use for the request. + :type session: aiohttp.ClientSession. :param query: Search query. - :param session: aiohttp session to use for the request. - :return: A list of Post objects, each containing a post's data. + :type query: str + :param limit: Maximum number of posts to return. + :type limit: int + :param sort: Sort criterion for the posts. + :type sort: str + :param timeframe: Timeframe from which to get posts. + :type timeframe: str + :return: A list of Post objects, each containing data about a post. + :rtype: list[Post] """ search_posts: list = await get_posts( posts_type="search_posts", posts_source=query, - timeframe=self._timeframe, - sort=self._sort, - limit=self._limit, + limit=limit, + sort=sort, + timeframe=timeframe, session=session, ) @@ -318,23 +337,36 @@ async def search(self, query: str, session: aiohttp.ClientSession) -> List[Post] # -------------------------------------------------------------- # + @staticmethod async def listing( - self, listings_name: str, session: aiohttp.ClientSession + session: aiohttp.ClientSession, + listings_name: POSTS_LISTINGS, + limit: int, + sort: DATA_SORT_CRITERION = "all", + timeframe: DATA_TIMEFRAME = "all", ) -> List[Post]: """ - Gets posts from a specified listing. - - :param listings_name: name of listing to get posts from - (choices: 'all', 'best', 'controversial', 'popular', 'rising') - :param session: aiohttp session to use for the request. - :return: A list of Post objects, each containing a post's data. + Returns posts from a specified listing. + + :param session: Aiohttp session to use for the request. + :type session: aiohttp.ClientSession. + :param listings_name: Listing to get posts from.. + :type listings_name: str + :param limit: Maximum number of posts to return. + :type limit: int + :param sort: Sort criterion for the posts. + :type sort: str + :param timeframe: Timeframe from which to get posts. + :type timeframe: str + :return: A list of Post objects, each containing data about a post. + :rtype: list[Post] """ listing_posts: list = await get_posts( posts_type="listing_posts", posts_source=listings_name, - timeframe=self._timeframe, - sort=self._sort, - limit=self._limit, + limit=limit, + sort=sort, + timeframe=timeframe, session=session, ) @@ -342,18 +374,32 @@ async def listing( # -------------------------------------------------------------- # - async def front_page(self, session: aiohttp.ClientSession) -> List[Post]: + @staticmethod + async def front_page( + session: aiohttp.ClientSession, + limit: int, + sort: DATA_SORT_CRITERION = "all", + timeframe: DATA_TIMEFRAME = "all", + ) -> List[Post]: """ - Gets posts from the Reddit front-page. - - :param session: aiohttp session to use for the request. - :return: A list of Post objects, each containing a post's data. + Returns posts from the Reddit front-page. + + :param session: Aiohttp session to use for the request. + :type session: aiohttp.ClientSession. + :param limit: Maximum number of posts to return. + :type limit: int + :param sort: Sort criterion for the posts. + :type sort: str + :param timeframe: Timeframe from which to get posts. + :type timeframe: str + :return: A list of Post objects, each containing data about a post. + :rtype: list[Post] """ front_page_posts: list = await get_posts( posts_type="front_page_posts", - timeframe=self._timeframe, - sort=self._sort, - limit=self._limit, + limit=limit, + sort=sort, + timeframe=timeframe, session=session, ) diff --git a/pyproject.toml b/pyproject.toml index d317f54..841b75e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "knewkarma" -version = "3.4.0.0" +version = "3.5.0.0" description = "A Reddit Data Analysis Toolkit." authors = ["Richard Mwewa "] readme = "README.md" @@ -37,4 +37,4 @@ pytest-cov = "*" pytest-html = "*" [tool.poetry.scripts] -knewkarma = "knewkarma._cli:run_cli" +knewkarma = "knewkarma._cli:stage" diff --git a/tests/test_api.py b/tests/test_api.py index be089a7..223b8c8 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -54,8 +54,8 @@ async def test_get_posts(): user_posts: list = await get_posts( posts_type="user_posts", posts_source=TEST_USERNAME, - sort="all", - timeframe="all", + sort="top", + timeframe="year", limit=100, session=session, ) @@ -100,7 +100,6 @@ async def test_get_posts(): posts_type="search_posts", posts_source="covid-19", sort="controversial", - timeframe="all", limit=5, session=session, ) From e880638e2c487eacde82d186d5ac4281c453adcc Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:27:17 +0200 Subject: [PATCH 2/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 65b4bdc..dd1b880 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![knewkarma](https://github.com/bellingcat/knewkarma/assets/74001397/45262d9d-6633-418d-9ace-7c3c88b5ca36) +![knewkarma-cli](https://github.com/bellingcat/knewkarma/assets/74001397/77e603a3-6830-464c-a7db-da8a724bde2d) A **Reddit** Data Analysis Toolkit. From d32c503920ef7e1afaa64f89c62dab33b7f624b1 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:31:44 +0200 Subject: [PATCH 3/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dd1b880..8b86046 100644 --- a/README.md +++ b/README.md @@ -137,5 +137,5 @@ asyncio.run(async_posts()) *[Refer to the Wiki](https://github.com/bellingcat/knewkarma/wiki) for Installation, Usage and Uninstallation instructions.* *** -[![me](https://github.com/bellingcat/knewkarma/assets/74001397/efd19c7e-9840-4969-b33c-04087e73e4da)](https://about.me/rly0nheart) +[![me](https://github.com/bellingcat/knewkarma/assets/74001397/efd19c7e-9840-4969-b33c-04087e73e4da)](https://rly0nheart.github.io) From 1122ae9753a4b568c18fcaab440381d043e7c375 Mon Sep 17 00:00:00 2001 From: rly0nheart <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:37:14 +0200 Subject: [PATCH 4/5] Setting limit, sort and timeframe parameters for individual bulk data functions. Fix sort and limit bug in CLI. Major code fixes and refactoring. Rename _project.py to _meta.py --- knewkarma/_cli.py | 2 +- knewkarma/{_project.py => _meta.py} | 0 knewkarma/_parser.py | 2 +- knewkarma/api.py | 2 +- knewkarma/base.py | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename knewkarma/{_project.py => _meta.py} (100%) diff --git a/knewkarma/_cli.py b/knewkarma/_cli.py index d01d3ca..c9309dc 100644 --- a/knewkarma/_cli.py +++ b/knewkarma/_cli.py @@ -8,8 +8,8 @@ import aiohttp from rich.pretty import pprint +from ._meta import PROGRAM_DIRECTORY from ._parser import create_parser, version -from ._project import PROGRAM_DIRECTORY from ._utils import log, save_data, pathfinder from .api import get_updates from .base import RedditUser, RedditSub, RedditPosts diff --git a/knewkarma/_project.py b/knewkarma/_meta.py similarity index 100% rename from knewkarma/_project.py rename to knewkarma/_meta.py diff --git a/knewkarma/_parser.py b/knewkarma/_parser.py index eb659bd..70933a6 100644 --- a/knewkarma/_parser.py +++ b/knewkarma/_parser.py @@ -6,7 +6,7 @@ from rich.markdown import Markdown from rich_argparse import RichHelpFormatter -from ._project import ( +from ._meta import ( description, epilog, posts_examples, diff --git a/knewkarma/api.py b/knewkarma/api.py index 9d1108c..d02209a 100644 --- a/knewkarma/api.py +++ b/knewkarma/api.py @@ -4,7 +4,7 @@ import aiohttp -from ._project import version, about_author, DATA_SORT_CRITERION, DATA_TIMEFRAME +from ._meta import version, about_author, DATA_SORT_CRITERION, DATA_TIMEFRAME from ._utils import log # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # diff --git a/knewkarma/base.py b/knewkarma/base.py index e2eadc2..7898dad 100644 --- a/knewkarma/base.py +++ b/knewkarma/base.py @@ -4,7 +4,7 @@ import aiohttp -from ._project import DATA_TIMEFRAME, DATA_SORT_CRITERION, POSTS_LISTINGS +from ._meta import DATA_TIMEFRAME, DATA_SORT_CRITERION, POSTS_LISTINGS from ._utils import unix_timestamp_to_utc from .api import get_profile, get_posts from .data import User, Subreddit, Comment, Post From d0bd3780bf4fd303cf564f06fb6ab9b9718c849c Mon Sep 17 00:00:00 2001 From: rly0nheart <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 22 Dec 2023 12:54:05 +0000 Subject: [PATCH 5/5] Bump version (3.5.0.0) --- Knew Karma GUI/KnewKarma/KnewKarma.vbproj | 6 +++--- Knew Karma GUI/KnewKarmaSetup/KnewKarmaSetup.vdproj | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Knew Karma GUI/KnewKarma/KnewKarma.vbproj b/Knew Karma GUI/KnewKarma/KnewKarma.vbproj index a21a021..8d97f60 100644 --- a/Knew Karma GUI/KnewKarma/KnewKarma.vbproj +++ b/Knew Karma GUI/KnewKarma/KnewKarma.vbproj @@ -12,11 +12,11 @@ https://github.com/bellingcat/knewkarma README.md https://github.com/bellingcat/knewkarma - 3.4.0.0 - 3.4.0.0 + 3.5.0.0 + 3.5.0.0 LICENSE True - 3.4.0 + 3.5.0 reddit;scraper;reddit-scraper;osint;reddit-data 6.0-recommended diff --git a/Knew Karma GUI/KnewKarmaSetup/KnewKarmaSetup.vdproj b/Knew Karma GUI/KnewKarmaSetup/KnewKarmaSetup.vdproj index 559bb4f..52a4eff 100644 --- a/Knew Karma GUI/KnewKarmaSetup/KnewKarmaSetup.vdproj +++ b/Knew Karma GUI/KnewKarmaSetup/KnewKarmaSetup.vdproj @@ -229,15 +229,15 @@ { "Name" = "8:Microsoft Visual Studio" "ProductName" = "8:Knew Karma" - "ProductCode" = "8:{54746087-7636-4C00-845D-253E0D22CBA6}" - "PackageCode" = "8:{64395976-B1A2-4CBC-95D0-0AC31AA608D1}" + "ProductCode" = "8:{C7B6617B-86DB-4813-833F-8CDF88EC9A9E}" + "PackageCode" = "8:{9CCEBCAF-AF8F-4CAC-AC25-3EEED66DE5B9}" "UpgradeCode" = "8:{9B03AD0F-0C14-4075-AB75-01CD38A594B4}" "AspNetVersion" = "8:2.0.50727.0" "RestartWWWService" = "11:FALSE" "RemovePreviousVersions" = "11:TRUE" "DetectNewerInstalledVersion" = "11:TRUE" "InstallAllUsers" = "11:FALSE" - "ProductVersion" = "8:3.4.0" + "ProductVersion" = "8:3.5.0" "Manufacturer" = "8:Richard Mwewa" "ARPHELPTELEPHONE" = "8:" "ARPHELPLINK" = "8:https://github.com/bellingcat/knewkarma/wiki"