From f6f0cfc916bd670704a53862e0d1059ebab8ea6a Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Tue, 8 Apr 2025 15:51:20 -0400 Subject: [PATCH 01/12] [ENH] update interface to rely on post column, add users col --- analyzers/hashtags/interface.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/analyzers/hashtags/interface.py b/analyzers/hashtags/interface.py index ad71f25d..a4674c96 100644 --- a/analyzers/hashtags/interface.py +++ b/analyzers/hashtags/interface.py @@ -8,18 +8,19 @@ COL_AUTHOR_ID = "user_id" COL_TIME = "time" -COL_HASHTAGS = "hashtags" +COL_POST = "text" -OUTPUT_GINI = "gini_coef" -OUTPUT_COL_TIMESPAN = "time_span" +OUTPUT_GINI = "hashtag_analysis" +OUTPUT_COL_USERS = "users" +OUTPUT_COL_TIMESPAN = "timewindow_start" OUTPUT_COL_GINI = "gini" OUTPUT_COL_COUNT = "count" -OUTPUT_COL_HASHTAGS = COL_HASHTAGS +OUTPUT_COL_HASHTAGS = "hashtags" interface = AnalyzerInterface( id="hashtags", version="0.1.0", - name="hashtags", + name="Hashtag analysis", short_description="Computes the gini coefficient over hashtag usage", long_description=""" Analysis of hashtags measures the extent of online coordination among social media users @@ -55,10 +56,10 @@ ], ), InputColumn( - name=COL_HASHTAGS, + name=COL_POST, data_type="text", - description="The column containing the hashtags associated with the message", - name_hints=["hashtags", "tags", "topics", "keywords"], + description="The column containing the tweet and hashtags associated with the message", + name_hints=["text", "tweet", "post", "tweet_post", "message"], ), InputColumn( name=COL_TIME, @@ -82,6 +83,7 @@ columns=[ OutputColumn(name=OUTPUT_COL_TIMESPAN, data_type="datetime"), OutputColumn(name=OUTPUT_COL_GINI, data_type="float"), + OutputColumn(name=OUTPUT_COL_USERS, data_type="text"), OutputColumn(name=OUTPUT_COL_COUNT, data_type="integer"), OutputColumn(name=OUTPUT_COL_HASHTAGS, data_type="text"), ], From 55930d459ca564f585b01b015599982290053623 Mon Sep 17 
00:00:00 2001 From: Kristijan Armeni Date: Tue, 8 Apr 2025 15:53:46 -0400 Subject: [PATCH 02/12] [ENH] update gini() to operate on pl.Series --- analyzers/hashtags/main.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/analyzers/hashtags/main.py b/analyzers/hashtags/main.py index 752534c4..4f49eb17 100644 --- a/analyzers/hashtags/main.py +++ b/analyzers/hashtags/main.py @@ -21,21 +21,20 @@ NULL_CHAR = "[]" # this is taken as the null character for hashtags -def gini(x): +def gini(x: pl.Series) -> float: """ Parameters ---------- - x : list[str] - List of values for which to compute the Gini coefficient + x : pl.Series + polars Series containing values for which to compute the Gini coefficient Returns ------- float - Gini coefficient + Gini coefficient (between 0.0 and 1.0) """ - x_counts = Counter(x).values() + sorted_x = x.value_counts().sort(by="count", descending=False)[:, 1].to_list() - sorted_x = sorted(x_counts) n = len(sorted_x) cumx = list(accumulate(sorted_x)) @@ -43,7 +42,6 @@ def gini(x): def main(context: PrimaryAnalyzerContext): - input_reader = context.input() df_input = input_reader.preprocess(pl.read_parquet(input_reader.parquet_path)) From f9ff31b434711e55cd1413f47c98df695f81fead Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Tue, 8 Apr 2025 16:26:02 -0400 Subject: [PATCH 03/12] [ENH] wrap analysis code into hashtag_analyzer() --- analyzers/hashtags/main.py | 82 +++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/analyzers/hashtags/main.py b/analyzers/hashtags/main.py index 4f49eb17..0329e0dc 100644 --- a/analyzers/hashtags/main.py +++ b/analyzers/hashtags/main.py @@ -7,16 +7,17 @@ from .interface import ( COL_AUTHOR_ID, - COL_HASHTAGS, + COL_POST, COL_TIME, OUTPUT_COL_COUNT, OUTPUT_COL_GINI, OUTPUT_COL_HASHTAGS, + OUTPUT_COL_USERS, OUTPUT_GINI, ) -# let's look at the hashtags column -COLS_ALL = [COL_AUTHOR_ID, COL_TIME, COL_HASHTAGS] +# a handy variable 
+COLS_ALL = [COL_AUTHOR_ID, COL_TIME, COL_POST] NULL_CHAR = "[]" # this is taken as the null character for hashtags @@ -41,50 +42,49 @@ def gini(x: pl.Series) -> float: return (n + 1 - 2 * sum(cumx) / cumx[-1]) / n -def main(context: PrimaryAnalyzerContext): - input_reader = context.input() - df_input = input_reader.preprocess(pl.read_parquet(input_reader.parquet_path)) +def hashtag_analysis(data_frame: pl.DataFrame, every="1h") -> pl.DataFrame: + # define the expressions + has_hashtag_symbols = pl.col(COL_POST).str.contains("#").any() + extract_hashtags = pl.col(COL_POST).str.extract_all(r"(#\S+)") - # assign None to messages with no hashtags - df_input = df_input.with_columns( - pl.when(pl.col(COL_HASHTAGS) == NULL_CHAR) - .then(None) - .otherwise( - pl.col(COL_HASHTAGS) - .str.strip_chars("[]") - .str.replace_all("'", "") - .str.replace_all(" ", "") - .str.split(",") - ) # split hashtags into List[str] - .name.keep() - ) + # if hashtag symbol is detected, extract with regex + if data_frame.select(has_hashtag_symbols).item(): + df_input = data_frame.with_columns(extract_hashtags).filter( + pl.col(COL_POST) != [] + ) - # select columns - df_input = df_input.select(pl.col(COLS_ALL)) + else: # otherwise, we assume str: "['hashtag1', 'hashtag2', ...]" + raise ValueError(f"The data in {COL_POST} column appear to have no hashtags.") - df_agg = ( - df_input.filter(pl.col(COL_HASHTAGS).is_not_null()) - .select( - pl.col(COL_TIME), - pl.col(COL_HASHTAGS), - ) - .sort(COL_TIME) - .group_by_dynamic(COL_TIME, every="1h") # this could be a parameter + # select columns and sort + df_input = df_input.select(pl.col(COLS_ALL)).sort(pl.col(COL_TIME)) + + # compute gini per timewindow + df_out = ( + df_input.explode(pl.col(COL_POST)) + .with_columns(window_start=pl.col(COL_TIME).dt.truncate(every)) + .group_by("window_start") .agg( - pl.col(COL_HASHTAGS).explode().alias(OUTPUT_COL_HASHTAGS), - pl.col(COL_HASHTAGS).explode().count().alias(OUTPUT_COL_COUNT), - pl.col(COL_HASHTAGS) - 
.explode() - .map_elements( - lambda x: gini(x.to_list()), - return_dtype=pl.Float32, - returns_scalar=True, - ) + pl.col(COL_AUTHOR_ID).alias(OUTPUT_COL_USERS), + pl.col(COL_POST).alias(OUTPUT_COL_HASHTAGS), + pl.col(COL_POST).count().alias(OUTPUT_COL_COUNT), + pl.col(COL_POST) + .map_batches(gini, returns_scalar=True) .alias(OUTPUT_COL_GINI), ) ) - print("Output preview:") - print(df_agg.head()) + return df_out + + +def main(context: PrimaryAnalyzerContext): + input_reader = context.input() + df_input = input_reader.preprocess(pl.read_parquet(input_reader.parquet_path)) + + # window hard-coded to 1hr for now + df_out = hashtag_analysis( + data_frame=df_input, + every="1hr", + ) - df_agg.write_parquet(context.output(OUTPUT_GINI).parquet_path) + df_out.write_parquet(context.output(OUTPUT_GINI).parquet_path) From 15eba19c53e0cdcd4e7ddef9459a1fb6195d1934 Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Tue, 8 Apr 2025 16:55:43 -0400 Subject: [PATCH 04/12] [ENH] check for time column dtype --- analyzers/hashtags/main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/analyzers/hashtags/main.py b/analyzers/hashtags/main.py index 0329e0dc..ccc7e0ca 100644 --- a/analyzers/hashtags/main.py +++ b/analyzers/hashtags/main.py @@ -43,6 +43,11 @@ def gini(x: pl.Series) -> float: def hashtag_analysis(data_frame: pl.DataFrame, every="1h") -> pl.DataFrame: + if not isinstance(data_frame.schema[COL_TIME], pl.Datetime): + data_frame = data_frame.with_columns( + pl.col(COL_TIME).str.to_datetime().alias(COL_TIME) + ) + # define the expressions has_hashtag_symbols = pl.col(COL_POST).str.contains("#").any() extract_hashtags = pl.col(COL_POST).str.extract_all(r"(#\S+)") From 1dc99a79786bc792e433735784836f07812c3b04 Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Tue, 8 Apr 2025 19:20:16 -0400 Subject: [PATCH 05/12] [ENH] fix column naming, set default window to 12h --- analyzers/hashtags/main.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git 
a/analyzers/hashtags/main.py b/analyzers/hashtags/main.py index ccc7e0ca..06622db2 100644 --- a/analyzers/hashtags/main.py +++ b/analyzers/hashtags/main.py @@ -12,6 +12,7 @@ OUTPUT_COL_COUNT, OUTPUT_COL_GINI, OUTPUT_COL_HASHTAGS, + OUTPUT_COL_TIMESPAN, OUTPUT_COL_USERS, OUTPUT_GINI, ) @@ -67,8 +68,8 @@ def hashtag_analysis(data_frame: pl.DataFrame, every="1h") -> pl.DataFrame: # compute gini per timewindow df_out = ( df_input.explode(pl.col(COL_POST)) - .with_columns(window_start=pl.col(COL_TIME).dt.truncate(every)) - .group_by("window_start") + .with_columns(pl.col(COL_TIME).dt.truncate(every).alias(OUTPUT_COL_TIMESPAN)) + .group_by(OUTPUT_COL_TIMESPAN) .agg( pl.col(COL_AUTHOR_ID).alias(OUTPUT_COL_USERS), pl.col(COL_POST).alias(OUTPUT_COL_HASHTAGS), @@ -79,6 +80,11 @@ def hashtag_analysis(data_frame: pl.DataFrame, every="1h") -> pl.DataFrame: ) ) + # convert datetime back to string + df_out = df_out.with_columns( + pl.col(OUTPUT_COL_TIMESPAN).dt.to_string("%Y-%m-%d %H:%M:%S") + ) + return df_out @@ -89,7 +95,7 @@ def main(context: PrimaryAnalyzerContext): # window hard-coded to 1hr for now df_out = hashtag_analysis( data_frame=df_input, - every="1hr", + every="12h", ) df_out.write_parquet(context.output(OUTPUT_GINI).parquet_path) From d9d7c4dac3fe4dfd9a2252eb34dcf4058330aef7 Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Tue, 8 Apr 2025 19:25:58 -0400 Subject: [PATCH 06/12] [ENH] add data and tests for hashtag_analyzer --- analyzers/hashtags/test_data/__init__.py | 3 + .../hashtags/test_data/hashtag_test_input.csv | 101 ++++++++++++++++++ .../test_data/hashtag_test_output.json | 1 + analyzers/hashtags/test_hashtags_analyzer.py | 44 ++++++++ 4 files changed, 149 insertions(+) create mode 100644 analyzers/hashtags/test_data/__init__.py create mode 100644 analyzers/hashtags/test_data/hashtag_test_input.csv create mode 100644 analyzers/hashtags/test_data/hashtag_test_output.json create mode 100644 analyzers/hashtags/test_hashtags_analyzer.py diff --git 
a/analyzers/hashtags/test_data/__init__.py b/analyzers/hashtags/test_data/__init__.py new file mode 100644 index 00000000..f28e5076 --- /dev/null +++ b/analyzers/hashtags/test_data/__init__.py @@ -0,0 +1,3 @@ +import os + +test_data_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/analyzers/hashtags/test_data/hashtag_test_input.csv b/analyzers/hashtags/test_data/hashtag_test_input.csv new file mode 100644 index 00000000..8214cc83 --- /dev/null +++ b/analyzers/hashtags/test_data/hashtag_test_input.csv @@ -0,0 +1,101 @@ +user_id,time,text +user05,2025-03-25T18:17:05.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. #happy #fitness" +user01,2025-03-26T01:10:16.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam es #fun #friends #art" +user09,2025-03-26T08:59:19.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id" +user03,2025-03-26T10:11:55.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, #running" +user04,2025-03-26T10:55:41.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. 
Etiam est #reading #sunset" +user02,2025-03-26T11:27:53.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libe #photography #fitness #friends" +user06,2025-03-26T13:40:44.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eges #fun" +user02,2025-03-26T16:39:50.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue" +user02,2025-03-26T18:45:16.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero #health #sunset #photography" +user05,2025-03-26T22:26:19.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam e #photography #smile" +user06,2025-03-27T03:35:14.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Et #health #nature #fitness" +user02,2025-03-27T08:32:27.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. 
Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, e #travel" +user06,2025-03-27T08:41:07.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eges #art" +user04,2025-03-27T13:04:18.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est liber #book #book" +user10,2025-03-27T16:47:04.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. #photography" +user01,2025-03-27T17:32:02.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etia #family #book #friends" +user10,2025-03-28T07:45:41.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etia #love #friends #coffee" +user05,2025-03-28T09:47:49.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget #food" +user06,2025-03-28T11:18:54.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #family #love #music" +user02,2025-03-28T15:05:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Eti #coffee #smile #running" +user08,2025-03-28T18:07:38.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Et #travel #reading #sunset" +user02,2025-03-28T23:45:27.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. 
Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. Nam gravida orci vitae urna venenatis, eget pharetra nisi ultrices. Etiam nec nisi pretium, consectetur magna sed, eleifend lectus. Nunc nec urna at ipsum porttitor mattis nec in enim. Phasellus ut ornare nunc. Etiam bibendum iaculis ante, sit amet gravida ex consequat id. Sed hendrerit tellus id nulla lacinia euismod. Sed elementum ante lorem. Sed sed ultrices velit. Vestibulum velit mi, commodo vel elit ac, dignissim blandit nunc. Sed condimentum nisi at euismod rutrum. Phasellus feugiat dignissim sodales. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nullam in accumsan nunc. Cras fermentum orci in" +user04,2025-03-29T00:21:57.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est li #food #weekend" +user01,2025-03-29T04:18:44.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, e #coffee" +user04,2025-03-29T08:25:25.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, #friends" +user10,2025-03-29T12:04:37.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. 
Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus" +user10,2025-03-29T14:36:45.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. E #friends #sunset #reading" +user08,2025-03-29T23:36:14.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam es #photography #love" +user01,2025-03-30T00:42:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero #fun #love" +user10,2025-03-30T00:46:57.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eg #music" +user09,2025-03-30T03:01:34.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Fusce mauris lacus, consectetur #fitness #happy #photography" +user08,2025-03-30T03:05:08.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #health #art #reading" +user02,2025-03-30T04:28:27.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est l #smile #fitness" +user01,2025-03-30T06:35:19.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. Nam gravida orci vitae urna venenatis, eget pharetra nisi ultrices. 
Etiam nec nisi pretium, consectetur magna sed, eleifend lectus. Nunc nec urna at ipsum porttitor mattis nec in enim. Phasellus ut ornare nunc. Etiam bibendum iaculis ante, sit amet gravida ex consequat" +user05,2025-03-30T07:37:14.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est lib #reading #art" +user09,2025-03-30T10:43:57.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Eti #book #friends #weekend" +user06,2025-03-30T14:08:06.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis" +user08,2025-03-30T14:42:30.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est l #weekend #smile" +user03,2025-03-30T18:35:26.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est #travel #weekend" +user06,2025-03-31T01:02:46.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. 
Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est" +user08,2025-03-31T08:23:33.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non" +user09,2025-03-31T08:58:18.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est l #friends #music" +user09,2025-03-31T14:48:47.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #photography #health" +user03,2025-03-31T18:07:04.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est l #family #health" +user09,2025-04-01T00:31:23.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. 
Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam e #smile #book #happy" +user03,2025-04-01T03:05:20.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. Nam gravida orci vitae urna venenatis, eget pharetra nisi ultrices. Etiam nec nisi pretium, consectetur magna sed, eleifend lectus. Nunc nec urna at ipsum porttitor mattis nec in" +user09,2025-04-01T03:32:54.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. 
Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non," +user06,2025-04-01T07:54:50.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #sunset #coffee #fun" +user02,2025-04-01T09:15:28.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est l #family #health" +user06,2025-04-01T19:22:46.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est l #health #family" +user06,2025-04-01T23:56:55.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eg #happy" +user10,2025-04-02T00:21:12.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. 
Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #travel #smile #happy" +user01,2025-04-02T03:39:20.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #love #sunset #travel" +user02,2025-04-02T04:05:17.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, #running" +user10,2025-04-02T06:45:52.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat" +user09,2025-04-02T08:46:41.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. 
Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est #sunset #fitness" +user02,2025-04-02T11:08:00.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #photography #family" +user03,2025-04-02T14:24:16.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #art #nature #reading" +user06,2025-04-02T17:30:22.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eg #music" +user02,2025-04-02T17:36:35.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros" +user04,2025-04-02T18:59:18.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etia #happy #family #nature" +user01,2025-04-02T23:45:43.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eg #smile" +user08,2025-04-03T02:07:05.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, #friends" +user01,2025-04-03T04:28:56.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #happy #book #travel" +user10,2025-04-03T13:07:18.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. 
Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. Nam gravida orci vitae urna venenatis, eget pharetra nisi ultrices. Etiam nec nisi pretium, consectetur magna sed, eleifend lectus." +user10,2025-04-03T15:42:40.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est liber #happy #fun" +user08,2025-04-03T15:52:27.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. 
Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. Nam gravida orci vitae urna venenatis, eget pharetra nisi ultrices. Etiam nec" +user08,2025-04-03T17:20:57.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libe #smile #food" +user04,2025-04-03T18:20:14.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est #running #friends" +user07,2025-04-04T03:17:16.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est lib #happy #music" +user08,2025-04-04T05:32:09.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt" +user05,2025-04-04T07:04:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. 
Etiam es #music #music #art" +user01,2025-04-04T08:20:10.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, #smile #running" +user07,2025-04-04T11:09:35.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, #running" +user04,2025-04-04T17:50:07.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Eti #friends #food #fitness" +user07,2025-04-04T19:51:19.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices" +user05,2025-04-04T20:19:47.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Eti #nature #coffee #travel" +user10,2025-04-04T21:20:05.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, #reading" +user03,2025-04-05T00:16:02.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. E #friends #travel #weekend" +user08,2025-04-05T01:46:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libe #photography" +user07,2025-04-05T11:47:59.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #fitness #music #love" +user06,2025-04-05T12:39:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eges #art" +user01,2025-04-05T14:45:38.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est lib #travel #book" +user02,2025-04-05T21:21:42.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eges #fun" +user10,2025-04-05T21:34:22.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. 
Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, ege #food" +user03,2025-04-06T08:57:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, e #sunset" +user02,2025-04-06T14:51:09.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, eg #music" +user03,2025-04-06T16:00:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel" +user06,2025-04-07T02:23:14.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. 
In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non," +user06,2025-04-07T04:03:48.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin #family #photography #food" +user01,2025-04-07T04:31:26.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem #travel #art" +user01,2025-04-07T06:05:29.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. 
Nam gravida orci vitae urna venenatis, eget pharetra nisi ultrices. Etiam nec nisi pretium, consectetur magna sed, eleifend lectus. Nunc nec urna at ipsum porttitor mattis nec in enim. Phasellus ut ornare nunc. Etiam bibendum iaculis ante, sit amet gravida ex consequat id. Sed hendrerit tellus id nulla lacinia euismod. Sed elementum ante lorem. Sed sed ultrices velit. Vestibulum velit mi, commodo vel elit" +user05,2025-04-07T07:00:41.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, e #coffee" +user02,2025-04-07T14:24:26.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est libero, egestas vel dictum sed, rutrum ut magna. Nulla nec condimentum elit, ut tristique nunc. Sed lacinia mi ut tincidunt auctor. Suspendisse ultrices blandit ullamcorper. Praesent sollicitudin nunc ex, vel consectetur purus imperdiet sed. Morbi vehicula tincidunt est et ultricies. Vestibulum et elit id elit rhoncus aliquam quis id enim. Donec vitae elit in quam maximus sollicitudin. In lacinia pulvinar eros at vulputate. Praesent id urna lobortis lorem volutpat ornare a ut urna. Sed vitae eros accumsan, finibus tellus dignissim, vehicula nisl. Nam tristique est non est fermentum, id aliquet magna mattis. Nulla venenatis metus nisl, id pulvinar nibh faucibus et. Suspendisse finibus ut odio id eleifend. Ut consequat magna vel viverra porttitor. Donec in elit efficitur, semper nibh non, fringilla sapien. Morbi egestas ipsum risus, fringilla mollis lorem sodales dictum. 
Nam gravida orci vitae" +user07,2025-04-07T17:17:05.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Eti #smile #family #weekend" +user08,2025-04-07T18:39:28.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est li #family #smile" +user01,2025-04-08T11:54:15.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget #book" +user06,2025-04-08T13:52:04.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam est li #food #reading" +user05,2025-04-08T14:06:21.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. Etiam #photography #running" +user08,2025-04-08T15:30:20.000000,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce mauris lacus, consectetur non consequat non, sollicitudin eget ex. Mauris sit amet sem at augue accumsan aliquam sit amet id lacus. Nunc neque nisl, mattis at ornare vel, gravida at libero. 
Etiam est libero, eg #music" diff --git a/analyzers/hashtags/test_data/hashtag_test_output.json b/analyzers/hashtags/test_data/hashtag_test_output.json new file mode 100644 index 00000000..4ca052e8 --- /dev/null +++ b/analyzers/hashtags/test_data/hashtag_test_output.json @@ -0,0 +1 @@ +[{"timewindow_start":"2025-03-25 12:00:00","users":["user05","user05"],"hashtags":["#happy","#fitness"],"count":2,"gini":0.0},{"timewindow_start":"2025-03-26 00:00:00","users":["user01","user01","user01","user03","user04","user04","user02","user02","user02"],"hashtags":["#fun","#friends","#art","#running","#reading","#sunset","#photography","#fitness","#friends"],"count":9,"gini":0.09722222222222232},{"timewindow_start":"2025-03-26 12:00:00","users":["user06","user02","user02","user02","user05","user05"],"hashtags":["#fun","#health","#sunset","#photography","#photography","#smile"],"count":6,"gini":0.1333333333333334},{"timewindow_start":"2025-03-27 00:00:00","users":["user06","user06","user06","user02","user06"],"hashtags":["#health","#nature","#fitness","#travel","#art"],"count":5,"gini":0.0},{"timewindow_start":"2025-03-27 12:00:00","users":["user04","user04","user10","user01","user01","user01"],"hashtags":["#book","#book","#photography","#family","#book","#friends"],"count":6,"gini":0.25},{"timewindow_start":"2025-03-28 00:00:00","users":["user10","user10","user10","user05","user06","user06","user06"],"hashtags":["#love","#friends","#coffee","#food","#family","#love","#music"],"count":7,"gini":0.11904761904761907},{"timewindow_start":"2025-03-28 12:00:00","users":["user02","user02","user02","user08","user08","user08"],"hashtags":["#coffee","#smile","#running","#travel","#reading","#sunset"],"count":6,"gini":0.0},{"timewindow_start":"2025-03-29 00:00:00","users":["user04","user04","user01","user04"],"hashtags":["#food","#weekend","#coffee","#friends"],"count":4,"gini":0.0},{"timewindow_start":"2025-03-29 
12:00:00","users":["user10","user10","user10","user08","user08"],"hashtags":["#friends","#sunset","#reading","#photography","#love"],"count":5,"gini":0.0},{"timewindow_start":"2025-03-30 00:00:00","users":["user01","user01","user10","user09","user09","user09","user08","user08","user08","user02","user02","user05","user05","user09","user09","user09"],"hashtags":["#fun","#love","#music","#fitness","#happy","#photography","#health","#art","#reading","#smile","#fitness","#reading","#art","#book","#friends","#weekend"],"count":16,"gini":0.14423076923076922},{"timewindow_start":"2025-03-30 12:00:00","users":["user08","user08","user03","user03"],"hashtags":["#weekend","#smile","#travel","#weekend"],"count":4,"gini":0.16666666666666666},{"timewindow_start":"2025-03-31 00:00:00","users":["user09","user09"],"hashtags":["#friends","#music"],"count":2,"gini":0.0},{"timewindow_start":"2025-03-31 12:00:00","users":["user09","user09","user03","user03"],"hashtags":["#photography","#health","#family","#health"],"count":4,"gini":0.16666666666666666},{"timewindow_start":"2025-04-01 00:00:00","users":["user09","user09","user09","user06","user06","user06","user02","user02"],"hashtags":["#smile","#book","#happy","#sunset","#coffee","#fun","#family","#health"],"count":8,"gini":0.0},{"timewindow_start":"2025-04-01 12:00:00","users":["user06","user06","user06"],"hashtags":["#health","#family","#happy"],"count":3,"gini":0.0},{"timewindow_start":"2025-04-02 00:00:00","users":["user10","user10","user10","user01","user01","user01","user02","user09","user09","user02","user02"],"hashtags":["#travel","#smile","#happy","#love","#sunset","#travel","#running","#sunset","#fitness","#photography","#family"],"count":11,"gini":0.1414141414141415},{"timewindow_start":"2025-04-02 
12:00:00","users":["user03","user03","user03","user06","user04","user04","user04","user01"],"hashtags":["#art","#nature","#reading","#music","#happy","#family","#nature","#smile"],"count":8,"gini":0.10714285714285714},{"timewindow_start":"2025-04-03 00:00:00","users":["user08","user01","user01","user01"],"hashtags":["#friends","#happy","#book","#travel"],"count":4,"gini":0.0},{"timewindow_start":"2025-04-03 12:00:00","users":["user10","user10","user08","user08","user04","user04"],"hashtags":["#happy","#fun","#smile","#food","#running","#friends"],"count":6,"gini":0.0},{"timewindow_start":"2025-04-04 00:00:00","users":["user07","user07","user05","user05","user05","user01","user01","user07"],"hashtags":["#happy","#music","#music","#music","#art","#smile","#running","#running"],"count":8,"gini":0.25},{"timewindow_start":"2025-04-04 12:00:00","users":["user04","user04","user04","user05","user05","user05","user10"],"hashtags":["#friends","#food","#fitness","#nature","#coffee","#travel","#reading"],"count":7,"gini":0.0},{"timewindow_start":"2025-04-05 00:00:00","users":["user03","user03","user03","user08","user07","user07","user07"],"hashtags":["#friends","#travel","#weekend","#photography","#fitness","#music","#love"],"count":7,"gini":0.0},{"timewindow_start":"2025-04-05 12:00:00","users":["user06","user01","user01","user02","user10"],"hashtags":["#art","#travel","#book","#fun","#food"],"count":5,"gini":0.0},{"timewindow_start":"2025-04-06 00:00:00","users":["user03"],"hashtags":["#sunset"],"count":1,"gini":0.0},{"timewindow_start":"2025-04-06 12:00:00","users":["user02"],"hashtags":["#music"],"count":1,"gini":0.0},{"timewindow_start":"2025-04-07 00:00:00","users":["user06","user06","user06","user01","user01","user05"],"hashtags":["#family","#photography","#food","#travel","#art","#coffee"],"count":6,"gini":0.0},{"timewindow_start":"2025-04-07 
12:00:00","users":["user07","user07","user07","user08","user08"],"hashtags":["#smile","#family","#weekend","#family","#smile"],"count":5,"gini":0.1333333333333333},{"timewindow_start":"2025-04-08 00:00:00","users":["user01"],"hashtags":["#book"],"count":1,"gini":0.0},{"timewindow_start":"2025-04-08 12:00:00","users":["user06","user06","user05","user05","user08"],"hashtags":["#food","#reading","#photography","#running","#music"],"count":5,"gini":0.0}] \ No newline at end of file diff --git a/analyzers/hashtags/test_hashtags_analyzer.py b/analyzers/hashtags/test_hashtags_analyzer.py new file mode 100644 index 00000000..c4f96ec6 --- /dev/null +++ b/analyzers/hashtags/test_hashtags_analyzer.py @@ -0,0 +1,44 @@ +import os + +from preprocessing.series_semantic import datetime_string, identifier, text_catch_all +from testing import CsvTestData, JsonTestData, test_primary_analyzer + +from .interface import COL_AUTHOR_ID, COL_POST, COL_TIME, OUTPUT_GINI, interface +from .main import main +from .test_data import test_data_dir + + +# This example shows you how to test a primary analyzer. +# It runs on pytest. +def test_hashtag_analyzer(): + # You use this test function. + test_primary_analyzer( + interface, # You provide the interface ... + main, # ... and the analyzer's entry point. + # There are also JsonTestData, ExcelTestData. + # You can also programmatically create a polars DataFrame + # and use PolarsTestData. + # The column names for the input and output data must match the + # interface schema. + input=CsvTestData( + os.path.join(test_data_dir, "hashtag_test_input.csv"), + # Specifying the column semantics are optional, and are optional for + # each column. It's useful in CsvTestData, ExcelTestData, and + # JsonTestData if you have data that need to be interpreted into + # types not directly supported by the file format like timestamp. 
+ semantics={ + COL_AUTHOR_ID: identifier, + COL_TIME: datetime_string, + COL_POST: text_catch_all, + }, + ), + # These outputs are the expected outputs of the analyzer. + # You don't need to specify all the outputs, only the ones you want to test. + # The output IDs must match the IDs in the interface schema. + # You must provide at least one output (otherwise you're not really testing anything!) + outputs={ + OUTPUT_GINI: JsonTestData( + os.path.join(test_data_dir, "hashtag_test_output.json") + ) + }, + ) From ff8ca1a8bf2be8e54993f459c00a0fcc27c10f0e Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Wed, 9 Apr 2025 10:38:33 -0400 Subject: [PATCH 07/12] [ENH] add tests for gini() --- analyzers/hashtags/test_hashtags_analyzer.py | 70 +++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/analyzers/hashtags/test_hashtags_analyzer.py b/analyzers/hashtags/test_hashtags_analyzer.py index c4f96ec6..1bed6ee3 100644 --- a/analyzers/hashtags/test_hashtags_analyzer.py +++ b/analyzers/hashtags/test_hashtags_analyzer.py @@ -1,12 +1,80 @@ import os +import numpy as np +import polars as pl + from preprocessing.series_semantic import datetime_string, identifier, text_catch_all from testing import CsvTestData, JsonTestData, test_primary_analyzer from .interface import COL_AUTHOR_ID, COL_POST, COL_TIME, OUTPUT_GINI, interface -from .main import main +from .main import gini, main from .test_data import test_data_dir +HASHTAGS = [ + "sunset", + "nature", + "food", + "travel", + "happy", + "friends", + "love", + "family", + "music", + "art", + "photography", + "fun", + "smile", + "weekend", + "coffee", + "book", + "reading", + "running", + "fitness", + "health", + "cat", + "dog", + "bird", + "fish", + "lizard", +] + + +def test_gini(): + test_cases = [ + # a single hashtags is detected + { + "data": ["same"] * 1000, + "expected": 0.0, + "description": "One single hashtag (no inequality possible)", + }, + { + "data": HASHTAGS, + "expected": 0.0, + 
"description": "Perfect equality (all unique)", + }, + { + "data": HASHTAGS * 5, + "expected": 0.0, + "description": "Perfect equality (all appear 5 times)", + }, + { + "data": ["trending"] * 9999 + HASHTAGS, + "expected": 0.95, # It goes towards 1 + "description": "Extreme inequality (9999 vs 1 occurrences)", + }, + ] + + # caculcate gini and compare + for test_case in test_cases: + data_series = pl.Series(test_case["data"]) + + # Calculate actual Gini coefficient + actual = gini(data_series) + + assert np.allclose( + [actual], [test_case["expected"]], rtol=1e-2, atol=1e-2 + ), f"Failed test case: {test_case['description']}" + # This example shows you how to test a primary analyzer. # It runs on pytest. From 9ccbd8b0572d2250eb6bbb589b0dbe13bcab64c2 Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Wed, 9 Apr 2025 10:57:34 -0400 Subject: [PATCH 08/12] [MAINT] explicitly specify return_dtype --- analyzers/hashtags/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analyzers/hashtags/main.py b/analyzers/hashtags/main.py index 06622db2..24318798 100644 --- a/analyzers/hashtags/main.py +++ b/analyzers/hashtags/main.py @@ -75,7 +75,7 @@ def hashtag_analysis(data_frame: pl.DataFrame, every="1h") -> pl.DataFrame: pl.col(COL_POST).alias(OUTPUT_COL_HASHTAGS), pl.col(COL_POST).count().alias(OUTPUT_COL_COUNT), pl.col(COL_POST) - .map_batches(gini, returns_scalar=True) + .map_batches(gini, returns_scalar=True, return_dtype=pl.Float64) .alias(OUTPUT_COL_GINI), ) ) From 648757ce95bfa41edd71e3159e7ce30da8f1d0c5 Mon Sep 17 00:00:00 2001 From: Kristijan Armeni Date: Wed, 9 Apr 2025 11:25:48 -0400 Subject: [PATCH 09/12] [MAINT] cleanup --- analyzers/hashtags/main.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/analyzers/hashtags/main.py b/analyzers/hashtags/main.py index 24318798..d255bc4a 100644 --- a/analyzers/hashtags/main.py +++ b/analyzers/hashtags/main.py @@ -1,4 +1,3 @@ -from collections import Counter from itertools 
import accumulate import polars as pl @@ -17,11 +16,6 @@ OUTPUT_GINI, ) -# a handy variable -COLS_ALL = [COL_AUTHOR_ID, COL_TIME, COL_POST] - -NULL_CHAR = "[]" # this is taken as the null character for hashtags - def gini(x: pl.Series) -> float: """ @@ -63,7 +57,9 @@ def hashtag_analysis(data_frame: pl.DataFrame, every="1h") -> pl.DataFrame: raise ValueError(f"The data in {COL_POST} column appear to have no hashtags.") # select columns and sort - df_input = df_input.select(pl.col(COLS_ALL)).sort(pl.col(COL_TIME)) + df_input = df_input.select(pl.col([COL_AUTHOR_ID, COL_TIME, COL_POST])).sort( + pl.col(COL_TIME) + ) # compute gini per timewindow df_out = ( From 8283aee68f17ed99077382149179b35173edff96 Mon Sep 17 00:00:00 2001 From: DeanEby Date: Wed, 9 Apr 2025 12:03:49 -0400 Subject: [PATCH 10/12] fix: hide unsupported hashtag analysis export formats --- components/export_outputs.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/components/export_outputs.py b/components/export_outputs.py index 16d47705..0b9d3968 100644 --- a/components/export_outputs.py +++ b/components/export_outputs.py @@ -46,7 +46,7 @@ def export_outputs(context: ViewContext, analysis: AnalysisContext): return scope.refresh() - format = export_format_prompt() + format = export_format_prompt(analysis) if format is None: print("Export cancelled") wait_for_key(True) @@ -124,12 +124,19 @@ def export_outputs_sequence( wait_for_key(True) -def export_format_prompt(): +def export_format_prompt(analysis: AnalysisContext): + analysis_id = analysis.analyzer_id return prompts.list_input( - "Choose an export format", + "choose an export format", choices=[ - ("CSV", "csv"), - ("Excel", "xlsx"), + *( + [ + ("CSV", "csv"), + ("Excel", "xlsx"), + ] + if analysis_id != "hashtags" + else [] + ), ("JSON", "json"), ("(Back)", None), ], From 2520b853de8f5c582b5b16627699ef9de8ebc41b Mon Sep 17 00:00:00 2001 From: DeanEby Date: Wed, 9 Apr 2025 12:11:49 -0400 Subject: [PATCH 11/12] 
test tutorial comment cleanup --- analyzers/hashtags/test_hashtags_analyzer.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/analyzers/hashtags/test_hashtags_analyzer.py b/analyzers/hashtags/test_hashtags_analyzer.py index 1bed6ee3..af982510 100644 --- a/analyzers/hashtags/test_hashtags_analyzer.py +++ b/analyzers/hashtags/test_hashtags_analyzer.py @@ -76,34 +76,18 @@ def test_gini(): ), f"Failed test case: {test_case['description']}" -# This example shows you how to test a primary analyzer. -# It runs on pytest. def test_hashtag_analyzer(): - # You use this test function. test_primary_analyzer( - interface, # You provide the interface ... - main, # ... and the analyzer's entry point. - # There are also JsonTestData, ExcelTestData. - # You can also programmatically create a polars DataFrame - # and use PolarsTestData. - # The column names for the input and output data must match the - # interface schema. + interface, + main, # the analyzer's entry point. input=CsvTestData( os.path.join(test_data_dir, "hashtag_test_input.csv"), - # Specifying the column semantics are optional, and are optional for - # each column. It's useful in CsvTestData, ExcelTestData, and - # JsonTestData if you have data that need to be interpreted into - # types not directly supported by the file format like timestamp. semantics={ COL_AUTHOR_ID: identifier, COL_TIME: datetime_string, COL_POST: text_catch_all, }, ), - # These outputs are the expected outputs of the analyzer. - # You don't need to specify all the outputs, only the ones you want to test. - # The output IDs must match the IDs in the interface schema. - # You must provide at least one output (otherwise you're not really testing anything!) 
outputs={ OUTPUT_GINI: JsonTestData( os.path.join(test_data_dir, "hashtag_test_output.json") From 87438a30d4cf927b2edb25a701fd7bc5230a36c7 Mon Sep 17 00:00:00 2001 From: DeanEby Date: Wed, 9 Apr 2025 12:15:27 -0400 Subject: [PATCH 12/12] fix: Capitalization typo --- components/export_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/export_outputs.py b/components/export_outputs.py index 0b9d3968..8cbd4094 100644 --- a/components/export_outputs.py +++ b/components/export_outputs.py @@ -127,7 +127,7 @@ def export_outputs_sequence( def export_format_prompt(analysis: AnalysisContext): analysis_id = analysis.analyzer_id return prompts.list_input( - "choose an export format", + "Choose an export format", choices=[ *( [