In [1]:
import polars as pl

#STEP 1: Load Dataset
df = pl.read_csv("tw_posts_cleaned.csv")
print("Dataset loaded. Shape:", df.shape)

#STEP 2: Descriptive Statistics for Numeric Columns
print("\n=== Descriptive Statistics ===")
display(df.describe())

#STEP 3: Unique Value Counts
print("\n=== Unique Value Counts ===")
for col in df.columns:
    print(f"{col}: {df.select(pl.col(col).n_unique()).item()}")

#STEP 4: Most Frequent Non-Numeric Values
print("\n=== Most Frequent Non-Numeric Values ===")
non_numeric_cols = [col for col, dtype in df.schema.items() if dtype == pl.Utf8]
for col in non_numeric_cols:
    try:
        vc = df.select(pl.col(col).value_counts())
        if vc.is_empty() or len(vc.columns) < 2:
            print(f"{col}: No frequent values")
            continue
        count_col = [c for c in vc.columns if c != col][0]
        vc_sorted = vc.sort(count_col, descending=True).limit(1)
        print(f"{col}:")
        print(vc_sorted)
    except Exception as e:
        print(f"{col}: Error - {e}")

#STEP 5: Grouped Stats by 'source'
numeric_cols = [col for col, dtype in df.schema.items() if dtype in (pl.Float64, pl.Float32, pl.Int64, pl.Int32)]
grouped_source = df.group_by("source").agg([
    *[pl.col(col).count().alias(f"{col}_count") for col in numeric_cols],
    *[pl.col(col).mean().alias(f"{col}_mean") for col in numeric_cols],
    *[pl.col(col).min().alias(f"{col}_min") for col in numeric_cols],
    *[pl.col(col).max().alias(f"{col}_max") for col in numeric_cols],
    *[pl.col(col).std().alias(f"{col}_std") for col in numeric_cols],
])
display(grouped_source)

#STEP 6: Grouped Stats by ('source', 'id')
grouped_source_id = df.group_by(["source", "id"]).agg([
    *[pl.col(col).count().alias(f"{col}_count") for col in numeric_cols],
    *[pl.col(col).mean().alias(f"{col}_mean") for col in numeric_cols],
    *[pl.col(col).min().alias(f"{col}_min") for col in numeric_cols],
    *[pl.col(col).max().alias(f"{col}_max") for col in numeric_cols],
    *[pl.col(col).std().alias(f"{col}_std") for col in numeric_cols],
])
display(grouped_source_id)


Dataset loaded. Shape: (27281, 45)

=== Descriptive Statistics ===


statistic,id,url,source,retweetCount,replyCount,likeCount,quoteCount,viewCount,createdAt,lang,bookmarkCount,isReply,isRetweet,isQuote,isConversationControlled,month_year,illuminating_scored_message,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating,engagement_cta_subtype_illuminating,fundraising_cta_subtype_illuminating,voting_cta_subtype_illuminating,covid_topic_illuminating,economy_topic_illuminating,education_topic_illuminating,environment_topic_illuminating,foreign_policy_topic_illuminating,governance_topic_illuminating,health_topic_illuminating,immigration_topic_illuminating,lgbtq_issues_topic_illuminating,military_topic_illuminating,race_and_ethnicity_topic_illuminating,safety_topic_illuminating,social_and_cultural_topic_illuminating,technology_and_privacy_topic_illuminating,womens_issue_topic_illuminating,incivility_illuminating,scam_illuminating,freefair_illuminating,fraud_illuminating
str,str,str,str,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""","""27281""","""27281""","""27281""",27281.0,27281.0,27281.0,27281.0,27281.0,"""27281""","""27281""",27281.0,27281.0,27281.0,27281.0,27281.0,"""27281""","""27281""",26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,26014.0,27281.0,27281.0
"""null_count""","""0""","""0""","""0""",0.0,0.0,0.0,0.0,0.0,"""0""","""0""",0.0,0.0,0.0,0.0,0.0,"""0""","""0""",1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,1267.0,0.0,0.0
"""mean""",,,,1322.428833,1064.435431,6913.519886,128.15531,507323.401525,,,136.269528,0.123529,0.0,0.118581,0.000293,,,0.037172,0.564042,0.508034,0.307834,0.226609,0.10971,0.066964,0.00788,0.01676,0.007611,0.160337,0.018452,0.028562,0.042285,0.022988,0.055701,0.065349,0.003075,0.010994,0.015415,0.037634,0.05201,0.002037,0.023334,0.178711,0.012378,0.00143,0.002749
"""std""",,,,3405.843096,3176.194336,21591.844518,1132.005714,3213500.0,,,712.864518,,,,,,,0.189187,0.495891,0.499945,0.461606,0.418645,0.312534,0.249964,0.088423,0.128374,0.086912,0.366925,0.13458,0.166574,0.201242,0.149867,0.229347,0.247146,0.055371,0.104277,0.123198,0.190312,0.222052,0.045092,0.150964,0.383118,0.110568,0.037783,0.052361
"""min""","""0000635d0c9e7bdf89dfc13811d080…","""0000179c6b90798f167528aaaaf678…","""Canva""",0.0,0.0,0.0,0.0,5.0,"""2023-09-01 00:30:00""","""en""",0.0,0.0,0.0,0.0,0.0,"""2023-09""","""0000f20a94aa332e2e6ed7a0620f98…",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""25%""",,,,84.0,43.0,394.0,5.0,27873.0,,,4.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""50%""",,,,334.0,131.0,1408.0,17.0,70968.0,,,21.0,,,,,,,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""75%""",,,,1071.0,502.0,5011.0,69.0,303786.0,,,76.0,,,,,,,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""max""","""fffbb471d8b0bd6d990b4f9f22283b…","""ffffd63fa71574c0127b90e12fdba3…","""Twitter for iPhone""",144615.0,121270.0,915221.0,123320.0,333502775.0,"""2024-11-04 23:40:00""","""en""",42693.0,1.0,0.0,1.0,1.0,"""2024-11""","""fffe6f31ba97d01463912106398493…",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0



=== Unique Value Counts ===
id: 27281
url: 27281
source: 14
retweetCount: 5194
replyCount: 4516
likeCount: 10199
quoteCount: 1346
viewCount: 25349
createdAt: 25085
lang: 1
bookmarkCount: 1440
isReply: 2
isRetweet: 1
isQuote: 2
isConversationControlled: 2
month_year: 15
illuminating_scored_message: 27113
election_integrity_Truth_illuminating: 3
advocacy_msg_type_illuminating: 3
issue_msg_type_illuminating: 3
attack_msg_type_illuminating: 3
image_msg_type_illuminating: 3
cta_msg_type_illuminating: 3
engagement_cta_subtype_illuminating: 3
fundraising_cta_subtype_illuminating: 3
voting_cta_subtype_illuminating: 3
covid_topic_illuminating: 3
economy_topic_illuminating: 3
education_topic_illuminating: 3
environment_topic_illuminating: 3
foreign_policy_topic_illuminating: 3
governance_topic_illuminating: 3
health_topic_illuminating: 3
immigration_topic_illuminating: 3
lgbtq_issues_topic_illuminating: 3
military_topic_illuminating: 3
race_and_ethnicity_topic_illuminating: 3
safety_topic_illum

source,retweetCount_count,replyCount_count,likeCount_count,quoteCount_count,viewCount_count,bookmarkCount_count,election_integrity_Truth_illuminating_count,advocacy_msg_type_illuminating_count,issue_msg_type_illuminating_count,attack_msg_type_illuminating_count,image_msg_type_illuminating_count,cta_msg_type_illuminating_count,engagement_cta_subtype_illuminating_count,fundraising_cta_subtype_illuminating_count,voting_cta_subtype_illuminating_count,covid_topic_illuminating_count,economy_topic_illuminating_count,education_topic_illuminating_count,environment_topic_illuminating_count,foreign_policy_topic_illuminating_count,governance_topic_illuminating_count,health_topic_illuminating_count,immigration_topic_illuminating_count,lgbtq_issues_topic_illuminating_count,military_topic_illuminating_count,race_and_ethnicity_topic_illuminating_count,safety_topic_illuminating_count,social_and_cultural_topic_illuminating_count,technology_and_privacy_topic_illuminating_count,womens_issue_topic_illuminating_count,incivility_illuminating_count,scam_illuminating_count,freefair_illuminating_count,fraud_illuminating_count,retweetCount_mean,replyCount_mean,…,scam_illuminating_max,freefair_illuminating_max,fraud_illuminating_max,retweetCount_std,replyCount_std,likeCount_std,quoteCount_std,viewCount_std,bookmarkCount_std,election_integrity_Truth_illuminating_std,advocacy_msg_type_illuminating_std,issue_msg_type_illuminating_std,attack_msg_type_illuminating_std,image_msg_type_illuminating_std,cta_msg_type_illuminating_std,engagement_cta_subtype_illuminating_std,fundraising_cta_subtype_illuminating_std,voting_cta_subtype_illuminating_std,covid_topic_illuminating_std,economy_topic_illuminating_std,education_topic_illuminating_std,environment_topic_illuminating_std,foreign_policy_topic_illuminating_std,governance_topic_illuminating_std,health_topic_illuminating_std,immigration_topic_illuminating_std,lgbtq_issues_topic_illuminating_std,military_topic_illuminating_std,race_and_ethnicity_topic_illuminating_std,safety_topic_illuminating_std,social_and_cultural_topic_illuminating_std,technology_and_privacy_topic_illuminating_std,womens_issue_topic_illuminating_std,incivility_illuminating_std,scam_illuminating_std,freefair_illuminating_std,fraud_illuminating_std
str,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,f64,f64,…,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""TweetDeck Web App""",7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,1911.142857,659.285714,…,0.0,0,0,2627.973581,838.41432,10342.089825,147.918736,616922.33044,807.004101,0.0,0.534522,0.48795,0.48795,0.534522,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.377964,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.377964,0.0,0.48795,0.0,0.0,0.0
"""Twitter for iPad""",266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,266,65.37594,21.300752,…,0.0,0,0,67.971037,33.990228,282.273989,14.113905,38020.971255,17.438577,0.121932,0.500035,0.487145,0.181144,0.238213,0.358113,0.292358,0.061314,0.199482,0.0,0.350587,0.160374,0.199482,0.086547,0.086547,0.223718,0.160374,0.0,0.105797,0.105797,0.086547,0.171116,0.061314,0.061314,0.207943,0.0,0.0,0.0
"""Twitter Web Client""",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,98.0,26.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter for iPhone""",8482,8482,8482,8482,8482,8482,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8158,8482,8482,613.22813,290.058595,…,1.0,1,1,2157.374885,1036.654487,15967.564564,190.823497,955147.50349,403.235279,0.198734,0.500031,0.4947,0.4234,0.388398,0.308835,0.265405,0.077272,0.096699,0.081837,0.302164,0.134354,0.159834,0.226869,0.154654,0.184246,0.199581,0.055276,0.089587,0.130787,0.153898,0.177614,0.042843,0.088911,0.349142,0.100358,0.030698,0.058376
"""Twitter for Android""",5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,10.6,8.4,…,0.0,0,0,5.899152,6.107373,37.439284,0.547723,9744.059847,0.0,0.0,0.0,0.0,0.0,0.0,0.447214,0.447214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Twitter Web App""",14920,14920,14920,14920,14920,14920,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14190,14920,14920,1316.789276,719.317627,…,1.0,1,1,3571.870333,2689.952978,22470.095887,1465.05583,3.9675e6,843.680549,0.188763,0.493703,0.497709,0.483155,0.443818,0.302438,0.229434,0.096362,0.119328,0.095641,0.386346,0.12682,0.176366,0.191506,0.148699,0.207381,0.285575,0.052355,0.113135,0.1199,0.207998,0.199469,0.041938,0.12216,0.411624,0.127357,0.037492,0.052984
"""Later Media""",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,93.0,18.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter for Advertisers""",7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,97.0,138.714286,…,0.0,0,0,96.472794,84.555138,820.346646,12.447987,52027.093234,3.728909,0.0,0.48795,0.48795,0.0,0.0,0.377964,0.377964,0.0,0.0,0.0,0.0,0.377964,0.0,0.377964,0.0,0.0,0.377964,0.0,0.0,0.0,0.377964,0.0,0.0,0.0,0.48795,0.0,0.0,0.0
"""Canva""",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,69.0,324.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


source,id,retweetCount_count,replyCount_count,likeCount_count,quoteCount_count,viewCount_count,bookmarkCount_count,election_integrity_Truth_illuminating_count,advocacy_msg_type_illuminating_count,issue_msg_type_illuminating_count,attack_msg_type_illuminating_count,image_msg_type_illuminating_count,cta_msg_type_illuminating_count,engagement_cta_subtype_illuminating_count,fundraising_cta_subtype_illuminating_count,voting_cta_subtype_illuminating_count,covid_topic_illuminating_count,economy_topic_illuminating_count,education_topic_illuminating_count,environment_topic_illuminating_count,foreign_policy_topic_illuminating_count,governance_topic_illuminating_count,health_topic_illuminating_count,immigration_topic_illuminating_count,lgbtq_issues_topic_illuminating_count,military_topic_illuminating_count,race_and_ethnicity_topic_illuminating_count,safety_topic_illuminating_count,social_and_cultural_topic_illuminating_count,technology_and_privacy_topic_illuminating_count,womens_issue_topic_illuminating_count,incivility_illuminating_count,scam_illuminating_count,freefair_illuminating_count,fraud_illuminating_count,retweetCount_mean,…,scam_illuminating_max,freefair_illuminating_max,fraud_illuminating_max,retweetCount_std,replyCount_std,likeCount_std,quoteCount_std,viewCount_std,bookmarkCount_std,election_integrity_Truth_illuminating_std,advocacy_msg_type_illuminating_std,issue_msg_type_illuminating_std,attack_msg_type_illuminating_std,image_msg_type_illuminating_std,cta_msg_type_illuminating_std,engagement_cta_subtype_illuminating_std,fundraising_cta_subtype_illuminating_std,voting_cta_subtype_illuminating_std,covid_topic_illuminating_std,economy_topic_illuminating_std,education_topic_illuminating_std,environment_topic_illuminating_std,foreign_policy_topic_illuminating_std,governance_topic_illuminating_std,health_topic_illuminating_std,immigration_topic_illuminating_std,lgbtq_issues_topic_illuminating_std,military_topic_illuminating_std,race_and_ethnicity_topic_illuminating_std,safety_topic_illuminating_std,social_and_cultural_topic_illuminating_std,technology_and_privacy_topic_illuminating_std,womens_issue_topic_illuminating_std,incivility_illuminating_std,scam_illuminating_std,freefair_illuminating_std,fraud_illuminating_std
str,str,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,f64,…,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""Twitter Web App""","""e8954407728610dc3ae9f82af73468…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1797.0,…,1.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter for iPhone""","""b0cc4330e344fb9ff527c8981c5e4e…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,196.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter for iPhone""","""634677400e60992f46ea38e75e4904…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,58.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter Web App""","""c8f763d9697b31d595cc00be32c44a…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,12.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter Web App""","""70aab97e35818facf0a26205285f3f…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,285.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Twitter Web App""","""7b4ac822e6d19ff1be0d95859af611…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,499.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter for iPhone""","""8839f61b443eaa2131111b514468e6…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter for iPhone""","""49ff8e345f78a7a3a3465ee49839ba…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,101.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Twitter Web App""","""32bdb7eb6e2f6e8548f4867c06c796…",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,653.0,…,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
