In [11]:
import pandas as pd
import gc

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# ---------- Function: Full Dataset Descriptive Stats ----------
def pandas_full_describe(file_path):
    df = pd.read_csv(file_path)
    dataset_name = file_path.split("/")[-1]

    print(f"\n\n📊 Dataset: {dataset_name}")

    # --- Numeric Summary ---
    numeric_df = df.select_dtypes(include='number')
    if not numeric_df.empty:
        print("\n--- Numeric Summary ---")
        display(numeric_df.describe().round(2))
    else:
        print("⚠️ No numeric columns found.")

    # --- Categorical Summary ---
    cat_df = df.select_dtypes(include='object')
    if not cat_df.empty:
        print("\n--- Categorical Summary ---")
        summary = pd.DataFrame({
            'unique': cat_df.nunique(),
            'top': cat_df.mode().iloc[0],
            'top_count': cat_df.apply(lambda x: x.value_counts(dropna=False).iloc[0])
        })
        display(summary)
    else:
        print("⚠️ No categorical columns found.")
    
    del df
    gc.collect()

# ---------- Function: Grouped Summary ----------
def pandas_groupby_summary(file_path, group_cols, sample_n=3, max_columns=10):
    df = pd.read_csv(file_path)
    dataset_name = file_path.split("/")[-1]

    print(f"\n\n📊 Grouped Summary for: {dataset_name} | Grouped by: {group_cols}")

    if not all(col in df.columns for col in group_cols):
        print("⚠️ Skipping: One or more group columns not found.")
        return

    numeric_cols = df.select_dtypes(include='number').columns[:max_columns]
    if numeric_cols.empty:
        print("⚠️ No numeric columns to summarize.")
        return

    try:
        sample_keys = df[group_cols].drop_duplicates().head(sample_n)
    except Exception as e:
        print(f"⚠️ Error extracting group keys: {e}")
        return

    for _, row in sample_keys.iterrows():
        mask = (df[group_cols] == row.values).all(axis=1) if len(group_cols) > 1 else (df[group_cols[0]] == row.iloc[0])
        sub_df = df[mask]
        print(f"\n🔹 Group: {tuple(row.values)} — {len(sub_df)} rows")
        display(sub_df[numeric_cols].describe().round(2))

    del df
    gc.collect()

# ---------- Dataset Configuration ----------
dataset_config = [
    {
        "file": "C:/Users/unnat/OneDrive/Desktop/RA/Task_04_Descriptive_Stats/data/period_03/2024_fb_ads_president_scored_anon.csv",
        "group_cols": ["page_id", "ad_id"]  # as per instructions
    },
    {
        "file": "C:/Users/unnat/OneDrive/Desktop/RA/Task_04_Descriptive_Stats/data/period_03/2024_fb_posts_president_scored_anon.csv",
        "group_cols": ["Page Admin Top Country"]  # logical aggregation
    },
    {
        "file": "C:/Users/unnat/OneDrive/Desktop/RA/Task_04_Descriptive_Stats/data/period_03/2024_tw_posts_president_scored_anon.csv",
        "group_cols": ["source"]  # Twitter platform
    }
]

# ---------- Run All Analyses ----------
for config in dataset_config:
    pandas_full_describe(config["file"])
    pandas_groupby_summary(config["file"], config["group_cols"])




📊 Dataset: 2024_fb_ads_president_scored_anon.csv

--- Numeric Summary ---


Unnamed: 0,estimated_audience_size,estimated_impressions,estimated_spend,scam_illuminating,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating,engagement_cta_subtype_illuminating,fundraising_cta_subtype_illuminating,voting_cta_subtype_illuminating,covid_topic_illuminating,economy_topic_illuminating,education_topic_illuminating,environment_topic_illuminating,foreign_policy_topic_illuminating,governance_topic_illuminating,health_topic_illuminating,immigration_topic_illuminating,lgbtq_issues_topic_illuminating,military_topic_illuminating,race_and_ethnicity_topic_illuminating,safety_topic_illuminating,social_and_cultural_topic_illuminating,technology_and_privacy_topic_illuminating,womens_issue_topic_illuminating,incivility_illuminating,freefair_illuminating,fraud_illuminating
count,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0,246745.0
mean,556462.86,45601.53,1061.29,0.07,0.05,0.55,0.38,0.27,0.22,0.57,0.12,0.23,0.14,0.02,0.12,0.01,0.02,0.01,0.03,0.11,0.03,0.0,0.0,0.01,0.03,0.11,0.0,0.08,0.19,0.01,0.0
std,409864.76,136790.77,4992.56,0.26,0.22,0.5,0.49,0.44,0.42,0.49,0.33,0.42,0.35,0.16,0.33,0.12,0.14,0.07,0.16,0.31,0.18,0.06,0.05,0.11,0.18,0.31,0.03,0.27,0.39,0.08,0.05
min,0.0,499.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,75000.0,499.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,300000.0,3499.0,49.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1000001.0,22499.0,449.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1000001.0,1000000.0,474999.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0



--- Categorical Summary ---


Unnamed: 0,unique,top,top_count
page_id,4475,4d66f5853f0365dba032a87704a634f023d15babde973b...,55503
ad_id,246745,0000a88a64484883df6ca1fabd357e4c5950e443123ce3...,1
ad_creation_time,547,2024-10-27,8619
bylines,3790,HARRIS FOR PRESIDENT,49788
currency,18,USD,246599
delivery_by_region,141122,{},30989
demographic_distribution,215622,{},30989
publisher_platforms,9,"['facebook', 'instagram']",214434
illuminating_scored_message,26338,e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b93...,5972
illuminating_mentions,278,[],73205




📊 Grouped Summary for: 2024_fb_ads_president_scored_anon.csv | Grouped by: ['page_id', 'ad_id']

🔹 Group: ('4ff23a48b53d988df50ddfebb0e442a984ab8f94e874ef9b9cb34394e0c5d230', '0ddb025b8544e2d58e6977ad417e742a52522b3e1fc1c9d9b61c57148f8d72fc') — 1 rows


Unnamed: 0,estimated_audience_size,estimated_impressions,estimated_spend,scam_illuminating,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating
count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,30000.0,47499.0,249.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
std,,,,,,,,,,
min,30000.0,47499.0,249.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,30000.0,47499.0,249.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,30000.0,47499.0,249.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
75%,30000.0,47499.0,249.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
max,30000.0,47499.0,249.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0



🔹 Group: ('4ff23a48b53d988df50ddfebb0e442a984ab8f94e874ef9b9cb34394e0c5d230', '86229868e6bde3661724fe02da93504bb4fb5da8c2550d7b7cf193c687e89fa6') — 1 rows


Unnamed: 0,estimated_audience_size,estimated_impressions,estimated_spend,scam_illuminating,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating
count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,75000.0,22499.0,49.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
std,,,,,,,,,,
min,75000.0,22499.0,49.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
25%,75000.0,22499.0,49.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
50%,75000.0,22499.0,49.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
75%,75000.0,22499.0,49.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
max,75000.0,22499.0,49.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0



🔹 Group: ('4ff23a48b53d988df50ddfebb0e442a984ab8f94e874ef9b9cb34394e0c5d230', '07b5aefc27e872e971f793e49aac38496fa62e484f3928e2b6a2b6e3e08cac8d') — 1 rows


Unnamed: 0,estimated_audience_size,estimated_impressions,estimated_spend,scam_illuminating,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating
count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,75000.0,32499.0,149.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
std,,,,,,,,,,
min,75000.0,32499.0,149.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,75000.0,32499.0,149.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,75000.0,32499.0,149.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
75%,75000.0,32499.0,149.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
max,75000.0,32499.0,149.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0




📊 Dataset: 2024_fb_posts_president_scored_anon.csv

--- Numeric Summary ---


Unnamed: 0,Likes,Comments,Shares,Love,Wow,Haha,Sad,Angry,Care,Post Views,Total Views,Total Views For All Crossposts,Sponsor Id,Sponsor Name,Sponsor Category,Overperforming Score,illuminating_scored_messageelection_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating,engagement_cta_subtype_illuminating,fundraising_cta_subtype_illuminating,voting_cta_subtype_illuminating,covid_topic_illuminating,economy_topic_illuminating,education_topic_illuminating,environment_topic_illuminating,foreign_policy_topic_illuminating,governance_topic_illuminating,health_topic_illuminating,immigration_topic_illuminating,lgbtq_issues_topic_illuminating,military_topic_illuminating,race_and_ethnicity_topic_illuminating,safety_topic_illuminating,social_and_cultural_topic_illuminating,technology_and_privacy_topic_illuminating,womens_issue_topic_illuminating,incivility_illuminating,scam_illuminating,freefair_illuminating,fraud_illuminating
count,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,16544.0,16544.0,16544.0,0.0,0.0,0.0,16544.0,0.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,19009.0,18060.0,19009.0,19009.0
mean,2377.7,901.58,320.54,413.88,5.87,105.72,10.17,20.06,34.93,6485.06,7461.85,3555.94,,,,-2.74,,0.55,0.46,0.22,0.15,0.13,0.09,0.02,0.02,0.05,0.09,0.01,0.02,0.04,0.03,0.05,0.04,0.0,0.01,0.02,0.03,0.06,0.0,0.03,0.13,0.02,0.0,0.01
std,11253.47,3681.98,1722.16,3730.94,52.95,942.03,418.33,156.02,790.1,90392.96,95976.28,88094.05,,,,7.81,,0.5,0.5,0.41,0.36,0.34,0.29,0.13,0.15,0.22,0.29,0.12,0.15,0.19,0.17,0.22,0.2,0.06,0.07,0.15,0.18,0.24,0.05,0.16,0.33,0.14,0.05,0.09
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,-198.75,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,31.0,8.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,-3.87,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,139.0,48.0,21.0,4.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,-1.62,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,738.0,354.0,102.0,70.0,3.0,29.0,2.0,9.0,6.0,0.0,0.0,0.0,,,,1.16,,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,351979.0,93872.0,76150.0,244482.0,4345.0,99276.0,56111.0,11814.0,85236.0,4276477.0,4462155.0,4499458.0,,,,246.78,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0



--- Categorical Summary ---


Unnamed: 0,unique,top,top_count
Facebook_Id,21,32fc18da91029ff09bf74fe9887eace6b5d2145809d583...,9013
post_id,19009,0001e4a1dbadf84f0f43719972b60597cc79f1908bf601...,1
Page Category,6,PERSON,9453
Page Admin Top Country,1,US,16280
Post Created,18951,2023-11-14 11:11:44 EST,2
Post Created Date,425,2024-10-31,103
Post Created Time,16102,17:30:01,7
Type,9,Link,7404
Total Interactions,5665,15,115
Video Share Status,3,owned,15738




📊 Grouped Summary for: 2024_fb_posts_president_scored_anon.csv | Grouped by: ['Page Admin Top Country']

🔹 Group: ('US',) — 16280 rows


Unnamed: 0,Likes,Comments,Shares,Love,Wow,Haha,Sad,Angry,Care,Post Views
count,16280.0,16280.0,16280.0,16280.0,16280.0,16280.0,16280.0,16280.0,16280.0,16280.0
mean,1478.92,695.04,226.16,483.17,6.85,123.41,11.87,23.41,40.78,6588.37
std,8224.84,3174.51,1378.69,4027.41,57.15,1016.87,452.01,168.36,853.63,91119.3
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,26.0,7.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
50%,107.0,37.0,17.0,11.0,1.0,5.0,0.0,2.0,1.0,0.0
75%,566.25,283.0,78.0,95.0,4.0,46.0,2.0,12.0,8.0,0.0
max,315973.0,93872.0,76150.0,244482.0,4345.0,99276.0,56111.0,11814.0,85236.0,4276477.0



🔹 Group: (nan,) — 0 rows


Unnamed: 0,Likes,Comments,Shares,Love,Wow,Haha,Sad,Angry,Care,Post Views
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,,,,,,,,,,
std,,,,,,,,,,
min,,,,,,,,,,
25%,,,,,,,,,,
50%,,,,,,,,,,
75%,,,,,,,,,,
max,,,,,,,,,,




📊 Dataset: 2024_tw_posts_president_scored_anon.csv

--- Numeric Summary ---


Unnamed: 0,retweetCount,replyCount,likeCount,quoteCount,viewCount,bookmarkCount,quoteId,inReplyToId,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating,issue_msg_type_illuminating,attack_msg_type_illuminating,image_msg_type_illuminating,cta_msg_type_illuminating,engagement_cta_subtype_illuminating,fundraising_cta_subtype_illuminating,voting_cta_subtype_illuminating,covid_topic_illuminating,economy_topic_illuminating,education_topic_illuminating,environment_topic_illuminating,foreign_policy_topic_illuminating,governance_topic_illuminating,health_topic_illuminating,immigration_topic_illuminating,lgbtq_issues_topic_illuminating,military_topic_illuminating,race_and_ethnicity_topic_illuminating,safety_topic_illuminating,social_and_cultural_topic_illuminating,technology_and_privacy_topic_illuminating,womens_issue_topic_illuminating,incivility_illuminating,scam_illuminating,freefair_illuminating,fraud_illuminating
count,27304.0,27304.0,27304.0,27304.0,27304.0,27304.0,3287.0,3345.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,26034.0,27304.0,27304.0
mean,1322.06,1063.79,6913.69,128.08,507084.7,136.21,1.764298e+18,1.758286e+18,0.04,0.56,0.51,0.31,0.23,0.11,0.07,0.01,0.02,0.01,0.16,0.02,0.03,0.04,0.02,0.06,0.07,0.0,0.01,0.02,0.04,0.05,0.0,0.02,0.18,0.01,0.0,0.0
std,3405.0,3174.98,21590.31,1131.53,3212174.0,712.58,6.894687e+16,4.361197e+16,0.19,0.5,0.5,0.46,0.42,0.31,0.25,0.09,0.13,0.09,0.37,0.13,0.17,0.2,0.15,0.23,0.25,0.06,0.1,0.12,0.19,0.22,0.05,0.15,0.38,0.11,0.04,0.05
min,0.0,0.0,0.0,0.0,5.0,0.0,7.912639e+17,1.240067e+18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,84.0,43.0,393.0,5.0,27852.75,4.0,1.726459e+18,1.726801e+18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,333.0,131.0,1406.0,17.0,70942.0,21.0,1.756496e+18,1.746641e+18,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1071.0,501.25,5010.0,69.0,303663.0,76.0,1.816599e+18,1.789226e+18,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,144615.0,121270.0,915221.0,123320.0,333502800.0,42693.0,1.853576e+18,1.853531e+18,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0



--- Categorical Summary ---


Unnamed: 0,unique,top,top_count
id,27304,0000635d0c9e7bdf89dfc13811d080dbffac5489459435...,1
url,27304,0000179c6b90798f167528aaaaf67886a975ea68af78f4...,1
source,14,Twitter Web App,14930
createdAt,27014,2023-10-06 04:55:21,4
lang,12,en,27281
month_year,15,2024-10,3586
illuminating_scored_message,27136,36cb7d55fcf85362ca03f624c2f574f1f55f89db559b17...,21




📊 Grouped Summary for: 2024_tw_posts_president_scored_anon.csv | Grouped by: ['source']

🔹 Group: ('Twitter for iPhone',) — 8494 rows


Unnamed: 0,retweetCount,replyCount,likeCount,quoteCount,viewCount,bookmarkCount,quoteId,inReplyToId,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating
count,8494.0,8494.0,8494.0,8494.0,8494.0,8494.0,1856.0,1798.0,8168.0,8168.0
mean,612.62,289.79,3741.96,43.78,245622.12,69.8,1.75062e+18,1.748088e+18,0.04,0.5
std,2155.93,1035.97,15956.71,190.69,954516.49,402.96,6.502825e+16,3.456482e+16,0.2,0.5
min,0.0,0.0,0.0,0.0,9.0,0.0,7.957424e+17,1.240067e+18,0.0,0.0
25%,16.0,11.0,97.0,1.0,11731.75,1.0,1.72198e+18,1.726772e+18,0.0,0.0
50%,82.5,57.0,413.0,6.0,35590.5,5.0,1.74154e+18,1.737525e+18,0.0,0.0
75%,383.0,202.75,2030.75,26.0,141409.0,28.0,1.787349e+18,1.764762e+18,0.0,1.0
max,67113.0,30215.0,588065.0,6552.0,31294362.0,22901.0,1.853576e+18,1.853134e+18,1.0,1.0



🔹 Group: ('Twitter Web App',) — 14930 rows


Unnamed: 0,retweetCount,replyCount,likeCount,quoteCount,viewCount,bookmarkCount,quoteId,inReplyToId,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating
count,14930.0,14930.0,14930.0,14930.0,14930.0,14930.0,1390.0,1482.0,14199.0,14199.0
mean,1317.11,719.2,6669.75,120.05,474031.2,154.31,1.782319e+18,1.771888e+18,0.04,0.58
std,3571.55,2689.14,22475.18,1464.57,3966176.0,843.42,7.067792e+16,4.978459e+16,0.19,0.49
min,0.0,0.0,0.0,0.0,5.0,0.0,7.912639e+17,1.478769e+18,0.0,0.0
25%,188.0,60.0,694.0,7.0,34470.75,10.0,1.743129e+18,1.728961e+18,0.0,0.0
50%,418.0,132.5,1660.0,19.0,71613.0,28.0,1.799066e+18,1.771728e+18,0.0,1.0
75%,1021.0,363.0,4505.5,59.0,236467.5,86.0,1.827992e+18,1.822177e+18,0.0,1.0
max,144615.0,121270.0,915221.0,123320.0,333502800.0,42693.0,1.85311e+18,1.853531e+18,1.0,1.0



🔹 Group: ('Sprout Social',) — 2933 rows


Unnamed: 0,retweetCount,replyCount,likeCount,quoteCount,viewCount,bookmarkCount,quoteId,inReplyToId,election_integrity_Truth_illuminating,advocacy_msg_type_illuminating
count,2933.0,2933.0,2933.0,2933.0,2933.0,2933.0,29.0,0.0,2769.0,2769.0
mean,3524.79,5186.27,18173.41,426.12,1501097.31,230.8,1.778639e+18,,0.03,0.68
std,4584.68,5804.89,28810.23,855.02,3432962.0,581.85,3.237946e+16,,0.18,0.47
min,13.0,12.0,60.0,0.0,2647.0,0.0,1.696504e+18,,0.0,0.0
25%,719.0,1382.0,3708.0,75.0,286290.0,24.0,1.758534e+18,,0.0,0.0
50%,2198.0,3361.0,9480.0,195.0,659392.0,74.0,1.782752e+18,,0.0,1.0
75%,4671.0,6920.0,21318.0,454.0,1467341.0,204.0,1.801976e+18,,0.0,1.0
max,80331.0,60598.0,423537.0,17200.0,76522204.0,13271.0,1.851262e+18,,1.0,1.0


In [9]:
files = [
    "C:/Users/unnat/OneDrive/Desktop/RA/Task_04_Descriptive_Stats/data/period_03/2024_fb_ads_president_scored_anon.csv",
    "C:/Users/unnat/OneDrive/Desktop/RA/Task_04_Descriptive_Stats/data/period_03/2024_fb_posts_president_scored_anon.csv",
    "C:/Users/unnat/OneDrive/Desktop/RA/Task_04_Descriptive_Stats/data/period_03/2024_tw_posts_president_scored_anon.csv"
]

for file_path in files:
    df = pd.read_csv(file_path, nrows=1)  # Only load the header row
    print(f"\n📄 Columns in: {file_path.split('/')[-1]}")
    print(df.columns.tolist())



📄 Columns in: 2024_fb_ads_president_scored_anon.csv
['page_id', 'ad_id', 'ad_creation_time', 'bylines', 'currency', 'delivery_by_region', 'demographic_distribution', 'estimated_audience_size', 'estimated_impressions', 'estimated_spend', 'publisher_platforms', 'illuminating_scored_message', 'illuminating_mentions', 'scam_illuminating', 'election_integrity_Truth_illuminating', 'advocacy_msg_type_illuminating', 'issue_msg_type_illuminating', 'attack_msg_type_illuminating', 'image_msg_type_illuminating', 'cta_msg_type_illuminating', 'engagement_cta_subtype_illuminating', 'fundraising_cta_subtype_illuminating', 'voting_cta_subtype_illuminating', 'covid_topic_illuminating', 'economy_topic_illuminating', 'education_topic_illuminating', 'environment_topic_illuminating', 'foreign_policy_topic_illuminating', 'governance_topic_illuminating', 'health_topic_illuminating', 'immigration_topic_illuminating', 'lgbtq_issues_topic_illuminating', 'military_topic_illuminating', 'race_and_ethnicity_topic_i