In [5]:
import pandas as pd
import sys
from tabulate import tabulate


# Let long text cells show up to 100 characters before pandas truncates them.
pd.options.display.max_colwidth = 100

# Input CSV that main() loads, and the text report it writes.
filepath = r"C:\Users\vrush\Music\RA\period_03\2024_fb_posts_president_scored_anon.csv"
output_path = "Output_Pandas_fb_posts_pandas_stats.txt"

# Columns whose value combinations define the groups summarised by main().
group_columns = ["Facebook_Id", "post_id"]

def print_section(title, file):
    """Print a section banner for ``title`` and write the same banner to ``file``.

    The banner is the title framed by ten ``=`` characters on each side,
    with a newline before and after it.

    Args:
        title: Text shown in the middle of the banner.
        file: Writable text file object that receives the banner.
    """
    bar = '=' * 10
    banner = f"\n{bar} {title} {bar}\n"
    print(banner)
    file.write(banner)

def format_table(df):
    """Render ``df`` as a GitHub-style text table.

    Column names are used as headers and the index is included as the
    first column.

    Args:
        df: Table to render.

    Returns:
        The formatted table as returned by ``tabulate``.
    """
    layout = {
        'headers': 'keys',
        'tablefmt': 'github',
        'showindex': True,
    }
    return tabulate(df, **layout)

def build_desc(df):
    """Build a one-row-per-column summary of ``df``.

    The result is indexed by the column names of ``df`` and has the columns
    ``Mean``, ``Min``, ``Max``, ``Unique``, ``Most Frequent`` and
    ``Freq Cnt``. ``Mean``/``Min``/``Max`` are coerced to numbers and rounded
    to two decimals; any value that is missing or not numeric is shown
    as ``"-"``.

    Args:
        df: DataFrame to summarise.

    Returns:
        A new DataFrame holding the summary; ``df`` itself is not modified.
    """
    desc = df.describe(include='all').transpose()
    desc['unique'] = df.nunique()

    # Compute value_counts() once per column and read both the top value
    # and its count from that single result.
    top_values = []
    top_counts = []
    for _, column in df.items():
        counts = column.value_counts()
        if counts.empty:
            top_values.append("-")
            top_counts.append("-")
        else:
            top_values.append(counts.index[0])
            top_counts.append(counts.iloc[0])
    desc['most_freq'] = pd.Series(top_values, index=df.columns)
    desc['freq_count'] = pd.Series(top_counts, index=df.columns)

    # describe() omits mean/min/max when the frame has no numeric columns.
    # reindex() creates any missing statistic as NaN instead of raising
    # KeyError, so such frames still produce a table (rendered as "-").
    desc = desc.reindex(
        columns=['mean', 'min', 'max', 'unique', 'most_freq', 'freq_count']
    )
    desc = desc.rename(columns={
        'mean': 'Mean',
        'min': 'Min',
        'max': 'Max',
        'unique': 'Unique',
        'most_freq': 'Most Frequent',
        'freq_count': 'Freq Cnt',
    })

    for col in ['Mean', 'Min', 'Max']:
        desc[col] = pd.to_numeric(desc[col], errors='coerce').round(2)

    return desc.fillna('-')

def main():
    """Load the CSV at ``filepath`` and report summary statistics.

    Everything is printed to stdout and also written to ``output_path``:
    the column header list, a summary table for the whole dataset, and a
    summary table for each of the first three groups formed by
    ``group_columns``. A ``KeyError`` raised while grouping or summarising
    the groups is reported together with the available column names
    instead of aborting the run.
    """
    posts = pd.read_csv(filepath)
    column_names = list(posts.columns)

    with open(output_path, 'w', encoding='utf-8') as report:

        def emit(text, trailer):
            # Mirror one piece of text to the console and to the report file.
            print(text)
            report.write(text + trailer)

        print_section('Header', report)
        emit(f"Header: {column_names}", "\n\n")

        print_section('Descriptive Statistics for Entire Dataset', report)
        emit(format_table(build_desc(posts)), "\n\n")

        print_section(f'Grouped by {group_columns} (First 3 Groups)', report)

        try:
            # zip() against range(3) stops after three groups without
            # pulling a fourth one from the groupby iterator.
            for _, (key, group) in zip(range(3), posts.groupby(group_columns)):
                emit(f"\nGroup: {key} (Count: {len(group)})", "\n")
                emit(format_table(build_desc(group)), "\n\n")
        except KeyError as e:
            problem = f"\n❌ KeyError: {e}"
            available = f"Available columns: {column_names}"
            print(problem)
            print(available)
            report.write(problem + "\n" + available + "\n")

        print_section('Script Completed', report)
        emit('All stats saved in readable format.', "\n")

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()




Header: ['Facebook_Id', 'post_id', 'Page Category', 'Page Admin Top Country', 'Post Created', 'Post Created Date', 'Post Created Time', 'Type', 'Total Interactions', 'Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care', 'Video Share Status', 'Is Video Owner?', 'Post Views', 'Total Views', 'Total Views For All Crossposts', 'Video Length', 'Sponsor Id', 'Sponsor Name', 'Sponsor Category', 'Overperforming Score', 'illuminating_scored_messageelection_integrity_Truth_illuminating', 'advocacy_msg_type_illuminating', 'issue_msg_type_illuminating', 'attack_msg_type_illuminating', 'image_msg_type_illuminating', 'cta_msg_type_illuminating', 'engagement_cta_subtype_illuminating', 'fundraising_cta_subtype_illuminating', 'voting_cta_subtype_illuminating', 'covid_topic_illuminating', 'economy_topic_illuminating', 'education_topic_illuminating', 'environment_topic_illuminating', 'foreign_policy_topic_illuminating', 'governance_topic_illuminating', 'health_topic_illuminating'