# In [1]:
"""
polars_stats_fb_posts_president.py
Clean descriptive stats for FB posts using Polars.
"""

import contextlib
import sys

import polars as pl


# CSV file that main() loads with Polars.
filepath = r"C:\Users\vrush\Music\RA\period_03\2024_fb_posts_president_scored_anon.csv"
# Text file that main() writes the printed report into.
output_path = "Output_Polars_fb_posts_stats.txt"


# Columns whose distinct value combinations define the groups in the report.
group_columns = ["Facebook_Id", "post_id"]

def print_section(title):
    """Print a section banner: a blank line, then *title* framed by ten '=' on each side."""
    rule = "=" * 10
    print(f"\n{rule} {title} {rule}")

def main():
    """Write a descriptive-statistics report for the CSV at ``filepath``.

    Everything printed while the report is being built goes to ``output_path``
    (UTF-8, overwritten on each run). The report contains, in order:

    * the column names,
    * ``describe()`` for the whole frame,
    * the three most frequent ``Facebook_Id`` values,
    * ``describe()`` for the first three distinct ``group_columns``
      combinations, in order of first appearance in the data.

    Polars ``ComputeError`` / ``ColumnNotFoundError`` raised in the grouped
    section are written to the report instead of propagating. Any other
    exception propagates, and stdout is restored to whatever it was before
    the call in every case.
    """
    with open(output_path, 'w', encoding='utf-8') as report:
        # redirect_stdout puts back the *previous* stream (not sys.__stdout__)
        # on exit, even on error, so notebook / captured output keeps working.
        with contextlib.redirect_stdout(report):
            # ---------- LOAD ----------
            df = pl.read_csv(filepath)
            print_section('Header')
            print(f"Header: {df.columns}\n")

            print_section('Descriptive Statistics for Entire Dataset')
            print(df.describe())

            print_section('Value Counts for Facebook_Id (Top 3)')
            vc = df.group_by('Facebook_Id').len().sort('len', descending=True).head(3)
            print(vc)

            print_section(f'Grouped by {group_columns} (First 3 Groups)')
            try:
                # maintain_order=True makes "first 3" reproducible; head(3)
                # avoids pulling every distinct key into Python.
                first_keys = (
                    df.select(group_columns)
                    .unique(maintain_order=True)
                    .head(3)
                    .rows()
                )
                for key in first_keys:
                    # Narrow one key column at a time so the filter follows
                    # group_columns instead of hard-coded column names.
                    # NOTE: `==` never matches null, so a key containing null
                    # reports Count: 0.
                    group = df
                    for column, value in zip(group_columns, key):
                        group = group.filter(pl.col(column) == value)
                    label = ', '.join(
                        f"{column}={value}" for column, value in zip(group_columns, key)
                    )
                    print(f"\nGroup: {label} (Count: {len(group)})")
                    print(group.describe())
            except (pl.exceptions.ComputeError, pl.exceptions.ColumnNotFoundError) as e:
                # A missing group column raises ColumnNotFoundError, which is
                # the case the "Available columns" hint is meant for.
                print(f"\n❌ Polars error: {e}")
                print(f"Available columns: {df.columns}")

            print_section('Script Completed')
            print('All stats saved in readable format.')

if __name__ == "__main__":
    main()
