In [1]:
!pip install polars tabulate


Defaulting to user installation because normal site-packages is not writeable


In [2]:
import polars as pl
from tabulate import tabulate

# Report the imported Polars version, then confirm that both imports succeeded.
for line in (pl.__version__, "✅ Polars + tabulate installed OK!"):
    print(line)


1.31.0
✅ Polars + tabulate installed OK!


In [3]:
"""
polars_stats_2024_fb_ads_president_scored_anon.py
Clean descriptive statistics with Polars for 2024_fb_ads_president_scored_anon.csv
Outputs ONLY to the file named by output_path (default: Output_polars_fb_ads_presidents.txt)
"""

import os
import sys
from contextlib import redirect_stdout

import polars as pl


# Input CSV and report destination. Both can be overridden through environment
# variables so the notebook can run on another machine without editing code;
# when a variable is unset, the original hard-coded value is used unchanged.
filepath = os.environ.get(
    'FB_ADS_CSV_PATH',
    r'C:\Users\vrush\Music\RA\period_03\2024_fb_ads_president_scored_anon.csv',
)
output_path = os.environ.get('FB_ADS_STATS_OUTPUT', 'Output_polars_fb_ads_presidents.txt')

def print_section(title):
    """Print a blank line followed by ``title`` framed by ten '=' characters on each side."""
    rule = '=' * 10
    banner = "\n" + rule + " " + f"{title}" + " " + rule
    print(banner)

def main():
    """Write descriptive statistics for the CSV at ``filepath`` to ``output_path``.

    Everything printed while the report is generated is redirected into the
    output file, which is opened as UTF-8 in write mode (an existing file is
    overwritten). The report contains, in order:

    1. the column names,
    2. ``describe()`` for the whole frame,
    3. the three most frequent values of every string column,
    4. ``describe()`` for the first three distinct ``page_id`` values,
    5. ``describe()`` for the first three distinct ``(page_id, ad_id)`` pairs.

    "First" means order of first appearance in the CSV, so repeated runs on
    the same file produce the same report.

    Exceptions raised by ``open`` or by Polars propagate to the caller; stdout
    is restored to its previous stream in every case.
    """
    # redirect_stdout restores whatever sys.stdout was on entry (inside a
    # Jupyter kernel that is the kernel's own stream, which is not
    # sys.__stdout__) and does so even when an exception is raised part-way
    # through. It is listed second so it is undone before the file is closed.
    with open(output_path, 'w', encoding='utf-8') as report, redirect_stdout(report):
        df = pl.read_csv(filepath)

        # Cast likely-numeric columns to Float64 when they are present.
        # strict=False turns unconvertible values into nulls instead of raising.
        numeric_cols = ["estimated_audience_size", "estimated_impressions", "estimated_spend"]
        df = df.with_columns([
            pl.col(col).cast(pl.Float64, strict=False) for col in numeric_cols if col in df.columns
        ])

        print_section('Header')
        print('Header:', df.columns)

        print_section('Descriptive Statistics for Entire Dataset')
        print(df.describe())

        print_section('Value Counts for Non-Numeric Columns (Top 3)')
        for col in df.columns:
            if df[col].dtype == pl.Utf8:
                top_values = df.group_by(col).len().sort('len', descending=True).head(3)
                print(f"\nTop 3 for {col}:")
                print(top_values)

        print_section('Grouped by page_id (First 3 Groups)')
        # maintain_order=True keeps first-appearance order; without it the
        # order of unique() is unspecified and the three groups shown could
        # differ from run to run.
        unique_page_ids = (
            df.select('page_id').unique(maintain_order=True).to_series().to_list()
        )
        for page_id in unique_page_ids[:3]:
            group = df.filter(pl.col('page_id') == page_id)
            print(f"\nGroup: page_id={page_id} (Count: {group.height})")
            print(group.describe())

        print_section('Grouped by page_id and ad_id (First 3 Groups)')
        unique_combos = df.select(['page_id', 'ad_id']).unique(maintain_order=True).rows()
        for page_id, ad_id in unique_combos[:3]:
            group = df.filter((pl.col('page_id') == page_id) & (pl.col('ad_id') == ad_id))
            print(f"\nGroup: page_id={page_id}, ad_id={ad_id} (Count: {group.height})")
            print(group.describe())

        print_section('Script Completed')
        # This message is still inside the redirect, so it lands in the file.
        print('All Polars stats written to file. No output in notebook.')

# Generate the report only when this code runs as the top-level program;
# importing it as a module defines the functions without calling main().
if __name__ == "__main__":
    main()
