# Notebook cell In [3]: install the table-rendering dependency once, from a shell:
#     pip install tabulate


# Notebook cell In [7]:
"""
pandas_stats_fb_ads_president_tabulate.py
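Descriptive statistics computed with pandas and rendered with tabulate:
per-column mean, min, max, unique count, most frequent value and its count.
Standard deviation and count are omitted, numeric values are rounded to
2 decimals, and no head() preview is printed.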
"""

import pandas as pd
import sys
from tabulate import tabulate


# Let pandas show up to 100 characters per cell before truncating.
pd.options.display.max_colwidth = 100

# Input CSV that is read by main().
filepath = r'C:\Users\vrush\Music\RA\period_03\2024_fb_ads_president_scored_anon.csv'
# Text report that main() writes alongside its console output.
output_path = 'fb_ads_pandas_stats_output.txt'

def print_section(title, file):
    """Emit a section banner to stdout and to an open text file.

    The banner is ``title`` framed by ten ``=`` characters on each side,
    with a newline before and after it.

    Args:
        title: Text shown in the middle of the banner.
        file: Writable text file object that receives the same banner.
    """
    rule = '=' * 10
    banner = "\n{0} {1} {0}\n".format(rule, title)
    # Console first, then the file copy.
    print(banner)
    file.write(banner)

def format_table(df):
    """Render ``df`` as a GitHub-style table string via tabulate.

    The frame's column labels are used as headers and its index is shown
    as the leading column.
    """
    render_options = {
        'headers': 'keys',
        'tablefmt': 'github',
        'showindex': True,
    }
    return tabulate(df, **render_options)

def build_desc(df):
    """Build a display-ready summary with one row per column of ``df``.

    Args:
        df: DataFrame to summarise.

    Returns:
        A DataFrame indexed by the column names of ``df`` with the columns
        ``Mean``, ``Min``, ``Max`` (rounded to 2 decimals), ``Unique``
        (number of distinct non-null values), ``Most Frequent`` (the value
        with the highest count) and ``Freq Cnt`` (that count). Any statistic
        that is unavailable for a column is shown as ``"-"``. A frame with
        no columns yields an empty summary with the same six columns.
    """
    display_names = {
        'mean': 'Mean',
        'min': 'Min',
        'max': 'Max',
        'unique': 'Unique',
        'most_freq': 'Most Frequent',
        'freq_count': 'Freq Cnt',
    }

    if df.shape[1] == 0:
        # describe() raises ValueError on a frame without columns.
        return pd.DataFrame(columns=list(display_names.values()))

    # describe() omits mean/min/max entirely when no column supports them
    # (e.g. a text-only frame); reindex() guarantees the three columns exist,
    # filled with NaN where missing, instead of raising KeyError later.
    desc = (
        df.describe(include='all')
        .transpose()
        .reindex(columns=['mean', 'min', 'max'])
    )
    desc['unique'] = df.nunique()

    # Compute value_counts() once per column and reuse it for both the
    # most frequent value and its count.
    top_values = []
    top_counts = []
    for _, column in df.items():
        counts = column.value_counts()
        if counts.empty:
            # Column has no non-null values.
            top_values.append("-")
            top_counts.append("-")
        else:
            top_values.append(counts.index[0])
            top_counts.append(counts.iloc[0])
    desc['most_freq'] = pd.Series(top_values, index=df.columns)
    desc['freq_count'] = pd.Series(top_counts, index=df.columns)

    desc = desc.rename(columns=display_names)

    # Round numeric columns; non-numeric entries become NaN and are
    # replaced by the "-" placeholder below.
    for col in ['Mean', 'Min', 'Max']:
        desc[col] = pd.to_numeric(desc[col], errors='coerce').round(2)

    desc = desc.fillna('-')
    return desc

def main():
    """Load the ads CSV and emit the statistics report to stdout and to disk.

    Reads ``filepath`` with pandas, then writes to ``output_path`` (UTF-8,
    overwritten on each run) while echoing the same text to the console:
    the list of column names, a summary table for the whole dataset, and
    summary tables for the first three ``page_id`` groups and the first
    three ``(page_id, ad_id)`` groups.
    """
    frame = pd.read_csv(filepath)

    with open(output_path, 'w', encoding='utf-8') as report:

        def emit(text, trailer):
            # Console first, then the file copy with its trailing newline(s).
            print(text)
            report.write(text + trailer)

        def emit_stats(data):
            # Summarise, render, and emit one table followed by a blank line.
            emit(format_table(build_desc(data)), "\n\n")

        # ---------- HEADER ----------
        print_section('Header', report)
        emit(f"Header: {list(frame.columns)}", "\n\n")

        # ---------- ENTIRE DATASET ----------
        print_section('Descriptive Statistics for Entire Dataset', report)
        emit_stats(frame)

        # ---------- GROUP BY page_id ----------
        print_section('Grouped by page_id (First 3 Groups)', report)
        # Pairing with range(3) stops after three groups without pulling a fourth.
        for _, (page_id, group) in zip(range(3), frame.groupby('page_id')):
            emit(f"\nGroup: page_id={page_id}", "\n")
            emit_stats(group)

        # ---------- GROUP BY page_id and ad_id ----------
        print_section('Grouped by page_id and ad_id (First 3 Groups)', report)
        by_page_and_ad = frame.groupby(['page_id', 'ad_id'])
        for _, ((page_id, ad_id), group) in zip(range(3), by_page_and_ad):
            emit(f"\nGroup: page_id={page_id}, ad_id={ad_id}", "\n")
            emit_stats(group)

        print_section('Script Completed', report)
        emit('All stats saved, rounded to 2 decimals, no Count, no head().', "\n")

# Generate the report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
