# In [None]:
import csv
import math
from collections import defaultdict, Counter


# Input CSV that load_csv() reads (raw string so the backslashes stay literal).
file_path = r'C:\Users\vrush\Music\RA\period_03\2024_fb_posts_president_scored_anon.csv'
# Columns whose combined values form the grouping key in analyze_groups().
group_columns = ['page_id', 'Facebook_Id']
# Text file that receives a copy of the printed tables.
output_file = 'fb_posts_analysis.txt'


def load_csv(filepath):
    """Read a CSV file with a header row into a list of row dicts.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A ``(data, fieldnames)`` tuple. ``data`` holds one dict per data row
        as produced by ``csv.DictReader``; ``fieldnames`` is the list of
        header names, or an empty list when the file has no header row.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands newline handling to the csv module, so line breaks
    # embedded in quoted fields are preserved exactly as written.
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise end up glued to the first header name.
    with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        # Resolve the header while the file is still open: for an empty file
        # DictReader would otherwise try to read from the closed handle.
        fieldnames = reader.fieldnames or []
        data = list(reader)
    return data, fieldnames


def is_float(value):
    """Return True when ``float(value)`` succeeds, otherwise False.

    Args:
        value: Candidate value, typically a CSV cell string. ``None`` and
            other non-convertible objects are accepted and yield False.

    Returns:
        bool: True if the value can be converted with ``float()``.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # TypeError covers None and other non-string/non-number objects,
        # e.g. the None that csv.DictReader uses for fields missing from
        # short rows.
        return False
    return True

def get_column_values(data, column):
    """Collect the non-missing values of one column.

    A value counts as missing when it is the empty string or ``None``
    (``csv.DictReader`` uses ``None`` for fields absent from short rows), or
    when the row has no entry for ``column`` at all.

    Args:
        data: Iterable of row dicts.
        column: Key of the column to extract.

    Returns:
        list: The present values, in row order.
    """
    candidates = (row.get(column) for row in data)
    return [value for value in candidates if value is not None and value != '']

def analyze_column(values):
    """Summarise a column's values with numeric and categorical statistics.

    Values that ``float()`` can parse feed the numeric statistics; all other
    values feed the categorical ones.

    Args:
        values: Sized sequence of cell values (typically strings).

    Returns:
        dict with the keys:
            count: Number of values received.
            mean, min, max: Rounded to 2 decimals, or "-" when there are no
                numeric values.
            std_dev: Sample standard deviation (n - 1 denominator) rounded
                to 2 decimals, or "-" with fewer than two numeric values.
            unique_count: Number of distinct non-numeric values, or "-".
            most_common: "<value> (Count: <n>)" for the most frequent
                non-numeric value, or "-".
    """
    # Partition in a single pass so each value is parsed only once.
    numeric_values = []
    non_numeric_values = []
    for value in values:
        try:
            numeric_values.append(float(value))
        except (TypeError, ValueError):
            non_numeric_values.append(value)

    stats = {'count': len(values)}

    if numeric_values:
        n = len(numeric_values)
        # Keep the unrounded mean for the variance; rounding is applied only
        # to the reported figures so it cannot distort the deviation.
        mean = sum(numeric_values) / n
        stats['mean'] = round(mean, 2)
        stats['min'] = round(min(numeric_values), 2)
        stats['max'] = round(max(numeric_values), 2)
        if n > 1:
            variance = sum((x - mean) ** 2 for x in numeric_values) / (n - 1)
            stats['std_dev'] = round(math.sqrt(variance), 2)
        else:
            stats['std_dev'] = "-"
    else:
        stats['mean'] = "-"
        stats['min'] = "-"
        stats['max'] = "-"
        stats['std_dev'] = "-"

    if non_numeric_values:
        counter = Counter(non_numeric_values)
        stats['unique_count'] = len(counter)
        most_common_value, count = counter.most_common(1)[0]
        stats['most_common'] = f"{most_common_value} (Count: {count})"
    else:
        stats['unique_count'] = "-"
        stats['most_common'] = "-"

    return stats

def truncate(value, length=40):
    """Return ``str(value)``, cut to ``length`` characters plus '...' if longer.

    Text of at most ``length`` characters is returned unchanged.
    """
    text = str(value)
    return text if len(text) <= length else text[:length] + '...'

def print_table(headers, rows, title=None, file=None):
    """Print a text table to stdout and optionally mirror it to ``file``.

    Every cell is shortened with ``truncate`` before layout. Columns named
    "Column" and "Most Frequent" are left-aligned; all other columns are
    right-aligned.

    Args:
        headers: Column titles, one per column.
        rows: Iterable of rows; each row is a sequence of cell values in
            header order.
        title: Optional heading emitted above the table.
        file: Optional writable text stream that receives the same lines.
    """
    # Preferred widths per known header. They are treated as minimums: a
    # column grows to fit its longest cell, because truncated cells (up to
    # 43 characters) and long numbers would otherwise overflow the fixed
    # width and push the following columns out of alignment.
    minimum_widths = {
        "Column": 30,
        "Count": 8,
        "Unique": 8,
        "Mean": 10,
        "Min": 10,
        "Max": 10,
        "Std": 10,
        "Most Frequent": 40,
    }
    left_aligned = ("Column", "Most Frequent")

    def emit(text):
        # Send one line to stdout and, when given, to the mirror stream.
        print(text)
        if file is not None:
            file.write(text + "\n")

    if title:
        emit(f"\n===== {title} =====")

    rows_trunc = [[truncate(cell) for cell in row] for row in rows]

    col_widths = []
    for i, header in enumerate(headers):
        widest = len(str(header))
        for row in rows_trunc:
            if i < len(row):
                widest = max(widest, len(row[i]))
        col_widths.append(max(minimum_widths.get(header, 0), widest))

    header_row = " | ".join(
        f"{header:<{width}}" for header, width in zip(headers, col_widths)
    )
    emit(header_row)
    emit("-" * len(header_row))

    for row in rows_trunc:
        cells = []
        for i, cell in enumerate(row):
            align = "<" if headers[i] in left_aligned else ">"
            cells.append(f"{cell:{align}{col_widths[i]}}")
        emit(" | ".join(cells))

def analyze_groups(data, fieldnames, group_cols, file=None):
    """Print one statistics table per group of rows.

    Rows are grouped by the tuple of their ``group_cols`` values ('NA' stands
    in for a column the row lacks). For each group, every column in
    ``fieldnames`` is summarised with ``analyze_column`` and the resulting
    table is passed to ``print_table``.

    Args:
        data: Iterable of row dicts.
        fieldnames: Column names to summarise, in display order.
        group_cols: Column names whose values form the group key.
        file: Optional text stream forwarded to ``print_table``.
    """
    headers = ["Column", "Count", "Mean", "Min", "Max", "Std", "Unique", "Most Frequent"]
    # Statistics rendered through str(); 'most_common' is passed through as is.
    stringified_keys = ('count', 'mean', 'min', 'max', 'std_dev', 'unique_count')

    # A plain dict keeps groups in first-seen order.
    buckets = {}
    for record in data:
        group_key = tuple(record.get(name, 'NA') for name in group_cols)
        buckets.setdefault(group_key, []).append(record)

    for group_key, members in buckets.items():
        table = []
        for col in fieldnames:
            summary = analyze_column(get_column_values(members, col))
            cells = [col]
            cells.extend(str(summary.get(stat, '-')) for stat in stringified_keys)
            cells.append(summary.get('most_common', '-'))
            table.append(cells)
        print_table(
            headers,
            table,
            title=f"Group: {group_key} (Count: {len(members)})",
            file=file,
        )


# Load the whole CSV into memory and report its header and size on stdout.
data, fieldnames = load_csv(file_path)
print(f"Fieldnames: {fieldnames}")
print(f"Total Rows: {len(data)}")

# Write the row total and every per-group table to the output file;
# analyze_groups also prints the same tables to stdout.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(f"Total Rows: {len(data)}\n")
    analyze_groups(data, fieldnames, group_columns, file=f)

print(f"\n✅ Grouped results saved to {output_file}")