<a href="https://colab.research.google.com/github/Kunal-Ahirrao/Task_04_Descriptive_Stats/blob/main/pure_python_stats_twitter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import csv
import statistics
from collections import defaultdict, Counter

def clean_number(value):
    """Parse a numeric string that may contain thousands separators.

    Commas are removed and surrounding whitespace is stripped before the
    text is handed to ``float``.

    Args:
        value: Text to parse, e.g. ``"1,234.5"``.

    Returns:
        The parsed ``float``, or ``None`` when ``value`` is not a string or
        does not represent a number.
    """
    try:
        return float(value.replace(",", "").strip())
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: value is not a str (None, int, bytes...).
        # ValueError: the cleaned text is not a valid number.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
        return None

def read_csv(filepath):
    """Load a UTF-8 CSV file into a list of row dictionaries.

    The first line of the file is used as the header; every following record
    becomes one dict mapping column name to the cell text.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, in file order.
    """
    # newline="" lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are preserved exactly.
    with open(filepath, 'r', encoding='utf-8', newline='') as f:
        return list(csv.DictReader(f))

def compute_stats(data, group_by=None, *, max_groups=10):
    """Build per-column summary lines for ``data``, optionally per group.

    Rows are bucketed by the values of the ``group_by`` columns (or into a
    single ``"all"`` bucket). For every column of a bucket, blank cells are
    ignored; if at least one remaining cell parses as a number the column is
    summarised numerically (count, mean, min, max, and sample standard
    deviation when there are two or more numbers), otherwise the number of
    distinct values and the most common value are reported.

    Args:
        data: Iterable of row dicts, e.g. the result of ``csv.DictReader``.
        group_by: Optional list of column names to group on.
        max_groups: Maximum number of groups to report, in first-seen order.
            ``None`` reports every group.

    Returns:
        A list of formatted text lines.

    Raises:
        KeyError: If a ``group_by`` column is missing from a row.
    """
    groups = defaultdict(list)
    for row in data:
        key = tuple(row[col] for col in group_by) if group_by else "all"
        groups[key].append(row)

    output = []
    # Slicing with None keeps every group.
    for group, rows in list(groups.items())[:max_groups]:
        output.append(f"\n=== Group: {group} ===")

        # Column names are taken from the first row of the group.
        for key in rows[0].keys():
            # DictReader yields None for cells missing from short rows and a
            # list under the None key for surplus cells; only non-blank text
            # cells take part in the statistics.
            values = [
                value
                for value in (row.get(key) for row in rows)
                if isinstance(value, str) and value.strip()
            ]
            # Parse each cell once, then keep the ones that were numeric.
            parsed = [clean_number(value) for value in values]
            float_vals = [number for number in parsed if number is not None]

            if float_vals:
                output.append(f"{key} | count={len(float_vals)}, mean={round(statistics.mean(float_vals), 2)}, min={min(float_vals)}, max={max(float_vals)}")
                # Sample standard deviation needs at least two data points.
                if len(float_vals) > 1:
                    output.append(f"{key} | std_dev={round(statistics.stdev(float_vals), 2)}")
            elif values:
                freq = Counter(values)
                output.append(f"{key} | unique={len(freq)}, top={freq.most_common(1)}")
    return output

# Load data
data = read_csv("/content/2024_tw_posts_president_scored_anon.csv")
output = []

# Overall
output.append("=== Overall Stats ===")
output.extend(compute_stats(data))

# Column names come from the first row. A CSV without data rows yields an
# empty list, so fall back to no columns instead of raising IndexError; the
# grouped sections below then report that they were skipped.
columns = data[0].keys() if data else ()

# Group by account_type
if "account_type" in columns:
    output.append("\n=== Grouped by account_type ===")
    output.extend(compute_stats(data, group_by=["account_type"]))
else:
    output.append("Column 'account_type' not found — skipping.\n")

# Group by account_id and post_id
if "account_id" in columns and "post_id" in columns:
    output.append("\n=== Grouped by account_id and post_id ===")
    output.extend(compute_stats(data, group_by=["account_id", "post_id"]))
else:
    output.append("Missing 'account_id' or 'post_id' — skipping group.\n")

# Save the report as one text file, one entry per line.
with open("pure_python_output_twitter.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(output))

print("✅ pure_python_output_twitter.txt saved.")


✅ pure_python_output_twitter.txt saved.
