In [2]:
import json
import pandas as pd
from collections import Counter

# Load the tagged Reddit data.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default (which is not UTF-8 everywhere and
# would mis-decode or fail on non-ASCII post/comment text).
file_path = 'tagged_reddit_data.json'
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Strategy labels reported as columns, in the order they appear in the table.
# Any other strategy label present in the data is counted but not reported.
strategy_columns = (
    'Positive Politeness',
    'Negative Politeness',
    'Bald On Record',
    'Off-Record',
    'Unknown',
)

# One summary row per subreddit: post/comment totals plus how often each
# politeness strategy was tagged across posts and comments combined.
subreddit_stats = []
for subreddit, details in data.items():
    posts = details['posts']
    # Flatten the per-post comment lists once; reused for the total and the tally.
    comments = [comment for post in posts for comment in post['comments']]

    total_posts = len(posts)
    total_comments = len(comments)

    # Tally strategies over posts and comments together. Items whose
    # 'politeness_tag' is falsy (e.g. None or empty) are skipped.
    combined_counts = Counter(
        item['politeness_tag']['strategy']
        for item in (*posts, *comments)
        if item['politeness_tag']
    )

    row = {
        'Subreddit': subreddit,
        'Total Posts': total_posts,
        'Total Comments': total_comments,
    }
    # A Counter returns 0 for labels that never occurred, so no default is needed.
    row.update({label: combined_counts[label] for label in strategy_columns})
    subreddit_stats.append(row)

# Create a DataFrame for better visualization
stats_df = pd.DataFrame(subreddit_stats)

# Bare expression so the notebook renders the table as the cell output.
stats_df


Unnamed: 0,Subreddit,Total Posts,Total Comments,Positive Politeness,Negative Politeness,Bald On Record,Off-Record,Unknown
0,LegalAdvice,80,259,28,98,185,10,18
1,TooAfraidToAsk,80,631,142,72,424,66,7
2,relationshipAdvice,80,342,95,158,157,10,2
3,ExplainLikeImFive,80,695,87,33,560,78,17
4,Awww,80,371,253,10,138,38,12
5,ChangeMyView,80,781,91,89,626,27,28
6,books,80,672,286,37,316,94,19
7,gaming,80,757,292,42,415,76,12
