# BLM Activity and Sentiment Analysis

Steps:

1. Divide each major period into 3 sub-periods using change point analysis. This was performed in `tweet_counts.ipynb`.
2. For each period...
   * Process all tweets 
      * Process each tweet through `TweetsManager.process_tweet()`
      * Call `TweetsManager.process_deferred_interactions()` to handle retweet/reply corner cases.
      * Call `TweetsManager.analyze_graph()` to detect communities.
   * Discard too-small communities.
   * Assign BLM stance to communities.
      * Generate reports on largest communities.
      * Manually assign stance to those communities.
      * Build/use prediction model for remaining communities.
   * Save everything.

In [1]:
from   collections import Counter, defaultdict
import json
import numpy as np
from   os import path
import pandas as pd
import re
from   string import Template
from   typing import List

from elasticsearch import Elasticsearch as ES
from elasticsearch.helpers import scan

from blm_activity_db import BlmActivityDb
from community_classifier import get_blm_classifier, get_three_class_classifier
from community_report import generate_init_community_report
from tweet_mgr import TweetsManager, CommunityActivity, Stance
from tweet_sentiment import EmoScores, PronounCounts, SentimentAnalysis


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\chris\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\chris\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\chris\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\chris\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Configurable parameters
start_date = "2020-06-14"  # first day of the period (inclusive), YYYY-MM-DD
end_date = "2020-10-31"    # last day of the period (inclusive), YYYY-MM-DD
es_idx = 'tweets2'


def _created_at_bound(iso_day, offset_days=0):
    '''Format midnight of a day the way the range query on `doc.created_at` expects.

    Parameters:
    -----------
    iso_day : str
        Date as "YYYY-MM-DD"
    offset_days : int
        Number of days to add to iso_day before formatting

    Returns:
    --------
    str such as "Sun Jun 14 00:00:00 +0000 2020"
    '''
    from datetime import datetime, timedelta

    day = datetime.strptime(iso_day, "%Y-%m-%d") + timedelta(days=offset_days)
    # NOTE(review): %a / %b follow the process locale; this assumes the default
    # "C" locale so that names come out as English abbreviations.
    return day.strftime("%a %b %d 00:00:00 +0000 %Y")


# The range bounds are derived from start_date / end_date so the two can never
# disagree.  end_date is inclusive, hence the exclusive upper bound is the
# midnight that follows it.
query_body = {
  "query": {
      "range": {
          "doc.created_at": {
              "gte": _created_at_bound(start_date),
              "lt": _created_at_bound(end_date, offset_days=1)
          }
      }
  }
}

period = 6
num_init_communities = 40
num_exemplars = 10



In [3]:
# Stream every document matched by query_body out of the Elasticsearch index
# and hand each one's `_source` to a fresh TweetsManager.
tm = TweetsManager()
es = ES(hosts=["localhost"])
for hit in scan(es, index=es_idx, query=query_body):
    tm.process_tweet(hit['_source'])

In [4]:
def get_tweet_text_by_id(id_):
    '''Look up one tweet in the Elasticsearch index and return its text.

    Parameters:
    -----------
    id_ :
        Document ID of the tweet in index `es_idx`

    Returns:
    --------
    The `text` field found under `_source.doc` of the fetched document
    '''
    return es.get(index=es_idx, id=id_)['_source']['doc']['text']

In [5]:
# Handle the retweet/reply corner cases that were deferred while the tweets
# were being processed one at a time.
tm.process_deferred_interactions()
# Detect communities now that all interactions are in place.
tm.analyze_graph(n_iterations=10)

In [6]:
# num_accounts -> number of communities having that many member accounts
community_size = Counter(
    len(members) for members in tm.community_user_map.values()
)
# num_unique_tweets -> number of communities with that activity, where a
# community's unique tweets are its tweets minus its analysed retweets
unique_tweets = Counter(
    activity.num_tweets - len(activity.retweet_sentiment_analyses)
    for activity in tm.community_activity_map.values()
)


In [7]:
# unique tweets per community -> unique tweets contributed by all communities
# of that activity level (activity level times number of such communities)
total_unique_tweets = {}
for tweets_per_community, community_count in unique_tweets.items():
    contributed = tweets_per_community * community_count
    total_unique_tweets[tweets_per_community] = contributed
    print(tweets_per_community, ": ", contributed)

75642 :  75642
56276 :  56276
9003 :  9003
51336 :  51336
23918 :  23918
11560 :  11560
5214 :  5214
1674 :  1674
2732 :  2732
1418 :  1418
1354 :  1354
1038 :  1038
1698 :  1698
3981 :  3981
1525 :  1525
2535 :  2535
3644 :  3644
1727 :  1727
595 :  595
3888 :  3888
108 :  216
1668 :  1668
997 :  997
2085 :  2085
1020 :  1020
2393 :  2393
380 :  380
916 :  916
1835 :  1835
2408 :  2408
3282 :  3282
1759 :  1759
5409 :  5409
683 :  683
1481 :  1481
2076 :  2076
2694 :  2694
2224 :  2224
3390 :  3390
1131 :  1131
1673 :  1673
1326 :  1326
3007 :  3007
751 :  751
2998 :  2998
75 :  75
678 :  678
2044 :  2044
985 :  985
831 :  831
1461 :  1461
866 :  866
1715 :  1715
2182 :  2182
2950 :  2950
1986 :  1986
833 :  833
2165 :  2165
576 :  576
1889 :  1889
153 :  153
1220 :  2440
742 :  742
1316 :  1316
1214 :  2428
1064 :  1064
788 :  788
3669 :  3669
583 :  583
1795 :  1795
734 :  734
898 :  898
1158 :  1158
1008 :  1008
103 :  103
165 :  165
837 :  837
1684 :  1684
609 :  609
944 :  944
14

In [8]:
# Overall totals, then what would survive a cut-off of 20 unique tweets
# per community.
print("total tweets:", len(tm.tweets))
print("total unique tweets:", sum(total_unique_tweets.values()))
print(len(tm.user_community_map), "accounts in", len(tm.community_user_map), "communities")
print("If 20 used as threshold for unique tweets per community...")
num_communities = 0
num_unique_tweets = 0
for tweets_per_community, community_count in unique_tweets.items():
    if tweets_per_community >= 20:
        num_communities += community_count
        num_unique_tweets += total_unique_tweets[tweets_per_community]
print(num_communities, "communities and", num_unique_tweets, "unique tweets remain.")

total tweets: 1758603
total unique tweets: 487201
708918 accounts in 38819 communities
If 20 used as threshold for unique tweets per community...
426 communities and 409735 unique tweets remain.


In [9]:
# Discard too-small communities, using the 20-unique-tweet threshold whose
# effect was previewed in the summary printed above.
tm.filter_low_activity_communities(unique_tweets_threshold=20)

In [10]:
# Get Initial Report
import os

report_dir = f'../data/Reports/{period}/'
report_name = "Largest Communities Hashtags and Tweets"
report_path = path.join(report_dir, f"{report_name}.md")

# A new period has no report directory yet; create it so the write at the
# end of this cell (and the reports written by later cells) cannot fail.
os.makedirs(report_dir, exist_ok=True)

# Communities ordered by membership count, largest first.  Later cells reuse
# this ordering.
comm_user_counts = sorted(tm.community_user_map.items(), key=lambda x: len(x[1]), reverse=True)
largest_communities = comm_user_counts[:num_init_communities]

# derive inter-community replies, retweets
top_community_ids = {comm_id for comm_id, _ in largest_communities}
reply_counter = Counter()
replied_to_counter = Counter()
retweeted_counter = Counter()
for (_, comm_reply), count in tm.inter_comm_reply_counter.items():
    if comm_reply.replying in top_community_ids:
        reply_counter[comm_reply.replying] += count
    if comm_reply.replied_to in top_community_ids:
        replied_to_counter[comm_reply.replied_to] += count
for (_, comm_retweet), count in tm.inter_comm_retweet_counter.items():
    if comm_retweet.retweeted in top_community_ids:
        retweeted_counter[comm_retweet.retweeted] += count


def _top_items(counter, limit):
    '''Return the `limit` highest-count entries of `counter` as (keys, counts) lists.'''
    ranked = sorted(counter.items(), key=lambda x: x[1], reverse=True)[:limit]
    return [key for key, _ in ranked], [count for _, count in ranked]


# extract other community metrics, create one report section per community
sections = [f"# {report_name} in Period {period}\n\n"]
for comm_id, members in largest_communities:
    ca: CommunityActivity = tm.community_activity_map[comm_id]
    # influence ranks: the ten smallest rank values among the members
    top10_influence_ranks = sorted(
        tm.user_activity[member].influence_rank for member in members
    )[:10]
    # memes
    hashtags, ht_counts = _top_items(ca.meme_counter, num_exemplars)
    # retweets: use the entry held by the manager, else fetch from Elasticsearch
    tweet_ids, rt_counts = _top_items(ca.retweet_counter, num_exemplars)
    rts = [
        tm.tweets[id_] if id_ in tm.tweets else get_tweet_text_by_id(id_)
        for id_ in tweet_ids
    ]
    sections.append(generate_init_community_report(
        comm_id,
        len(members),
        ca.num_tweets,
        sum(ca.retweet_counter.values()),
        hashtags,
        ht_counts,
        rts,
        rt_counts,
        retweeted_counter[comm_id],
        reply_counter[comm_id],
        replied_to_counter[comm_id],
        top10_influence_ranks,
    ))
report = "".join(sections)
with open(report_path, 'w', encoding="utf-8") as f:
    f.write(report)

In [14]:
# Manually assigned stances for the largest communities.
# IDs of the communities labelled as counter-protest; the prediction cell
# further down may append more IDs.
counter_comm_ids = [0, 4, 12, 16,  21, 32, 38]
# IDs of communities whose stance is left unknown; starts empty and is
# appended to by the prediction cell further down.
excluded_comm_ids = []

In [15]:
# Among the num_init_communities largest communities, every one that was not
# labelled counter-protest or excluded is taken to be a BLM community.
blm_comm_ids = []
for rank, (candidate_id, _members) in enumerate(comm_user_counts):
    if rank >= num_init_communities:
        break
    if candidate_id in counter_comm_ids or candidate_id in excluded_comm_ids:
        continue
    blm_comm_ids.append(candidate_id)

In [16]:
   
def get_tweet_texts(tweet_ids, tm):
    '''Collect one entry per tweet ID, in the order the IDs are given.

    Parameters:
    -----------
    tweet_ids : iterable
        IDs of the tweets wanted
    tm : TweetsManager instance
        Its `tweets` mapping is consulted first; IDs missing from it are
        fetched with get_tweet_text_by_id()

    Returns:
    --------
    list with `tm.tweets[id]` for known IDs (presumably the tweet text --
    confirm against TweetsManager) and the fetched text otherwise
    '''
    return [
        tm.tweets[id_] if id_ in tm.tweets else get_tweet_text_by_id(id_)
        for id_ in tweet_ids
    ]

# Build the training data for the stance classifier from the most retweeted
# tweets of the manually labelled (largest) communities.
blm_retweet_set = set()
counter_retweet_set = set()
excluded_retweet_set = set()
for k, (comm_id, _ ) in enumerate(comm_user_counts):
    if k == num_init_communities:
        break
    # How many top retweets to take from this community.  Held in its own
    # name so the `num_exemplars` report setting from the configuration cell
    # is not overwritten.
    # NOTE(review): 288 vs 18 presumably compensates for there being far
    # fewer non-BLM communities -- confirm.
    num_training_retweets = 288 if comm_id not in blm_comm_ids else 18
    if comm_id in counter_comm_ids:
        target_set = counter_retweet_set
    elif comm_id in excluded_comm_ids:
        target_set = excluded_retweet_set
    else:
        target_set = blm_retweet_set
    retweet_counts = sorted(
        tm.community_activity_map[comm_id].retweet_counter.items(),
        key=lambda x:x[1],
        reverse=True
    )
    target_set.update(
        tweet_id for tweet_id, _ in retweet_counts[:num_training_retweets]
    )
blm_retweets = get_tweet_texts(blm_retweet_set, tm)
counter_retweets = get_tweet_texts(counter_retweet_set, tm)
excluded_retweets = get_tweet_texts(excluded_retweet_set, tm)
blm_clf, cv_results = get_blm_classifier(blm_retweets, counter_retweets)

In [17]:
# Tabulate the cross-validation results returned by get_blm_classifier().
results_df = pd.DataFrame(cv_results)
print(results_df)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       5.466185      0.182284         1.340854        0.084047   
1       5.400599      0.044618         1.351356        0.032278   
2       5.524687      0.099842         1.386248        0.053888   

  param_model__alpha param_model__fit_prior  \
0                  1                  False   
1                  1                  False   
2                  1                  False   

                               param_vec__stop_words  \
0          [rt, #blacklivesmatter, blacklivesmatter]   
1  [rt, blacklivesmatter, #blacklivesmatter, i, m...   
2  [i, me, my, myself, we, our, ours, ourselves, ...   

                                              params  split0_test_score  \
0  {'model__alpha': 1.0, 'model__fit_prior': Fals...           0.894636   
1  {'model__alpha': 1.0, 'model__fit_prior': Fals...           0.896552   
2  {'model__alpha': 1.0, 'model__fit_prior': Fals...           0.894636   

   split1_test_s

In [18]:
# Predict a stance for every community beyond the manually labelled ones,
# from the classifier's verdict on its most retweeted tweets.
num_samples_per_comm = 20
for k, (comm_id, _ ) in enumerate(comm_user_counts):
    if k < num_init_communities:
        continue
    ranked_retweets = sorted(
        tm.community_activity_map[comm_id].retweet_counter.items(),
        key=lambda pair: pair[1],
        reverse=True
    )
    top_retweets = ranked_retweets[:num_samples_per_comm]
    sample_tweet_ids = [tweet_id for tweet_id, _count in top_retweets]
    counts = [count for _tweet_id, count in top_retweets]
    total_count = sum(counts)
    # Too few retweets to judge: the community stays unclassified.
    destination = excluded_comm_ids
    if total_count >= num_samples_per_comm:
        stance_predictions = blm_clf.predict(get_tweet_texts(sample_tweet_ids, tm))
        # Average of the predictions, each weighted by its retweet count.
        stance_probability = np.dot(np.array(counts), np.array(stance_predictions)) / total_count
        if stance_probability < -0.3:
            destination = counter_comm_ids
        elif stance_probability > 0.3:
            destination = blm_comm_ids
    destination.append(comm_id)

In [19]:
# Summarise the outcome of the stance assignment: the counter-protest
# community IDs, and how many communities ended up unknown or BLM.
print(f"counter communities: {counter_comm_ids}")
print(f"{len(excluded_comm_ids)} unknown communities")
print(f"{len(blm_comm_ids)} BLM communities")

counter communities: [0, 4, 12, 16, 21, 32, 38]
294 unknown communities
125 BLM communities


In [20]:
# Record each community's stance on its activity entry.  The groups are
# applied in this order, so an ID present in several lists keeps the last one.
for group_ids, group_stance in (
    (counter_comm_ids, Stance.CounterProtest),
    (excluded_comm_ids, Stance.Unknown),
    (blm_comm_ids, Stance.Protest),
):
    for community_id in group_ids:
        tm.community_activity_map[community_id].stance = group_stance

In [21]:
# Save the tweets manager (with the stances assigned above) under this
# period.  `db` is reused further down to look up earlier periods' accounts.
db = BlmActivityDb() 
db.save_tweets_mgr(tm, period)


In [22]:
num_examples = 25 # i.e., number of tweets or memes to display
# Markdown skeleton of a movement summary report.  The $placeholders are
# filled in by store_movement_reports(); the *_list / *_columns placeholders
# receive pre-rendered table rows.
movement_template = Template('''
## MOVEMENT $movement

Communities: $num_communities  
Members: $num_members  
Retweets: $num_retweets  
Tweets: $num_tweets

### Top Hashtags

| Count | Hashtag |
|------:|:------|
$hashtag_list

### Top Retweets

| Count | Tweet |
|------:|:------|
$retweet_list

### Sentiment

All Tweet Polarity = $at_polarity  
Retweet Polarity = $rt_polarity

### Emotions

| Emotion | All Tweets | Retweets |
|------:|:------:|:-------|
$emo_list

### Pronoun Usage

| Person | All Tweets | Retweets |
|------:|:------:|:-------|
$pronoun_columns

''')

def printable_top_hashtag_list(meme_counter):
    '''Render the most frequent hashtags as markdown table rows.

    Parameters:
    -----------
    meme_counter : mapping of hashtag -> count

    Returns:
    --------
    str with one "| count | hashtag |" line per hashtag, highest count
    first, limited to `num_examples` rows
    '''
    ranked = sorted(meme_counter.items(), key=lambda entry: entry[1], reverse=True)
    rows = []
    for position, (hashtag, occurrences) in enumerate(ranked):
        if position == num_examples:
            break
        rows.append(f"| {occurrences} | {hashtag} |\n")
    return "".join(rows)


# Matches a run of line-break characters inside tweet text.
line_feeds = re.compile(r"[\r\n]+")
def printable_top_retweet_list(retweet_counter):
    '''Render the most retweeted tweets as markdown table rows.

    Parameters:
    -----------
    retweet_counter : mapping of tweet ID -> retweet count

    Returns:
    --------
    str with one "| count | tweet text |" line per tweet, highest count
    first, limited to `num_examples` rows.  The text of each tweet is
    fetched with get_tweet_text_by_id().
    '''
    top_retweets = sorted(retweet_counter.items(), key = lambda x: x[1], reverse = True)
    rows = []
    for i, (tweet_id, count) in enumerate(top_retweets):
        if i == num_examples:
            break
        tweet = get_tweet_text_by_id(tweet_id)
        # A line break would end the table row early.  Substitute a space
        # rather than nothing so the words on either side are not fused.
        tweet = line_feeds.sub(' ', tweet)
        # An unescaped pipe would be read as a column separator.
        tweet = tweet.replace('|', '\\|')
        rows.append(f"| {count} | {tweet} |\n")
    return "".join(rows)


def printable_emo_scores_columns(left: EmoScores, right: EmoScores):
    '''Render two sets of emotion scores side by side as markdown table rows.

    Parameters:
    -----------
    left : EmoScores
        Scores shown in the first value column
    right : EmoScores
        Scores shown in the second value column

    Returns:
    --------
    str with one "| emotion | left | right |" line per emotion, each score
    rounded to 3 decimals
    '''
    emotions = (
        "trust",
        "anticipation",
        "joy",
        "surprise",
        "anger",
        "disgust",
        "fear",
        "sadness",
    )
    rows = []
    for emotion in emotions:
        left_score = round(getattr(left, emotion), 3)
        right_score = round(getattr(right, emotion), 3)
        rows.append(f"| {emotion} | {left_score} | {right_score} |\n")
    return "".join(rows)


def printable_pronoun_usage_columns(left: PronounCounts, right: PronounCounts):
    '''Render two sets of pronoun counts side by side as markdown table rows.

    Parameters:
    -----------
    left : PronounCounts
        Counts shown in the first value column
    right : PronounCounts
        Counts shown in the second value column

    Returns:
    --------
    str with one "| person | left | right |" line per grammatical person,
    each value rounded to 3 decimals
    '''
    persons = (
        ("First Singular", "first_singular"),
        ("First Plural", "first_plural"),
        ("Second", "second"),
        ("Third", "third"),
    )
    rows = []
    for label, attribute in persons:
        left_value = round(getattr(left, attribute), 3)
        right_value = round(getattr(right, attribute), 3)
        rows.append(f"| {label} | {left_value} | {right_value} |\n")
    return "".join(rows)


def store_movement_reports(movement, report_dir, comm_ids, tm):
    '''Write files with salient data on BLM or counter movement during a period

    Aggregates membership, tweet and retweet counts, hashtags, retweets and
    sentiment over all communities of the movement and writes the result to
    "<movement>_summary.md" in report_dir.  Nothing is written when comm_ids
    is empty.

    Parameters:
    -----------
    movement : str
        "BLM" or "Counter"
    report_dir : str
        FS directory where reports are to be written
    comm_ids : list of int
        IDs of communities in movement
    tm : TweetsManager instance
    '''
    # movement stats
    ## counts
    num_communities = len(comm_ids)
    if num_communities == 0:
        return
    num_members = 0
    total_tweets = 0
    total_retweets = 0
    # Sum of the weights applied to the per-community retweet summaries.
    retweet_weight = 0
    meme_counter = Counter()
    retweet_counter = Counter()
    retweet_pc, retweet_emo, retweet_sentiment = PronounCounts(), EmoScores(), 0.0
    all_tweet_pc, all_tweet_emo, all_tweet_sentiment = PronounCounts(), EmoScores(), 0.0
    for community_id in comm_ids:
        num_members += len(tm.community_user_map[community_id])
        c_activity = tm.community_activity_map[community_id]
        total_tweets += c_activity.num_tweets
        for tweet_id, count in c_activity.retweet_counter.items():
            total_retweets += count
            retweet_counter[tweet_id] += count
        for meme, count in c_activity.meme_counter.items():
            meme_counter[meme] += count
        # Each community summary is scaled by the number of items it covers
        # so that the division below yields a weighted mean for the movement.
        rss = c_activity.retweet_sentiment_summary
        num_retweets = len(c_activity.retweet_sentiment_analyses)
        retweet_weight += num_retweets
        retweet_pc += rss.pronoun_counts * num_retweets
        retweet_emo += rss.emo_scores * num_retweets
        retweet_sentiment += rss.sentiment * num_retweets
        atss = c_activity.all_sentiment_summary
        all_tweet_pc += atss.pronoun_counts * c_activity.num_tweets
        all_tweet_emo += atss.emo_scores * c_activity.num_tweets
        all_tweet_sentiment += atss.sentiment * c_activity.num_tweets

    # Normalise by the sum of the weights actually applied.  A movement with
    # no retweets (or no tweets) keeps its zero-initialised values instead of
    # triggering a division by zero.
    if retweet_weight > 0:
        retweet_pc /= retweet_weight
        retweet_emo /= retweet_weight
        retweet_sentiment /= retweet_weight
    if total_tweets > 0:
        all_tweet_pc /= total_tweets
        all_tweet_emo /= total_tweets
        all_tweet_sentiment /= total_tweets

    ## most important hashtags
    hashtag_list = printable_top_hashtag_list(meme_counter)

    ## most retweeted
    retweet_list = printable_top_retweet_list(retweet_counter)

    ## emotions
    emo_list = printable_emo_scores_columns(left=all_tweet_emo, right=retweet_emo)

    ## Write to file
    subs = {
        "movement": movement,
        "num_communities": num_communities,
        "num_members": num_members,
        "num_tweets": total_tweets,
        "num_retweets": total_retweets,
        "hashtag_list": hashtag_list,
        "retweet_list": retweet_list,
        "at_polarity": round(all_tweet_sentiment, 3),
        "rt_polarity": round(retweet_sentiment, 3),
        "emo_list": emo_list,
        "pronoun_columns": printable_pronoun_usage_columns(all_tweet_pc, retweet_pc),
    }
    movement_summary = movement_template.safe_substitute(subs)
    report_name = f"{movement}_summary.md"
    report_path = path.join(report_dir, report_name)
    with open(report_path, 'w', encoding="utf-8") as f:
        f.write(movement_summary)
        
# One summary file per movement, written into this period's report directory.
store_movement_reports("Counter", report_dir, counter_comm_ids, tm)
store_movement_reports("BLM", report_dir, blm_comm_ids, tm)


In [23]:
# serialize the graph into this period's folder of the graph store
graph_file_name = "graph.pkl"
dir_stem = f"D:/BLM-db/graphs/{period}/"
graph_file_path = path.join(dir_stem, graph_file_name)
tm.urg.g.write_pickle(graph_file_path, version=-1)

In [24]:
# Markdown skeleton of the report that compares first-time accounts with
# previously active ones for one stance.  The $placeholders are filled in by
# publish_experience_analysis(); the *_list / *_columns placeholders receive
# pre-rendered table rows.
stance_and_previous_activity_template = Template("""
## Analysis by Previous Activity for Stance $stance

Number of previously active accounts: $num_experienced  
Number of first-time accounts:        $num_noob

### Activity 

| Activity | No Previous Activity | Previously Active |
|------:|:------:|:-------|
| Avg Tweets | $noob_tweets | $experienced_tweets |
| Avg Retweets | $noob_retweets | $experienced_retweets |
| Avg Replies | $noob_replies | $experienced_replies |

### Sentiment Analysis

| Measure | No Previous Activity | Previously Active |
|------:|:------:|:-------|
| Avg Sentiment | $noob_sentiment | $experienced_sentiment |
$emo_list

### Pronoun Usage

| Pronoun | No Previous Activity | Previously Active |
|------:|:------:|:-------|
$pronoun_columns

### Top Memes

#### No Previous Activity

| Count | Hashtag |
|------:|:------|
$noob_hashtag_list

#### Previously Active

| Count | Hashtag |
|------:|:------|
$experienced_hashtag_list

""")

# Accounts the database lists up to and including the previous period, held
# as a set for fast membership tests.
global_experienced_accounts = set(db.get_account_list(end_period=period - 1))


def activity_and_sentiment_for_accounts(accounts: List[str], tm: TweetsManager):
    '''Average the activity and sentiment of a group of accounts.

    Parameters:
    -----------
    accounts : list of str
        IDs of the accounts to aggregate
    tm : TweetsManager instance
        Source of the per-account activity (`tm.user_activity`)

    Returns:
    --------
    tuple of
        number of accounts,
        tweets, retweets and replies per account,
        SentimentAnalysis holding the per-account averages of pronoun
        counts, emotion scores and sentiment,
        Counter of hashtags summed (not averaged) over the accounts.
    For an empty group every figure is left at its zero starting value.
    '''
    num_accounts = len(accounts)
    tweets_acc, retweets_acc, replies_acc = 0, 0, 0
    sentiment_acc = 0.0
    pronouns_acc = PronounCounts()
    emotions_acc = EmoScores()
    meme_counter = Counter()

    # Accumulate the raw totals; for an empty group this loop does nothing.
    for account_id in accounts:
        activity = tm.user_activity[account_id]
        summary = activity.sentiment_summary
        tweets_acc += activity.tweet_count
        retweets_acc += activity.retweet_count
        replies_acc += activity.reply_count
        sentiment_acc += summary.sentiment
        pronouns_acc += summary.pronoun_counts
        emotions_acc += summary.emo_scores
        for meme, count in activity.meme_counter.items():
            meme_counter[meme] += count

    # Turn the totals into per-account averages.
    if num_accounts > 0:
        tweets_acc /= num_accounts
        retweets_acc /= num_accounts
        replies_acc /= num_accounts
        sentiment_acc /= num_accounts
        pronouns_acc /= num_accounts
        emotions_acc /= num_accounts

    return (
        num_accounts,
        tweets_acc,
        retweets_acc,
        replies_acc,
        SentimentAnalysis(pronouns_acc, emotions_acc, sentiment_acc),
        meme_counter,
    )


def publish_experience_analysis(stance: str, community_ids: List[int], tm: TweetsManager):
    '''Write a report comparing first-time and previously active accounts.

    The members of the given communities are split by whether they appear in
    `global_experienced_accounts`; both groups are aggregated with
    activity_and_sentiment_for_accounts() and the comparison is written to
    "<stance>_experience_analysis.md" in `report_dir`.  Nothing is written
    when community_ids is empty.

    Parameters:
    -----------
    stance : str
        Label used in the report title and file name
    community_ids : list of int
        IDs of the communities holding that stance
    tm : TweetsManager instance
    '''
    if not community_ids:
        return

    members = [
        user_id
        for community_id in community_ids
        for user_id in tm.community_user_map[community_id]
    ]
    experienced_accounts = [u for u in members if u in global_experienced_accounts]
    noob_accounts = [u for u in members if u not in global_experienced_accounts]

    noob = activity_and_sentiment_for_accounts(noob_accounts, tm)
    num_noob, noob_tweets, noob_retweets, noob_replies, noob_sa, noob_memes = noob
    veteran = activity_and_sentiment_for_accounts(experienced_accounts, tm)
    num_exp, exp_tweets, exp_retweets, exp_replies, exp_sa, exp_memes = veteran

    subs = dict(
        stance=stance,
        num_noob=num_noob,
        num_experienced=num_exp,
        noob_tweets=round(noob_tweets, 3),
        experienced_tweets=round(exp_tweets, 3),
        noob_retweets=round(noob_retweets, 3),
        experienced_retweets=round(exp_retweets, 3),
        noob_replies=round(noob_replies, 3),
        experienced_replies=round(exp_replies, 3),
        noob_sentiment=round(noob_sa.sentiment, 3),
        experienced_sentiment=round(exp_sa.sentiment, 3),
        emo_list=printable_emo_scores_columns(left=noob_sa.emo_scores, right=exp_sa.emo_scores),
        pronoun_columns=printable_pronoun_usage_columns(left=noob_sa.pronoun_counts, right=exp_sa.pronoun_counts),
        noob_hashtag_list=printable_top_hashtag_list(noob_memes),
        experienced_hashtag_list=printable_top_hashtag_list(exp_memes),
    )
    target_path = path.join(report_dir, f"{stance}_experience_analysis.md")
    with open(target_path, 'w', encoding="utf-8") as f:
        f.write(stance_and_previous_activity_template.safe_substitute(subs))

# One experience report per stance; the first argument also names the file.
publish_experience_analysis("BLM", blm_comm_ids, tm)
publish_experience_analysis("CounterProtest", counter_comm_ids, tm)

In [25]:
# overview report: headline counts for the whole period
overview_template = Template('''
## OVERVIEW of PERIOD $start_date to $end_date

| What  | How Many |
|:-------|--------:|
| Tweets | $num_tweets |
| Retweets | $num_retweets |  
| Communities | $num_communities |  
| Accounts | $num_accounts |
| Size of largest community | $largest_comm_size |

''')

# Tweet and retweet totals are summed over the per-account activity records.
total_tweets = 0
total_retweets = 0
for account_activity in tm.user_activity.values():
    total_tweets += account_activity.tweet_count
    total_retweets += account_activity.retweet_count
num_communities = len(tm.community_user_map)
num_accounts = len(tm.user_community_map)
# comm_user_counts is ordered largest first, so its head is the largest community.
largest_comm_size = len(comm_user_counts[0][1])

subs = dict(
    start_date=start_date,
    end_date=end_date,
    num_tweets=total_tweets,
    num_retweets=total_retweets,
    num_communities=num_communities,
    num_accounts=num_accounts,
    largest_comm_size=largest_comm_size,
)
overview_report_name = "OverviewReport.md"
overview_path = path.join(report_dir, overview_report_name)
overview = overview_template.safe_substitute(subs)
with open(overview_path, 'w', encoding="utf-8") as f:
    f.write(overview)
