# Community Detection

In [2]:
%load_ext autoreload
%autoreload 2

import json
import os
import sys
sys.path.insert(0, '/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset')
import header as head

# Directory of the annotation JSON files; passed to head.writeToJSON below as
# the output location for "userId_annotations".
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
annotations_path = "/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/annotations"
# Directory passed to head.writeToJSON for the community-detection outputs
# ("communities", "bridges", "with_bridges", and the "follow+react" subfolder).
community_path = "/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/community_detection"

We separate all users into rumour and non-rumour communities based on the `support` and `certainty` annotation keys. For the sake of this analysis, source tweets and reply tweets carry equal weight as expressions of a user's opinion. A user is categorized by the number of their tweets that support or deny a rumour. If a user has the same number of supporting (rumour) and denying (non-rumour) tweets, we compare the number of certain tweets on each side. 

### **source tweets**  
We only consider rumour threads whose `support` key is "supporting" or "denying". In addition, their certainty has to be "certain" or "somewhat-certain". 

### **replies**  
Direct replies and deep replies are classified by their `responsetype-vs-source` value relative to the stance of their source tweet. As with source tweets, only replies whose certainty is "certain" or "somewhat-certain" are considered. 

In [None]:
# Community Detection
#
# Build, for every user, the lists of tweet ids in which they support or deny
# a rumour, split by certainty, and write the result as "userId_annotations".

def _new_user_entry():
    """Return an empty per-user record: stance -> certainty -> list of tweet ids."""
    return {
        "supporting" : {
            "certain" : [],
            "somewhat-certain" : []
        },
        "denying" : {
            "certain" : [],
            "somewhat-certain" : []
        }
    }

# Stance and certainty values that are recorded; anything else is ignored.
_STANCES = ("supporting", "denying")
_CERTAINTIES = ("certain", "somewhat-certain")

# (stance of the source tweet, reply's response type) -> stance of the reply.
# Agreeing with a denying source counts as denying; disagreeing with it counts
# as supporting, and vice versa.
_REPLY_STANCE = {
    ("supporting", "agreed") : "supporting",
    ("supporting", "disagreed") : "denying",
    ("denying", "agreed") : "denying",
    ("denying", "disagreed") : "supporting",
}

with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/annotations/tweetId_annotations.json") as f:
    annots = json.load(f)

with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/Id-conversions/tweet_to_user.json") as f:
    tweet_to_user = json.load(f)

sources = annots["Source_Tweets"]
replies = annots["Reply_Tweets"]
dic = {}

for source_id in sources:
    tweet = sources[source_id]

    # Every author gets a record, even when none of their tweets ends up being
    # counted, so that they still appear in the output.
    user = dic.setdefault(tweet_to_user[source_id], _new_user_entry())

    stance = tweet["support"] if tweet["support"] in _STANCES else None
    if stance is None:
        # Report source tweets whose support value is neither stance.
        print("source: "+source_id)

    certainty = tweet["certainty"]
    # Only index into the record when a stance was resolved; previously a
    # stance-less tweet fell through and raised KeyError on user["certain"].
    if stance is not None and certainty in _CERTAINTIES:
        user[stance][certainty].append(source_id)

for reply_id in replies:
    tweet = replies[reply_id]

    # Check whether the user responded to a valid source: skip replies whose
    # thread id, source tweet, or source "support" value is missing.
    try:
        src_support = sources[tweet["threadid"]]["support"]
    except (KeyError, TypeError):
        continue

    user = dic.setdefault(tweet_to_user[reply_id], _new_user_entry())

    user_support = tweet["responsetype-vs-source"]

    stance = None
    if src_support in _STANCES:
        # None when the reply neither agreed nor disagreed with its source.
        stance = _REPLY_STANCE.get((src_support, user_support))
    else:
        print("rely: "+reply_id)

    certainty = tweet["certainty"]
    if certainty in _CERTAINTIES:
        if stance is not None:
            # Record the reply's own id (the original appended the stale
            # source-loop variable here, mislabelling every reply).
            user[stance][certainty].append(reply_id)
    else:
        print(reply_id+" "+certainty)

head.writeToJSON(annotations_path,"userId_annotations",dic)

In [5]:
# Categorize all users into rumours and non-rumours.
#
# Each user's denying and supporting tweets are ranked by
# (total tweets, certain tweets, somewhat-certain tweets). The higher rank
# decides the community; an exact tie leaves the user uncategorized.
with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/annotations/userId_annotations.json") as f:
    users = json.load(f)

communities = {"rumours": [], "non_rumours": [], "uncategorized": []}
rumours = communities["rumours"]
non_rumours = communities["non_rumours"]
uncategorized = communities["uncategorized"]

for u, user in users.items():
    deny = user["denying"]
    support = user["supporting"]

    deny_certain = len(deny["certain"])
    deny_somewhat = len(deny["somewhat-certain"])
    support_certain = len(support["certain"])
    support_somewhat = len(support["somewhat-certain"])

    deny_size = deny_certain + deny_somewhat
    support_size = support_certain + support_somewhat

    # Tuples compare lexicographically: total first, then certain, then
    # somewhat-certain - the same tie-break order as a nested if/elif chain.
    deny_rank = (deny_size, deny_certain, deny_somewhat)
    support_rank = (support_size, support_certain, support_somewhat)

    if deny_rank > support_rank:
        non_rumours.append(u)
    elif deny_rank < support_rank:
        rumours.append(u)
    else:
        uncategorized.append(u)

head.writeToJSON(community_path,"communities",communities)

Identify users who are bridges

In [6]:
# A bridge has followers from both communities: keep every user whose
# "rumours" and "non_rumours" follower lists are both non-empty.
with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/bridges/user_followed_by.json") as f:
    user_followed_by = json.load(f)

# A comprehension avoids leaking a loop variable named `id`, which would shadow
# the builtin for the rest of the kernel session. Key order follows the input.
two_com_bridges = {
    user_id: followers
    for user_id, followers in user_followed_by.items()
    if len(followers["rumours"]) > 0 and len(followers["non_rumours"]) > 0
}

head.writeToJSON(community_path,"bridges",two_com_bridges)

Differentiate bridges from other communities

In [10]:
# Re-partition the communities so that every user whose id is a key of
# type1.json is moved out of their community into a separate "bridges" group.
with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/bridges/type 1/type1.json") as f:
    # Only the ids (the JSON object's keys) are needed for membership tests.
    bi_commun_bridges = json.load(f).keys()

with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/community_detection/communities.json") as f:
    communities = json.load(f)

new_communities = {
    "rumours":[],
    "non_rumours":[],
    "uncategorized":[],
    "bridges":[]
}

bridges = new_communities["bridges"]
rumours = new_communities["rumours"]
non_rumours = new_communities["non_rumours"]
uncategorized = new_communities["uncategorized"]

# One loop over the three source communities replaces three copy-pasted loops.
# The fixed order keeps "bridges" filled rumours-first, as before, and the
# loop variable no longer shadows the builtin `id`.
for community_name in ("rumours", "non_rumours", "uncategorized"):
    members = new_communities[community_name]
    for user_id in communities[community_name]:
        if user_id in bi_commun_bridges:
            bridges.append(user_id)
        else:
            members.append(user_id)

head.writeToJSON(community_path,"with_bridges",new_communities)

### Follow+React

Extract follow+react type1 bridges from the community

In [3]:
# Merge the react type1 bridges into the follow-based communities: every id
# that is a key of react/type1.json is added to "bridges" and removed from all
# other groups.
with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/community_detection/follow/with_bridges.json") as file:
    follow_community = json.load(file)

with open("/Users/yu-hung/Downloads/pheme-rumour-scheme-dataset/bridges/react/type1.json") as file:
    react_type1 = json.load(file)
    react_type1 = react_type1.keys()

# Build the lookup set once instead of on every loop iteration.
react_ids = set(react_type1)

def _merge_react_bridges(group, members):
    """Return the de-duplicated member list of one group after the merge.

    dict.fromkeys de-duplicates while keeping first-seen order, so the JSON
    written below is identical on every run; list(set(...)) ordered string ids
    by their randomized hashes.
    """
    if group == "bridges":
        return list(dict.fromkeys(list(members) + list(react_type1)))
    return [user_id for user_id in dict.fromkeys(members) if user_id not in react_ids]

# Rebuild the mapping instead of assigning into it while iterating over it.
follow_community = {
    group: _merge_react_bridges(group, members)
    for group, members in follow_community.items()
}

head.writeToJSON(head.makePath([community_path,"follow+react"]),"communities",follow_community)