In [1]:
import pandas as pd
import numpy as np
import glob as gl
import networkx as nx
import os
import sys
import joblib
from sklearn.metrics import classification_report

In [2]:
id_handle_mapping_df = pd.read_table('./data/handle_id_mapping', delimiter='\t', names=['Handle'])

  """Entry point for launching an IPython kernel.


In [3]:
id_handle_mapping_df.to_dict(orient='dict')

{'Handle': {1000000131100545029: 'wcartwrite',
  1000000831: 'saxonfawn',
  1000000884850483200: 'louiseauthenti1',
  1000000927775019011: 'abdulha65061530',
  1000000986046517248: 'lincsyngtories',
  1000001060: 'amypashby',
  1000001165260619776: 'niajacksonx',
  1000001257329889280: 'cahill62509501',
  1000001979060576256: 'neuswindon',
  1000001985817411585: 'kriahnakumar12',
  1000002127350165505: 'roymbotela9',
  1000002140: 'donnasummer8',
  1000002259: 'laurenjade995',
  1000002300: 'tomrbond',
  100000238: 'jessblaikie',
  1000002456: 'wtc_careersteam',
  1000002715999784961: 'boomersdgp4indy',
  1000002836430811136: 'spendil09409108',
  1000002906169446400: 'jamesro66722765',
  1000003020690739201: 'murshhid',
  1000003472: 'fionadempsey',
  1000004096: 'vforvargdetta',
  1000004489959563264: 'johndoe14883083',
  1000004748681072640: 'zaherxrapidz1',
  1000005028931874818: 'billcampbelltv',
  1000005419115393025: 'alexgib26210754',
  1000005677073477633: 'butsbuts100',
  1000

In [5]:
def print_progress(iteration, total, prefix='', suffix='', decimals=1, length=100):

    str_format = "{0:." + str(decimals) + "f}"
    percents = str_format.format(100 * (iteration / float(total)))
    filled_length = int(round(length * iteration / float(total)))
    bar = '*' * filled_length + '-' * (length - filled_length)

    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)),

    if iteration == total:
        sys.stdout.write('\n')
    sys.stdout.flush()

## Generate Full Follower Graph

In [129]:
path = './followers/*.txt'

files = gl.glob(path)

j = 0

g = nx.Graph()

for f in files:
    mp_df = pd.read_csv(f, names=['handle'])
    base = os.path.basename(f)
    mp = os.path.splitext((base))[0].lower()
    handle_mapping_dictionary = id_handle_mapping_df.to_dict(orient='dict')
    for v in handle_mapping_dictionary.values():
        handle_mapping_dictionary = v
    print_progress(j + 1, 579, prefix='Progress:', suffix='Complete')
    follower_list = []
    
    for ele in list(mp_df.values):
        if ele[0] not in g:
            if handle_mapping_dictionary.get(ele[0]) is not None:
                follower_list.append(handle_mapping_dictionary.get(ele[0]).lower())

    j += 1
    l = [mp]
    if l[0] not in g:                         
        node_list = l + follower_list
        tail = node_list
        tail.pop(0)

    else:
        node_list = follower_list
        tail = node_list
        
    g.add_nodes_from(node_list)
                                 
    edge_list = []

    for h in tail:
        edge_list.append((mp, h))

    g.add_edges_from(edge_list)

Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 99.8% Complete

## Generate MP Only Follower Graph

In [None]:
mp_list_names = []

path = './followers/*.txt'

files = gl.glob(path)

for f in files:
    base = os.path.basename(f)
    mp = os.path.splitext((base))[0].lower()
    mp_list_names.append(mp)

In [None]:
path = './followers/*.txt'

files = gl.glob(path)

j = 0

g = nx.Graph()

for f in files:
    mp_df = pd.read_csv(f, names=['handle'])
    base = os.path.basename(f)
    mp = os.path.splitext((base))[0].lower()
    handle_mapping_dictionary = id_handle_mapping_df.to_dict(orient='dict')
    for v in handle_mapping_dictionary.values():
        handle_mapping_dictionary = v
    print_progress(j + 1, 579, prefix='Progress:', suffix='Complete')
    follower_list = []
    
    for ele in list(mp_df.values):
        if ele[0] not in g:
            if handle_mapping_dictionary.get(ele[0]) is not None:
                if handle_mapping_dictionary.get(ele[0]) in mp_list_names
                    follower_list.append(handle_mapping_dictionary.get(ele[0]).lower())

    j += 1
    l = [mp]
    if l[0] not in g:                         
        node_list = l + follower_list
        tail = node_list
        tail.pop(0)

    else:
        node_list = follower_list
        tail = node_list
        
    g.add_nodes_from(node_list)
                                 
    edge_list = []

    for h in tail:
        edge_list.append((mp, h))

    g.add_edges_from(edge_list)

In [100]:
labels = joblib.load('./all_new_2019_brexit_vote_labels.y')

{'label': 1, 'score': 1}

In [102]:
import random
from sklearn.metrics import classification_report
from random import randrange

random.seed(17)

for n in g.nodes:
    g.node[n]['label'] = None

for k in labels.keys():
    g.node[k]['label'] = labels.get(k)

def initialize_scores(G, init_value):
    for node_id in G.nodes():
        label = G.node[node_id]['label']
        if label == 0:
          # Labeled nodes: RED
          G.node[node_id]['score'] = 0
        elif label == 1:
          # Labeled nodes: BLUE
          G.node[node_id]['score'] = 1
        else:
          # Unlabeled nodes
          G.node[node_id]['score'] = init_value
        
def calculate_avg_score(G, node_id):
  score_sum = 0
  n_neighbors = 0
  for neighbor_id in G[node_id]:
    if G.node[neighbor_id]['score'] is not None:
        score_sum += G.node[neighbor_id]['score']
        n_neighbors += 1
  return score_sum / n_neighbors

def propagate(G):
  next_scores = {}
  for node_id in G.nodes():
    if G.node[node_id]['label'] is not None:
      # scores of labeled nodes do not change
      next_scores[node_id] = G.node[node_id]['score']
    else:
      next_scores[node_id] = calculate_avg_score(G, node_id)
  for node_id in next_scores:
    G.node[node_id]['score'] = next_scores[node_id]

test = list(labels.values())
random_index = []
nodes

seed_ = 57
count = 0
while count < seed_nodes:
    x = randrange(len(test))
    if x not in random_index:
        random_index.append(randrange(len(test)))
        count += 1


mp_handles_keys = list(labels.keys())

for i in random_index:
    g.node[mp_handles_keys[i]]['label'] = None
    g.node[mp_handles_keys[i]]['score'] = None

initialize_scores(g, 0.5)
n_steps = 6
for i in range(n_steps):
    propagate(g)

final_predicted_scores = []

for i in random_index:
    x = g.node[mp_handles_keys[i]]['score']
    if x < 0.5:
        final_predicted_scores.append(0)
    else:
        final_predicted_scores.append(1)
        
actual_scores = []
for i in random_index:
    actual_scores.append(labels.get(mp_handles_keys[i]))

print(classification_report(actual_scores, final_predicted_scores, labels=[0,1]))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        62
           1       0.98      1.00      0.99        52

    accuracy                           0.99       114
   macro avg       0.99      0.99      0.99       114
weighted avg       0.99      0.99      0.99       114



In [130]:
degrees = [(node,val) for (node, val) in g.degree()]
sorted(degrees, key=lambda x: x[1], reverse=True)

[('jeremycorbyn', 1256551),
 ('nigel_farage', 829721),
 ('theresa_may', 658149),
 ('ed_miliband', 545038),
 ('borisjohnson', 361083),
 ('davidlammy', 306628),
 ('chukaumunna', 282513),
 ('carolinelucas', 272859),
 ('yvettecoopermp', 198349),
 ('tom_watson', 181507),
 ('hackneyabbott', 176963),
 ('timfarron', 153290),
 ('johnmcdonnellmp', 147179),
 ('emilythornberry', 132893),
 ('harrietharman', 130489),
 ('vincecable', 121081),
 ('mhairiblack', 109717),
 ('jacob_rees_mogg', 106764),
 ('hilarybennmp', 100291),
 ('lucianaberger', 95020),
 ('angelarayner', 94470),
 ('michaelgove', 92592),
 ('keir_starmer', 82929),
 ('philiphammonduk', 82404),
 ('andrealeadsom', 81476),
 ('amberruddhr', 77542),
 ('jessphillips', 77435),
 ('leicesterliz', 75850),
 ('jeremy_hunt', 75258),
 ('sajidjavid', 73286),
 ('liamfox', 71578),
 ('stellacreasy', 70555),
 ('labourlewis', 68327),
 ('tulipsiddiq', 67689),
 ('owensmith_mp', 65322),
 ('anna_soubry', 63130),
 ('rachelreevesmp', 62001),
 ('angelaeagle', 61723)