In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import os.path as path

## Load datasets

In [3]:
# Which dataset to analyse. Other available choices: "brexit", "abortion".
DATASET = 'uselections'

# All balanced-exposure datasets live one directory above the notebook.
DATASETS_PATH = path.join(
    path.pardir,
    "datasets",
    "balanced_exposure",
)
# Folder holding the files of the selected dataset.
DATASET_PATH = path.join(DATASETS_PATH, DATASET)

In [4]:
# Read the tab-separated edge list for the selected dataset. The file has no
# header row, so the four column names are supplied explicitly.
PROBS_PATH = path.join(DATASET_PATH, f"{DATASET}_network_heterogeneous.txt")
df = pd.read_csv(
    PROBS_PATH,
    sep='\t',
    names=['User', 'Follower', 'Pa', 'Pb'],
)

In [5]:
# Load the seed users of each side, one label per line (surrounding whitespace
# stripped). The files are opened in `with` blocks so the handles are closed
# deterministically instead of being left to the garbage collector.
with open(path.join(DATASET_PATH, "side1_seeds.txt")) as seeds_file:
    A = [line.strip() for line in seeds_file]  # A = pro Hillary
with open(path.join(DATASET_PATH, "side2_seeds.txt")) as seeds_file:
    B = [line.strip() for line in seeds_file]  # B = pro Trump
len(A), len(B)

(85, 99)

In [6]:
# Keep only the rows whose User and Follower are both seed users (of either
# side), then tag each endpoint with the side it belongs to.
seed_users = A + B
both_seeded = df.User.isin(seed_users) & df.Follower.isin(seed_users)
labeled = df[both_seeded].copy()

# An endpoint listed in A is labelled 'A'; every other endpoint is labelled 'B'.
side_names = {True: 'A', False: 'B'}
labeled['Group1'] = labeled.User.isin(A).map(side_names)
labeled['Group2'] = labeled.Follower.isin(A).map(side_names)

In [None]:
# Build the node table: every distinct (label, group) pair that appears as
# either endpoint of a labeled row.
user_nodes = labeled[['User', 'Group1']].rename(
    columns={'User': 'Label', 'Group1': 'Group'})
follower_nodes = labeled[['Follower', 'Group2']].rename(
    columns={'Follower': 'Label', 'Group2': 'Group'})

nodes = pd.concat([user_nodes, follower_nodes]).drop_duplicates()
# The first reset discards the original row labels; the second one exposes the
# fresh 0..n-1 numbering as an 'index' column.
nodes = nodes.reset_index(drop=True).reset_index()

# Lookup from node label to its integer position in `nodes`.
node_dict = dict(zip(nodes.Label, nodes.index))

# The 'index' column is written out under the name 'Id'.
nodes.rename(columns={'index': 'Id'}).to_csv('nodes.csv', index=False)

In [None]:
# Edge list between labeled nodes: the Follower column becomes 'Source' and the
# User column becomes 'Target'.
edges = (labeled[['User', 'Follower']].rename(columns={'User': 'Target', 'Follower': 'Source'})
                                      .reset_index(drop=True))

# Translate labels to node ids with a per-column dictionary lookup. Series.map
# does one hash lookup per cell, whereas DataFrame.replace with a large dict
# scans for every key and is much slower. Every label here was used to build
# `node_dict`, so the lookup leaves no missing values. `edges` itself keeps the
# original labels; only the exported copy carries ids.
edges.assign(
    Target=edges['Target'].map(node_dict),
    Source=edges['Source'].map(node_dict),
).to_csv('edges.csv', index=False)