In [3]:
import pandas as pd
import numpy as np
import networkx as nx

import spacy
from spacy import displacy

import matplotlib.pyplot as plt

In [4]:
# Load the vote records from the CSV in the local Data folder
# (per the file name: Wikipedia RfA data covering 2010-2013).
df = pd.read_csv("./Data/wiki_RfA_2010_2013.csv")
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,SRC,TGT,VOT,RES,YEA,DAT,TXT
0,Steel1943,BDD,1,1,2013,2013-04-19 23:13:00,'''Support''' as co-nom.
1,Cuchullain,BDD,1,1,2013,2013-04-20 01:04:00,'''Support''' as nominator.--
2,INeverCry,BDD,1,1,2013,2013-04-19 23:43:00,'''Support''' per noms.
3,Cncmaster,BDD,1,1,2013,2013-04-20 00:11:00,'''Support''' per noms. BDD is a strong contri...
4,Miniapolis,BDD,1,1,2013,2013-04-20 00:56:00,"'''Support''', with great pleasure. I work wit..."
...,...,...,...,...,...,...,...
32305,Atama,ZooPro,-1,-1,2010,2010-02-22 18:17:00,"'''Oppose''' - Per Polargeo, and per [http://e..."
32306,Bradjamesbrown,ZooPro,-1,-1,2010,2010-02-22 18:18:00,'''Oppose''' per SilkTork's diff above. Assert...
32307,Ottawa4ever,ZooPro,0,-1,2010,2010-02-22 18:11:00,"'''Neutral''' Not to pile on, neutral. I canno..."
32308,Tryptofish,ZooPro,0,-1,2010,2010-02-22 17:58:00,'''Neutral''' I've interacted with this editor...


In [6]:

# Assuming df is the DataFrame that you've already filtered and processed
# Create a signed graph
def create_signed_graph(df):
    """
    Build a directed multigraph with one edge per DataFrame row.

    Every row becomes an edge SRC -> TGT. Parallel edges are kept, so two
    rows with the same SRC/TGT pair produce two separate edges.

    Edge attributes:
        label: the row's VOT value (the sample data uses -1, 0 and 1). It is
               stored as a label, not as a weight.
        txt:   the row's TXT value.
        admin: "admin" when RES == 1, otherwise "nonAdmin". Note that the
               sample data encodes the non-1 case as -1, not 0.
               NOTE(review): RES looks like a per-target outcome rather than
               a property of the voter; confirm the intended meaning.
        DAT:   the row's DAT value, stored unchanged.

    Parameters:
        df (pd.DataFrame): Frame with the columns 'SRC', 'TGT', 'VOT',
            'TXT', 'RES' and 'DAT'.

    Returns:
        G (nx.MultiDiGraph): The created graph. It is empty when df has no rows.

    Raises:
        KeyError: If df has rows but one of the required columns is missing.
    """
    G = nx.MultiDiGraph()  # Multi-graph: repeated SRC/TGT pairs stay distinct edges

    # Nothing to add; also keeps a column-less empty frame from raising.
    if df.empty:
        return G

    # Walk the columns in lock-step instead of df.iterrows(): iterrows builds a
    # Series per row (slow) and coerces the whole row to one common dtype.
    columns = ('SRC', 'TGT', 'VOT', 'TXT', 'RES', 'DAT')
    for src, tgt, vot, txt, res, dat in zip(*(df[col] for col in columns)):
        # add_edge creates missing endpoints itself (source first, then target),
        # so no separate add_node calls are needed.
        G.add_edge(
            src,
            tgt,
            label=vot,
            txt=txt,
            admin="admin" if res == 1 else "nonAdmin",
            DAT=dat,
        )

    return G

# Build the vote graph from the full DataFrame; the following cells read it as the global G.
G = create_signed_graph(df)

### Graph Characteristics

##### Number of Nodes

In [8]:
# Number of distinct users appearing as a vote source (SRC) or target (TGT).
G.number_of_nodes()

2968

##### Number of Edges

In [10]:
# Number of edges; G is a MultiDiGraph, so parallel edges between the same pair are each counted.
G.number_of_edges()

32310

#### Top 20 Nodes by In-Degree and Out-Degree

In [12]:
# Rank the users in the vote graph G by in-degree and by out-degree.

# Degree lookups keyed by node
in_degree_dict = {user: deg for user, deg in G.in_degree()}
out_degree_dict = {user: deg for user, deg in G.out_degree()}

# One record per node carrying both degree values
degree_info = {}
for user in G.nodes():
    degree_info[user] = {
        "in_degree": in_degree_dict.get(user, 0),
        "out_degree": out_degree_dict.get(user, 0),
    }

# Descending order via a negated key; the sort is stable, so ties keep node order
top_20_in_degree = sorted(degree_info.items(), key=lambda item: -item[1]["in_degree"])[:20]
top_20_out_degree = sorted(degree_info.items(), key=lambda item: -item[1]["out_degree"])[:20]

# Report both rankings
print("Top 20 nodes by in-degree:")
for user, info in top_20_in_degree:
    print(f"User: {user}, In-degree: {info['in_degree']}")

print("\nTop 20 nodes by out-degree:")
for user, info in top_20_out_degree:
    print(f"User: {user}, Out-degree: {info['out_degree']}")


Top 20 nodes by in-degree:
User: HJ Mitchell, In-degree: 341
User: Connormah, In-degree: 326
User: Ironholds, In-degree: 294
User: Lord Roem, In-degree: 272
User: The Thing That Should Not Be, In-degree: 265
User: Everyking, In-degree: 239
User: SarekOfVulcan, In-degree: 237
User: DeltaQuad, In-degree: 227
User: GiantSnowman, In-degree: 220
User: SarahStierch, In-degree: 218
User: Σ, In-degree: 217
User: Drmies, In-degree: 208
User: Richwales, In-degree: 205
User: My76Strat, In-degree: 201
User: MZMcBride, In-degree: 199
User: Slon02, In-degree: 196
User: Theopolisme, In-degree: 189
User: Dabomb87, In-degree: 189
User: 28bytes, In-degree: 188
User: Secret, In-degree: 187

Top 20 nodes by out-degree:
User: Boing! said Zebedee, Out-degree: 274
User: Fetchcomms, Out-degree: 253
User: Fastily, Out-degree: 251
User: Ktr101, Out-degree: 249
User: Axl, Out-degree: 211
User: RP459, Out-degree: 198
User: Kudpung, Out-degree: 197
User: Minimac, Out-degree: 195
User: Mkativerata, Out-degree: 191


In [15]:
# Tally the edges of G by vote value.
# The vote is stored under the 'label' edge attribute; an edge without that
# attribute is treated as 0.
vote_labels = [attrs.get('label', 0) for _, _, attrs in G.edges(data=True)]

count_1 = sum(1 for label in vote_labels if label == 1)
count_neg1 = sum(1 for label in vote_labels if label == -1)
count_0 = sum(1 for label in vote_labels if label == 0)

# Report the tallies
print(f"Edges with VOT = 1: {count_1}")
print(f"Edges with VOT = -1: {count_neg1}")
print(f"Edges with VOT = 0: {count_0}")


Edges with VOT = 1: 23035
Edges with VOT = -1: 7217
Edges with VOT = 0: 2058


In [16]:
# Per-voter tallies, one dictionary per vote value, keyed by the edge's source node
src_0_count = {}
src_1_count = {}
src_neg1_count = {}

for voter, _target, attrs in G.edges(data=True):
    label = attrs.get('label', 0)  # vote lives under 'label'; absent counts as 0

    # Pick the tally that matches this vote; any other value is ignored
    if label == 0:
        tally = src_0_count
    elif label == 1:
        tally = src_1_count
    elif label == -1:
        tally = src_neg1_count
    else:
        continue
    tally[voter] = tally.get(voter, 0) + 1

# Ten most frequent sources per vote value (negated key = descending, ties keep insertion order)
top_0_src = sorted(src_0_count.items(), key=lambda pair: -pair[1])[:10]
top_1_src = sorted(src_1_count.items(), key=lambda pair: -pair[1])[:10]
top_neg1_src = sorted(src_neg1_count.items(), key=lambda pair: -pair[1])[:10]

# Print the three rankings, one after the other
for heading, ranking in (
    ("Top 10 nodes with the most 0 VOT as SRC:", top_0_src),
    ("\nTop 10 nodes with the most 1 VOT as SRC:", top_1_src),
    ("\nTop 10 nodes with the most -1 VOT as SRC:", top_neg1_src),
):
    print(heading)
    for src, count in ranking:
        print(f"{src}: {count}")


Top 10 nodes with the most 0 VOT as SRC:
Fetchcomms: 32
Axl: 31
Kudpung: 29
Phantomsteve: 29
Boing! said Zebedee: 28
NSD: 25
RP459: 24
Ottawa4ever: 24
Doc Quintana: 23
Graeme Bartlett: 21

Top 10 nodes with the most 1 VOT as SRC:
Ktr101: 213
Boing! said Zebedee: 179
Newyorkbrad: 146
RP459: 146
Fetchcomms: 137
Tryptofish: 135
Wizardman: 130
Ret.Prof: 130
Secret: 129
Airplaneman: 129

Top 10 nodes with the most -1 VOT as SRC:
Fastily: 123
Townlake: 85
Fetchcomms: 84
Salvio giuliano: 74
Cirt: 74
Malleus Fatuorum: 73
Boing! said Zebedee: 67
Kudpung: 64
Colonel Warden: 60
Minimac: 59


In [17]:
# Per-voter tallies: total votes cast, plus a breakdown by vote value
src_0_count = {}
src_1_count = {}
src_neg1_count = {}
src_total_count = {}

for voter, _target, attrs in G.edges(data=True):
    label = attrs.get('label', 0)  # vote lives under 'label'; absent counts as 0

    # Every edge counts towards the source's total, whatever its vote value
    src_total_count[voter] = src_total_count.get(voter, 0) + 1

    # Breakdown by vote value; values other than 0 / 1 / -1 only affect the total
    if label == 0:
        tally = src_0_count
    elif label == 1:
        tally = src_1_count
    elif label == -1:
        tally = src_neg1_count
    else:
        continue
    tally[voter] = tally.get(voter, 0) + 1

# The 100 most active voters (negated key = descending, ties keep insertion order)
top_100_voters = sorted(src_total_count.items(), key=lambda pair: -pair[1])[:100]

# Share of each vote value among those voters; `cast` is at least 1, so the division is safe
vote_proportions = [
    {
        'node': voter,
        'total_votes': cast,
        '0': src_0_count.get(voter, 0) / cast,
        '1': src_1_count.get(voter, 0) / cast,
        '-1': src_neg1_count.get(voter, 0) / cast,
    }
    for voter, cast in top_100_voters
]

# Within the top 100, the ten voters with the highest share of each vote value
ranked_0 = sorted(vote_proportions, key=lambda entry: -entry['0'])[:10]
ranked_1 = sorted(vote_proportions, key=lambda entry: -entry['1'])[:10]
ranked_neg1 = sorted(vote_proportions, key=lambda entry: -entry['-1'])[:10]

# Print the three rankings; every heading after the first is preceded by a blank line
rankings = (('0', ranked_0), ('1', ranked_1), ('-1', ranked_neg1))
for position, (label, ranking) in enumerate(rankings):
    separator = "\n" if position else ""
    print(f"{separator}Top 10 voters by proportion of '{label}' votes:")
    for entry in ranking:
        print(f"Node {entry['node']}: {entry['total_votes']} total votes, {entry[label]*100:.2f}% were '{label}'")


Top 10 voters by proportion of '0' votes:
Node WFCforLife: 75 total votes, 28.00% were '0'
Node Phantomsteve: 115 total votes, 25.22% were '0'
Node Doc Quintana: 93 total votes, 24.73% were '0'
Node Graeme Bartlett: 87 total votes, 24.14% were '0'
Node Dennis Brown: 80 total votes, 22.50% were '0'
Node Ottawa4ever: 117 total votes, 20.51% were '0'
Node Begoon: 80 total votes, 20.00% were '0'
Node NSD: 139 total votes, 17.99% were '0'
Node N5iln: 79 total votes, 17.72% were '0'
Node Worm That Turned: 78 total votes, 16.67% were '0'

Top 10 voters by proportion of '1' votes:
Node Newyorkbrad: 146 total votes, 100.00% were '1'
Node Rogerd: 86 total votes, 100.00% were '1'
Node FeydHuxtable: 100 total votes, 99.00% were '1'
Node BuickCenturyDriver: 87 total votes, 96.55% were '1'
Node Orphan Wiki: 74 total votes, 95.95% were '1'
Node The Blade of the Northern Lights: 71 total votes, 95.77% were '1'
Node Pharaoh of the Wizards: 106 total votes, 95.28% were '1'
Node Stephen: 93 total votes, 

In [20]:
# Select the votes cast on the candidate "Neelix".
# The target name is compared as a whole (ignoring case and surrounding
# whitespace). str.contains would do a regex substring search and also pick up
# any other user whose name merely contains "neelix". Missing targets compare
# unequal and are dropped.
votes_on_neelix = df[df['TGT'].str.strip().str.lower() == "neelix"]

# Count how many votes of each type (VOT value) the candidate received
vote_counts = votes_on_neelix['VOT'].value_counts()

# Display the result
print("Vote types for Neelix:")
print(vote_counts)


Vote types for Neelix:
VOT
 1    69
-1    14
 0    12
Name: count, dtype: int64


### Centrality Measures