In [1]:
import pandas as pd
import numpy as np
import networkx as nx

import spacy
from spacy import displacy

import matplotlib.pyplot as plt


In [2]:
rfaSet = 'Data/wiki_RfA_2010_2013.csv'

In [5]:
df = pd.read_csv(rfaSet)  

df

Unnamed: 0,SRC,TGT,VOT,RES,YEA,DAT,TXT
0,Steel1943,BDD,1,1,2013,"23:13, 19 April 2013",'''Support''' as co-nom.
1,Cuchullain,BDD,1,1,2013,"01:04, 20 April 2013",'''Support''' as nominator.--
2,INeverCry,BDD,1,1,2013,"23:43, 19 April 2013",'''Support''' per noms.
3,Cncmaster,BDD,1,1,2013,"00:11, 20 April 2013",'''Support''' per noms. BDD is a strong contri...
4,Miniapolis,BDD,1,1,2013,"00:56, 20 April 2013","'''Support''', with great pleasure. I work wit..."
...,...,...,...,...,...,...,...
32567,Atama,ZooPro,-1,-1,2010,"18:17, 22 February 2010","'''Oppose''' - Per Polargeo, and per [http://e..."
32568,Bradjamesbrown,ZooPro,-1,-1,2010,"18:18, 22 February 2010",'''Oppose''' per SilkTork's diff above. Assert...
32569,Ottawa4ever,ZooPro,0,-1,2010,"18:11, 22 February 2010","'''Neutral''' Not to pile on, neutral. I canno..."
32570,Tryptofish,ZooPro,0,-1,2010,"17:58, 22 February 2010",'''Neutral''' I've interacted with this editor...


In [48]:

# Assuming df is the DataFrame that you've already filtered and processed
# Create a signed graph
def create_signed_graph(df):
    G = nx.DiGraph()  # Directed graph, as SRC votes on TGT
    
    # Iterate through the rows of the DataFrame to add edges and nodes
    for _, row in df.iterrows():
        src = row['SRC']
        tgt = row['TGT']
        vot = row['VOT']
        txt = row['TXT']  # Assuming there's a TXT column in the dataframe
        res = row['RES']  # Assuming there's a RES column in the dataframe
        
        # Add nodes with attributes: define whether the node is admin or nonAdmin
        if src not in G:
            G.add_node(src, status="admin" if res == 1 else "nonAdmin")  # If RES is 1, it's an admin
        if tgt not in G:
            G.add_node(tgt, status="admin" if res == 1 else "nonAdmin")  # If RES is 1, it's an admin

        # Add edge for VOT of 1, -1, or 0
        if vot == 1:
            G.add_edge(src, tgt, weight=1, txt=txt)  # Support for admin
        elif vot == -1:
            G.add_edge(src, tgt, weight=-1, txt=txt)  # Oppose admin
        elif vot == 0:
            G.add_edge(src, tgt, weight=0, txt=txt)  # Neutral/No vote edge
    
    return G


# Visualize the signed graph
def visualize_graph(G):
    # Drawing the graph
    pos = nx.spring_layout(G)  # Positioning for better visualization
    plt.figure(figsize=(12, 12))
    
    # Draw nodes, edges, and labels
    nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
    
    # Draw edge labels for the weights
    edge_labels = nx.get_edge_attributes(G, 'weight')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
    
    plt.title("Signed Graph of SRC -> TGT Votes")
    plt.show()



# Create the signed graph
G = create_signed_graph(df)

In [49]:
G.number_of_edges()

31876

### GML Format to Gephi

In [50]:
nx.write_gml(G, 'cleaned_graph.gml') 

### Graph Characteristics


In [51]:
G = create_signed_graph(df)

number of nodes

In [52]:
G.number_of_nodes()

2986

### Number of Admins and NonAdmins

In [53]:
# Count the number of nodes with 'status' as 'admin'
admin_count = sum(1 for node, data in G.nodes(data=True) if data['status'] == 'admin')

print(f"Number of admin nodes: {admin_count}")


Number of admin nodes: 2238


In [54]:
# Count the number of nodes with 'status' as 'nonAdmin'
non_admin_count = sum(1 for node, data in G.nodes(data=True) if data['status'] == 'nonAdmin')

print(f"Number of nonAdmin nodes: {non_admin_count}")



Number of nonAdmin nodes: 748


### Number of Edges

In [55]:
G.number_of_edges()

31876