In [51]:
import pandas as pd
from itertools import combinations
from collections import Counter

In [37]:
df = pd.read_csv('../datas/interim/matched_edits_all.csv')

In [40]:
def get_user_type(row):
    if any(c.isdigit() for c in row["user"].split(".") if row["user"].count(".") == 3):
        return "Anonymous (IP)"
    else:
        return "Registered"

df["user_type"] = df.apply(get_user_type, axis=1)

In [45]:
df["is_weaponised"] = df["weaponised"].apply(lambda x: 1 if str(x).lower().strip() == "weaponised" else 0)
weaponising_ratio_df = (
    df.groupby(["user", "article"])
      .agg(
          total_edits=("is_weaponised", "count"),
          weaponised_edits=("is_weaponised", "sum")
      )
      .reset_index()
)

weaponising_ratio_df["weaponising_ratio"] = (
    weaponising_ratio_df["weaponised_edits"] / weaponising_ratio_df["total_edits"]
)

weaponising_ratio_df

Unnamed: 0,user,article,total_edits,weaponised_edits,weaponising_ratio
0,*Kat*,2004_Ukrainian_presidential_election_analysis,3,0,0.0
1,.:Ajvol:.,Alexander_II_of_Russia_analysis,1,0,0.0
2,.:Ajvol:.,Eastern_Front_(World_War_II)_analysis,1,0,0.0
3,.:Ajvol:.,History_of_Ukraine_analysis,6,0,0.0
4,.anacondabot,Communist_Party_of_the_Soviet_Union_analysis,1,0,0.0
...,...,...,...,...,...
2873,–ö–æ–∫—É—à–µ–≤ –°–µ—Ä–≥–µ–π,Abortion_in_Ukraine_analysis,1,0,0.0
2874,–°–∞–≤–µ–ª–∏–π –í –ê,History_of_Ukraine_analysis,6,0,0.0
2875,–£–ê–í—è—á–µ—Å–ª–∞–≤,Crimea_analysis,22,22,1.0
2876,–ß—Ä—ä–Ω—ã–π —á–µ–ª–æ–≤–µ–∫,Epiphanius_I_of_Ukraine_analysis,2,0,0.0


In [50]:
weaponising_ratio_df["article_id"] = weaponising_ratio_df["article"].astype("category").cat.codes
weaponising_ratio_df["user_type"] = weaponising_ratio_df.apply(get_user_type, axis=1)
df_graph = (
    weaponising_ratio_df.groupby(["user", "article_id", "user_type", "weaponising_ratio"])
      .size()
      .reset_index(name="n_edits")
)
df_graph

Unnamed: 0,user,article_id,user_type,weaponising_ratio,n_edits
0,*Kat*,0,Registered,0.0,1
1,.:Ajvol:.,5,Registered,0.0,1
2,.:Ajvol:.,23,Registered,0.0,1
3,.:Ajvol:.,39,Registered,0.0,1
4,.anacondabot,16,Registered,0.0,1
...,...,...,...,...,...
2873,–ö–æ–∫—É—à–µ–≤ –°–µ—Ä–≥–µ–π,3,Registered,0.0,1
2874,–°–∞–≤–µ–ª–∏–π –í –ê,39,Registered,0.0,1
2875,–£–ê–í—è—á–µ—Å–ª–∞–≤,17,Registered,1.0,1
2876,–ß—Ä—ä–Ω—ã–π —á–µ–ª–æ–≤–µ–∫,27,Registered,0.0,1


In [52]:
pairs = []

for article, group in user_article_df.groupby("article_id"):
    users = group["user"].unique()
    for u1, u2 in combinations(sorted(users), 2):
        pairs.append((u1, u2))

coedit_counts = Counter(pairs)

edges_df = pd.DataFrame(
    [(u1, u2, w) for (u1, u2), w in coedit_counts.items()],
    columns=["user1", "user2", "coedit_count"]
)

In [None]:
import igraph as ig

# Create graph from edge list
g = ig.Graph.DataFrame(
    coedit_df[["user1", "user2", "jaccard"]],
    directed=False
)

# Add node attributes from user metadata
for attr in ["n_edits", "user_type", "weaponising_ratio"]:
    attr_dict = df.groupby("user")[attr].first().to_dict()
    g.vs[attr] = [attr_dict.get(v["name"], None) for v in g.vs]


In [None]:
import leidenalg

partition = leidenalg.find_partition(
    g, leidenalg.ModularityVertexPartition, weights=g.es["jaccard"]
)

g.vs["cluster"] = partition.membership


In [None]:
layout = g.layout_drl()
ig.plot(
    g,
    layout=layout,
    vertex_size=[5 + 20 * v["weaponising_ratio"] for v in g.vs],
    vertex_color=[v["cluster"] for v in g.vs],
    vertex_label=None,
    edge_width=[e["jaccard"] * 5 for e in g.es]
)