In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import bipartite

In [2]:
data = pd.read_csv("edges.csv")
data

Unnamed: 0,hero,comic
0,24-HOUR MAN/EMMANUEL,AA2 35
1,3-D MAN/CHARLES CHAN,AVF 4
2,3-D MAN/CHARLES CHAN,AVF 5
3,3-D MAN/CHARLES CHAN,COC 1
4,3-D MAN/CHARLES CHAN,H2 251
...,...,...
96099,ZZZAX,H2 326
96100,ZZZAX,H2 327
96101,ZZZAX,M/CP 8/4
96102,ZZZAX,PM 47


In [3]:
filter1 = data["comic"].str.startswith(("A ", "A2", "A3"))
data[filter1].drop_duplicates(subset = "comic")

Unnamed: 0,hero,comic
84,ABSORBING MAN/CARL C,A 183
85,ABSORBING MAN/CARL C,A 184
86,ABSORBING MAN/CARL C,A 20/4
87,ABSORBING MAN/CARL C,A 270
88,ABSORBING MAN/CARL C,A 273
...,...,...
61178,PHARAOH RAMA-TUT,A 21/4
71356,SET,A 18/4
78065,"STANKOWICZ, FABIAN",A 321/2
88719,VISION,A 20/3


In [4]:
layers = []
for i in ["A ", "A2", "A3"]:
    filters = data["comic"].str.startswith(i)
    layers.append(data[filters])

In [5]:
layers[0]

Unnamed: 0,hero,comic
84,ABSORBING MAN/CARL C,A 183
85,ABSORBING MAN/CARL C,A 184
86,ABSORBING MAN/CARL C,A 20/4
87,ABSORBING MAN/CARL C,A 270
88,ABSORBING MAN/CARL C,A 273
...,...,...
95975,ZIRCONIUM,A 188
95983,"ZOLA, ARNIM",A 13
96038,"ZOTA, CARLO",A 12
96039,"ZOTA, CARLO",A 262


In [6]:
layers[0]['issue'] = layers[0]['comic'].str[2:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  layers[0]['issue'] = layers[0]['comic'].str[2:]


In [7]:
issues1 = layers[0][~layers[0]['issue'].str.startswith("'")]
issues1

Unnamed: 0,hero,comic,issue
84,ABSORBING MAN/CARL C,A 183,183
85,ABSORBING MAN/CARL C,A 184,184
86,ABSORBING MAN/CARL C,A 20/4,20/4
87,ABSORBING MAN/CARL C,A 270,270
88,ABSORBING MAN/CARL C,A 273,273
...,...,...,...
95975,ZIRCONIUM,A 188,188
95983,"ZOLA, ARNIM",A 13,13
96038,"ZOTA, CARLO",A 12,12
96039,"ZOTA, CARLO",A 262,262


In [8]:
for i in range(len(issues1)):
    issues1.iloc[i, 2] = issues1.iloc[i, 2].split("/",1)[0]
    issues1.iloc[i, 2] = issues1.iloc[i, 2].split("-",1)[0]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  issues1.iloc[i, 2] = issues1.iloc[i, 2].split("/",1)[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  issues1.iloc[i, 2] = issues1.iloc[i, 2].split("-",1)[0]


In [9]:
silver = issues1[issues1['issue'].astype(float) <= 82]
silver = silver[["hero", "comic"]]
silver.drop_duplicates(subset = "hero")

Unnamed: 0,hero,comic
86,ABSORBING MAN/CARL C,A 20/4
207,"ADAMS, NICOLE NIKKI",A 77
1003,ANGEL/WARREN KENNETH,A 3
1525,ANT-MAN/DR. HENRY J.,A 1
2208,APE MAN/GORDON MONK,A 12
...,...,...
95795,"ZEDRAO, GENERAL",A 14
95824,ZEMO/BARON HEINRICH,A 10
95867,ZEUS,A 23
95983,"ZOLA, ARNIM",A 13


In [10]:
bronze = issues1[(issues1['issue'].astype(float) > 82) & (issues1['issue'].astype(float) <= 242)]
bronze = bronze[["hero", "comic"]]
bronze.drop_duplicates(subset = "hero")

Unnamed: 0,hero,comic
84,ABSORBING MAN/CARL C,A 183
675,AMERICAN EAGLE II/JA,A 85
696,AMPHIBIAN/KINGLEY RI,A 148
709,AMPHIBIUS,A 105
960,ANELLE,A 94
...,...,...
95298,YELLOW CLAW/TZING JA,A 204
95481,ZABU,A 118
95825,ZEMO/BARON HEINRICH,A 131
95865,ZEUS,A 100


In [11]:
modern = issues1[issues1['issue'].astype(float) > 242]
modern = modern[["hero", "comic"]]
modern = pd.concat([modern, layers[1]])
modern.drop_duplicates(subset = "hero")

Unnamed: 0,hero,comic
87,ABSORBING MAN/CARL C,A 270
246,ADORA,A 260
363,AGINAR,A 248
430,AJAK/TECUMOTZIN [ETE,A 248
668,AMENHOTEP,A 353
...,...,...
95868,ZEUS,A 281
95958,ZIRAN,A 339
96039,"ZOTA, CARLO",A 262
20106,"DE LA SALLE, BEATRIC",A2 3


In [12]:
heroes = pd.concat([layers[2], layers[0][layers[0]['issue'].str.startswith("'")]])
heroes.drop_duplicates(subset = "hero")

Unnamed: 0,hero,comic,issue
1626,ANT-MAN/DR. HENRY J.,A3 1,
2096,ANT-MAN II/SCOTT HAR,A3 26,
2564,ARCANNA/ARCANNA JONE,A3 5,
2750,ARKON,A3 17,
3088,ATTUMA,A3 42,
...,...,...,...
87935,VAKUME,A '00,'00
88419,VERTIGO,A '00,'00
89783,"WALKER, MICKEY",A '00,'00
93030,"WOLFE, HEDY",A '00,'00


In [13]:
# The variables with dataframes are silver, bronze, modern, and heroes

In [14]:
B = nx.Graph()
B.add_nodes_from(silver['hero'].drop_duplicates(), bipartite=0)
B.add_nodes_from(silver['comic'].drop_duplicates(), bipartite=1)
B.add_edges_from([(row['hero'], row['comic']) for idx, row in silver.iterrows()])
Gsilver = bipartite.projected_graph(B, silver['hero'])
Gsilver_weighted = bipartite.weighted_projected_graph(B, silver['hero'].drop_duplicates())

In [15]:
# pos = {node:[0, i] for i,node in enumerate(silver['hero'])}
# pos.update({node:[1, i] for i,node in enumerate(silver['comic'])})
# nx.draw(B, pos, with_labels=False)
# for p in pos:  # raise text positions
#     pos[p][1] += 0.25
# nx.draw_networkx_labels(B, pos)

# plt.show()

In [16]:
# Gsilver_weighted.edges.data()

In [17]:
B = nx.Graph()
B.add_nodes_from(bronze['hero'].drop_duplicates(), bipartite=0)
B.add_nodes_from(bronze['comic'].drop_duplicates(), bipartite=1)
B.add_edges_from([(row['hero'], row['comic']) for idx, row in bronze.iterrows()])
Gbronze = bipartite.projected_graph(B, bronze['hero'])
Gbronze_weighted = bipartite.weighted_projected_graph(B, bronze['hero'].drop_duplicates())

In [18]:
B = nx.Graph()
B.add_nodes_from(modern['hero'].drop_duplicates(), bipartite=0)
B.add_nodes_from(modern['comic'].drop_duplicates(), bipartite=1)
B.add_edges_from([(row['hero'], row['comic']) for idx, row in modern.iterrows()])
Gmodern = bipartite.projected_graph(B, modern['hero'])
Gmodern_weighted = bipartite.weighted_projected_graph(B, modern['hero'].drop_duplicates())

In [19]:
B = nx.Graph()
B.add_nodes_from(heroes['hero'].drop_duplicates(), bipartite=0)
B.add_nodes_from(heroes['comic'].drop_duplicates(), bipartite=1)
B.add_edges_from([(row['hero'], row['comic']) for idx, row in heroes.iterrows()])
Gheroes = bipartite.projected_graph(B, heroes['hero'])
Gheroes_weighted = bipartite.weighted_projected_graph(B, heroes['hero'].drop_duplicates())

### The graphs (nodes = heros, edge = they appear in same comic) are named Gsilver, Gbronze, Gmodern, and Gheroes.
### The weighted ones have weight = number of times appear in same comic, and the graphs are named with _weighted.

## Remove nodes that low degrees and appear 

In [20]:
for node in Gsilver.nodes():
    print(Gsilver.degree[node] > 3)
    print(Gsilver.has_node(node))
    print(Gsilver.has_node(node))
    break

False
True
True


In [21]:
def remove_nodes(G, low_deg, low_layer, weight, *args):
    ''' remove nodes from a networkx graph G, that have too low a degree (lower than or equal to low_deg) and
    exist in too few layers (lower than or equal to low_layer). 
    The other layers (of form of networkx graphs as well) are given in args.
    For weighted graph, can use weight = 'weight'
    '''
    need_remove = []
    for node in G.nodes():
        layers = 1
        for L in args:
            if (L.has_node(node) == True):
                layers = layers + 1
        if ((G.degree[node] <= low_deg) & (layers <= low_layer)):
            need_remove.append(node)
    print(need_remove)
    G.remove_nodes_from(need_remove)

In [22]:
remove_nodes(Gsilver, 10, 3, None, Gbronze, Gmodern, Gheroes)
remove_nodes(Gbronze, 10, 3, None, Gsilver, Gmodern, Gheroes)
remove_nodes(Gmodern, 10, 3, None, Gsilver, Gbronze, Gheroes)
remove_nodes(Gheroes, 10, 3, None, Gsilver, Gbronze, Gmodern)

['ABSORBING MAN/CARL C', 'ADAMS, NICOLE NIKKI', 'APOLLO [GREEK GOD]', 'BEETLE/ABNER RONALD', 'BYRD, SEN. HARRINGTO', 'CARTER, PEGGY', 'DE LA FONTAINE, CONT', 'EGGHEAD/PROF. ELIHAS', 'JAMESON, J. JONAH', 'MACLAIN, DR. MYRON', 'MERLIN', 'PENDRAGON, ARTHUR', 'PLANTMAN/SAM SMITHER', 'PUPPET MASTER/PHILLI', 'REDWING', 'SET', 'STILT-MAN/WILBUR DAY', 'SUPER-ADAPTOID/ALESS', 'SURTUR', 'TAKU', 'TULAK', 'WATER WIZARD/PETE', "W'KABI", 'WRECKER III/DIRK GAR', 'YMIR']
['ARKON', 'BEERE, JASON', 'BERSERKER', 'BLACK TALON II/SAMUE', 'BOGGS, MORDECAI P.', 'CANTOR, VERA', 'CHAMPION, IMUS', 'DAMBALLAH II', 'DRAX/ARTHUR DOUGLAS', 'HARDY, MORGAN MACNEI', 'LANG, CASSANDRA CASS', 'LETTERMAN, DAVID', 'MANIPULATOR/', 'MOLECULE MAN/OWEN RE', 'SANTINI, DR. JOSE', 'SOLARR', 'SOUTHERN, CANDY', 'TYPHON', 'VIPER II', 'YELLOW CLAW/TZING JA']
['AMPERE/', 'ARIDES', 'BARON BRIMSTONE/', 'BERDITCHEV, ITZHAK', 'BLOOD BROTHERS', 'BRAWLER', 'DEVLOR', 'DOC SAMSON/DR. LEONA', 'DOOMSDAY MAN II', 'EHMAN, HEIDI', 'GALEN KOR, ADMI

### May need to loop several times because more will have "too little degree" after removal.

In [23]:
# for i in range(10):
#     remove_nodes(Gsilver, 10, 3, Gbronze, Gmodern, Gheroes)
#     remove_nodes(Gbronze, 10, 3, Gsilver, Gmodern, Gheroes)
#     remove_nodes(Gmodern, 10, 3, Gsilver, Gbronze, Gheroes)
#     remove_nodes(Gheroes, 10, 3, Gsilver, Gbronze, Gmodern)

In [24]:
remove_nodes(Gsilver_weighted, 10, 3, 'weight', Gbronze_weighted, Gmodern_weighted, Gheroes_weighted)
remove_nodes(Gbronze_weighted, 10, 3, 'weight', Gsilver_weighted, Gmodern_weighted, Gheroes_weighted)
remove_nodes(Gmodern_weighted, 10, 3, 'weight', Gsilver_weighted, Gbronze_weighted, Gheroes_weighted)
remove_nodes(Gheroes_weighted, 10, 3, 'weight', Gsilver_weighted, Gbronze_weighted, Gmodern_weighted)

['ABSORBING MAN/CARL C', 'ADAMS, NICOLE NIKKI', 'APOLLO [GREEK GOD]', 'BEETLE/ABNER RONALD', 'BYRD, SEN. HARRINGTO', 'CARTER, PEGGY', 'DE LA FONTAINE, CONT', 'EGGHEAD/PROF. ELIHAS', 'JAMESON, J. JONAH', 'MACLAIN, DR. MYRON', 'MERLIN', 'PENDRAGON, ARTHUR', 'PLANTMAN/SAM SMITHER', 'PUPPET MASTER/PHILLI', 'REDWING', 'SET', 'STILT-MAN/WILBUR DAY', 'SUPER-ADAPTOID/ALESS', 'SURTUR', 'TAKU', 'TULAK', 'WATER WIZARD/PETE', "W'KABI", 'WRECKER III/DIRK GAR', 'YMIR']
['ARKON', 'BEERE, JASON', 'BERSERKER', 'BLACK TALON II/SAMUE', 'BOGGS, MORDECAI P.', 'CANTOR, VERA', 'CHAMPION, IMUS', 'DAMBALLAH II', 'DRAX/ARTHUR DOUGLAS', 'HARDY, MORGAN MACNEI', 'LANG, CASSANDRA CASS', 'LETTERMAN, DAVID', 'MANIPULATOR/', 'MOLECULE MAN/OWEN RE', 'SANTINI, DR. JOSE', 'SOLARR', 'SOUTHERN, CANDY', 'TYPHON', 'VIPER II', 'YELLOW CLAW/TZING JA']
['AMPERE/', 'ARIDES', 'BARON BRIMSTONE/', 'BERDITCHEV, ITZHAK', 'BLOOD BROTHERS', 'BRAWLER', 'DEVLOR', 'DOC SAMSON/DR. LEONA', 'DOOMSDAY MAN II', 'EHMAN, HEIDI', 'GALEN KOR, ADMI