# Create direct citation network

In this notebook we create a direct-citation network for CARPs using `networkX` and the corrected `../data/arp_grp_2010_2019_references_matched.csv` data file:

In [1]:
import networkx as nx
import pandas as pd
from pyvis.network import Network

In [14]:
articles = pd.read_csv('../data/arp_grp_2010_2019_references_matched.csv')
articles.rename(columns={'Unnamed: 0': 'key', 'CR_matched': 'cited'}, inplace=True)
articles = articles[['key', 'cited']]

In [15]:
G = nx.DiGraph()

In [16]:
G.add_nodes_from(articles['key'].values)

In [17]:
for i in range(len(articles)):
    key = articles.loc[i, 'key']
    citations = articles.loc[i, 'cited']
    if not pd.isna(citations):
        ind_citations = citations.split(';')
        arcs = [(key, x) for x in ind_citations]
        G.add_edges_from(arcs)

In [18]:
sorted(d for n, d in G.degree())

[0,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 9,
 9,
 9,
 9,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 13,
 13,
 13,
 13,
 13,
 13,
 13,
 13,
 13,
 14,
 14,
 14,
 14,
 14,
 15,
 15,
 15,
 15,
 15,
 16,
 16,
 16,
 16,
 16,
 16,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 18,
 18,
 18,
 18,
 18,
 18,
 19,
 19,
 19,
 20,
 20,
 20,
 20,
 21,
 22,
 22,
 22,
 22,
 23,
 23,
 23,
 23,
 24,
 24,
 25,
 26,
 26,
 27,
 28,
 28,
 28,
 28,
 29,
 29,
 32,
 32,
 32,
 33,
 34,
 35,
 35,
 36,
 37,
 40,
 40,
 40,
 41,
 44,
 44,
 45,
 46,
 46,
 48,
 50,
 57,
 58,
 62,
 65,
 65,
 71,
 78,
 81,
 86,
 93]

In [19]:
G2 = Network(height="750px", width="100%", bgcolor="#222222", font_color="white")
G2.show_buttons(filter_=['physics'])
G2.from_nx(G)

In [20]:
G2.show("mygraph.html")

In [21]:
added_info = pd.read_csv('../data/arp_grp_2010_2019_carps_downloaded_benchmark_annotated_full.csv')

In [22]:
added_info

Unnamed: 0,X,author,year,title,journal,dwn_.,Downloaded,Benchmarks_origins,Benchmarks_used,Solution_tech,Version
0,"AHR D, 2014, ARC ROUTING: PROBLEMS, METHODS, A...",AHR D;REINELT G,2014,THE CAPACITATED ARC ROUTING PROBLEM: COMBINATO...,"ARC ROUTING: PROBLEMS, METHODS, AND APPLICATIONS",0,0,,,,
1,"AMAYA CA, 2010, J OPER RES SOC",AMAYA CA;LANGEVIN A;TREPANIER M,2010,A HEURISTIC METHOD FOR THE CAPACITATED ARC ROU...,JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY,2,1,random;real,random;real,heuristic,
2,"ARAKAKI RK, 2018, COMPUT OPER RES",ARAKAKI RK;USBERTI FL,2018,HYBRID GENETIC ALGORITHM FOR THE OPEN CAPACITA...,COMPUTERS \& OPERATIONS RESEARCH,5,1,gdb;val;egl;A;B,o-gdb;o-val;o-egl;o-A;o-B,EA,
3,"ARAKAKI RK, 2019, COMPUT OPER RES",ARAKAKI RK;USBERTI FL,2019,AN EFFICIENCY-BASED PATH-SCANNING HEURISTIC FO...,COMPUTERS \& OPERATIONS RESEARCH,6,1,gdb;val;egl;bmcv,gdb;val;egl;bmcv,heuristic,
4,"ARCHETTI C, 2010, COMPUT OPER RES",ARCHETTI C;FEILLET D;HERTZ A;SPERANZA MG,2010,THE UNDIRECTED CAPACITATED ARC ROUTING PROBLEM...,COMPUTERS \& OPERATIONS RESEARCH,7,1,val,p-val,exact;TS;VNS,
5,"ARCHETTI C, 2017, EUR J OPER RES",ARCHETTI C;BERTAZZI L;LAGANA D;VOCATURO F,2017,THE UNDIRECTED CAPACITATED GENERAL ROUTING PRO...,EUROPEAN JOURNAL OF OPERATIONAL RESEARCH,8,1,gdb;val,p-ggdb;p-gval,exact,
6,"BACH L, 2013, COMPUT OPER RES",BACH L;HASLE G;WOHLK S,2013,"A LOWER BOUND FOR THE NODE, EDGE, AND ARC ROUT...",COMPUTERS \& OPERATIONS RESEARCH,9,1,gdb;val;egl;CBMix;DI-NEARP,BHW;DI-NEARP;CBMix,exact,
7,"BACH L, 2016, NETWORKS",BACH L;LYSGAARD J;WOHLK S,2016,A BRANCH-AND-CUT-AND-PRICE ALGORITHM FOR THE M...,NETWORKS,10,1,CBMix;gdb;val;egl;,CBMix;mgval;mggdb;BHW,exact,
8,"BARTOLINI E, 2013, MATH PROGRAM",BARTOLINI E;CORDEAU JF;LAPORTE G,2013,IMPROVED LOWER BOUNDS AND EXACT ALGORITHM FOR ...,MATHEMATICAL PROGRAMMING,12,1,gdb;kshs;bmcv;egl;val;,gdb;kshs;bmcv;egl;val;,exact,
9,"BARTOLINI E, 2013, OPER RES",BARTOLINI E;CORDEAU JF;LAPORTE G,2013,AN EXACT ALGORITHM FOR THE CAPACITATED ARC ROU...,OPERATIONS RESEARCH,13,1,bmcv;egl;tcarp;egl;gdb;val;egl,2val;2bmcv;2egl;tcarp;tegl;tgb;tval;tegl,exact,


In [23]:
added_info.columns

Index(['X', 'author', 'year', 'title', 'journal', 'dwn_.', 'Downloaded',
       'Benchmarks_origins', 'Benchmarks_used', 'Solution_tech', 'Version'],
      dtype='object')

In [24]:
articles_m = articles.merge(added_info, left_on=['key'], right_on=['X'], how='inner')
articles_m = articles_m.loc[articles_m['Downloaded'] == 1]
articles_ea = articles_m[articles_m['Solution_tech'] == 'exact']
articles_ea = articles_m[articles_m['Solution_tech'] == 'exact']

In [25]:
print(len(added_info))
print(len(articles))
print(len(articles_m))
print(len(articles_ea))

136
192
129
28


In [26]:
G2_ea = nx.Graph()
G2_ea.add_nodes_from(articles_ea['key'].values)

keys = articles_ea['key'].values
for i in range(len(articles_ea)):
    key = articles_ea.iloc[i]['key']
    citations = articles_ea.iloc[i]['cited']
    if not pd.isna(citations):
        ind_citations = citations.split(';')
        arcs = [(key, x) for x in ind_citations if x in keys]
        G2_ea.add_edges_from(arcs)

In [27]:
G2 = Network(height="750px", width="100%", bgcolor="#222222", font_color="white")
G2.show_buttons(filter_=['physics'])
G2.from_nx(G2_ea)
G2.show("mygraph.html")

In [28]:
print(nx.info(G2))

AttributeError: 'Network' object has no attribute 'name'

In [46]:
sol_tech = []
benchmark_orig = []
benchmar_adapt = []

articles_m_there = articles_m.loc[articles_m['Solution_tech'].notnull()]
articles_m_there['exact'] = False
articles_m_there['heuristic'] = False
articles_m_there['EA'] = False
articles_m_there = articles_m_there.reset_index(drop=True)
articles_m_there['sol_tech'] = False
for i, x in enumerate(articles_m_there['Solution_tech'].str.split(';')):
    for y in x:
        if y == 'exact':
            articles_m_there.loc[i, 'exact'] = True
            articles_m_there.loc[i, 'sol_tech'] = 'exact'
        elif y == 'EA':
            articles_m_there.loc[i, 'EA'] = True
            articles_m_there.loc[i, 'sol_tech'] = 'EA'
        else:
            articles_m_there.loc[i, 'heuristic'] = True
            articles_m_there.loc[i, 'sol_tech'] = 'heuristic'
    sol_tech.append(articles_m_there.loc[i, 'sol_tech'])
articles_m_there.to_csv('../data/sol_tech_counts.csv')
sol_tech

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


['heuristic',
 'heuristic',
 'exact',
 'heuristic',
 'exact',
 'heuristic',
 'EA',
 'exact',
 'EA',
 'EA',
 'EA',
 'heuristic',
 'heuristic',
 'heuristic',
 'exact',
 'heuristic',
 'heuristic',
 'heuristic',
 'heuristic',
 'EA',
 'exact',
 'EA',
 'heuristic',
 'EA',
 'exact',
 'EA',
 'heuristic',
 'heuristic',
 'heuristic',
 'exact',
 'EA',
 'heuristic',
 'EA',
 'heuristic',
 'exact',
 'EA',
 'EA',
 'heuristic',
 'heuristic',
 'EA',
 'heuristic',
 'heuristic',
 'exact',
 'heuristic',
 'EA',
 'EA',
 'exact',
 'exact',
 'EA',
 'exact',
 'exact',
 'exact',
 'exact',
 'exact',
 'exact',
 'heuristic',
 'heuristic',
 'EA',
 'heuristic',
 'exact',
 'exact',
 'heuristic',
 'EA',
 'exact',
 'EA',
 'EA',
 'EA',
 'heuristic',
 'heuristic',
 'heuristic',
 'EA',
 'heuristic',
 'EA',
 'heuristic',
 'heuristic',
 'exact',
 'exact',
 'EA',
 'exact',
 'exact',
 'heuristic',
 'EA',
 'exact',
 'EA',
 'EA',
 'EA',
 'heuristic',
 'heuristic',
 'exact',
 'heuristic',
 'EA',
 'EA',
 'EA',
 'heuristic',
 'EA'

In [38]:
articles_m_there

Unnamed: 0,key,cited,X,author,year,title,journal,dwn_.,Downloaded,Benchmarks_origins,Benchmarks_used,Solution_tech,Version,exact,heuristic,EA,sol_tech
0,"WILLEMSE EJ, 2019, COMPUT OPER RES","BELENGUER JM, 2006, COMPUT OPER RES;BEULLENS P...","WILLEMSE EJ, 2019, COMPUT OPER RES",WILLEMSE EJ;JOUBERT JW,2019,EFFICIENT LOCAL SEARCH STRATEGIES FOR THE MIXE...,COMPUTERS \& OPERATIONS RESEARCH,167,1,cen-if;act-if;mval;lpr,lpr;mval;cen-IF;act-IF;lpr-IF;mval-IF-3L,LS,,False,True,False,heuristic
1,"ARAKAKI RK, 2019, COMPUT OPER RES","ARAKAKI RK, 2018, COMPUT OPER RES;BELENGUER JM...","ARAKAKI RK, 2019, COMPUT OPER RES",ARAKAKI RK;USBERTI FL,2019,AN EFFICIENCY-BASED PATH-SCANNING HEURISTIC FO...,COMPUTERS \& OPERATIONS RESEARCH,6,1,gdb;val;egl;bmcv,gdb;val;egl;bmcv,heuristic,,False,True,False,heuristic
2,"TFAILI S, 2019, RAIRO-OPER RES","BELENGUER JM, 2006, COMPUT OPER RES;BELENGUER ...","TFAILI S, 2019, RAIRO-OPER RES",TFAILI S;DKHIL H;SBIHI A;YASSINE A,2019,EFFICIENT ALGORITHMS UNDER DYNAMIC GRAPHS TO S...,RAIRO-OPERATIONS RESEARCH,149,1,family,family,exact,,True,False,False,exact
3,"MOFID-NAKHAEE E, 2019, WASTE MANAGE RES","AMPONSAH SK, 2004, WASTE MANAGE;BAUTISTA J, 20...","MOFID-NAKHAEE E, 2019, WASTE MANAGE RES",MOFID NAKHAEE E;BARZINPOUR F,2019,A MULTI-COMPARTMENT CAPACITATED ARC ROUTING PR...,WASTE MANAGEMENT \& RESEARCH,113,1,gdb;mval;lpr,gdb-IF-3L;mval-IF-3L;lpr-IF-3L,metaheuristic;adaptive-large-neighbourhood-search,,False,True,False,heuristic
4,"LI M, 2018, COMPUT IND ENG","LACOMME P, 2005, EUR J OPER RES;LOPES RB, 2014...","LI M, 2018, COMPUT IND ENG",LI M;ZHEN L;WANG S;LV W;QU X,2018,UNMANNED AERIAL VEHICLE SCHEDULING PROBLEM FOR...,COMPUTERS \& INDUSTRIAL ENGINEERING,89,1,real,real,exact,,True,False,False,exact
5,"TIRKOLAEE EB, 2018, WASTE MANAGE","CHU F, 2005, J INTELL MANUF;LACOMME P, 2005, E...","TIRKOLAEE EB, 2018, WASTE MANAGE",TIRKOLAEE EB;MANDAVI I;ESFAHANI MMS,2018,A ROBUST PERIODIC CAPACITATED ARC ROUTING PROB...,WASTE MANAGEMENT,152,1,random,random,heuristic,,False,True,False,heuristic
6,"SHANG R, 2018, NAT COMPUT","BALDACCI R, 2006, NETWORKS;BELENGUER JM, 2003,...","SHANG R, 2018, NAT COMPUT",SHANG R;DU B;DAI K;JIAO L;XUE Y,2018,MEMETIC ALGORITHM BASED ON EXTENSION STEP AND ...,NATURAL COMPUTING,139,1,bmcv;egl-g,bmcv;egl-g,EA,,False,False,True,EA
7,"CIANCIO C, 2018, EUR J OPER RES","ARCHETTI C, 2017, EUR J OPER RES;BACH L, 2016,...","CIANCIO C, 2018, EUR J OPER RES",CIANCIO C;LAGANA D;VOCATURO F,2018,BRANCH-PRICE-AND-CUT FOR THE MIXED CAPACITATED...,EUROPEAN JOURNAL OF OPERATIONAL RESEARCH,39,1,gdb;val;egl;TW,BHW;TW,exact,,True,False,False,exact
8,"TIRKOLAEE EB, 2018, SUSTAINABILITY","CORBERAN A, 2010, NETWORKS;GHIANI G, 2010, J H...","TIRKOLAEE EB, 2018, SUSTAINABILITY",TIRKOLAEE EB;HOSSEINABADI AAR;SOLTANI M;SANGAI...,2018,A HYBRID GENETIC ALGORITHM FOR MULTI-TRIP GREE...,SUSTAINABILITY,151,1,random,random,EA,,False,False,True,EA
9,"SHANG R, 2018, MEMET COMPUT","HERTZ A, 2000, OPER RES;LACOMME P, 2004, ANN O...","SHANG R, 2018, MEMET COMPUT",SHANG R;DU B;DAI K;JIAO L;ESFAHANI AMG;STOLKIN R,2018,QUANTUM-INSPIRED IMMUNE CLONAL ALGORITHM FOR S...,MEMETIC COMPUTING,138,1,gdb;val;bmcv,gdb;val;bmcv,EA,,False,False,True,EA


In [39]:
new_benchmarks = set()
original_benchmarks = set()

In [62]:
articles_m_bench = articles_m.loc[articles_m['Benchmarks_origins'].notnull()]
articles_m_bench = articles_m_bench.reset_index(drop=True)
bench_origs = []
sol_tech = []
for i, x in enumerate(articles_m_bench['Benchmarks_origins'].str.split(';')):
    for y in x:
        if y:
            #print(articles_m_there.loc[i, 'sol_tech'])
            sol_tech.append(articles_m_there.loc[i, 'sol_tech'])
            bench_origs.append(y.lower())
            print(y.lower())
            #original_benchmarks.add(y.lower())

cen-if
act-if
mval
lpr
gdb
val
egl
bmcv
tcarp
egl
val
rural
family
gdb
mval
lpr
real
random
bmcv
egl-g
gdb
val
egl
tw
random
gdb
val
bmcv
gdb
val
egl
a
b
val
egl
f
k
n
o
s
real
real
a
b
c
d
e
f
k
n
o
s
gdb
gdb
val
egl-g
hefei
beijing
egl-g
lpr
mval
gdb
val
bmcv
egl
egl-l
mggdb
mgval
cbmix
di-nearp
gdb
val
gdb
val
egl-l
gdb
val
egl
bmcv
egl-g
egl-l
real
gdb
kshs
bmcv
egl
mine
lpr
seix
cbmix
gdb
val
egl
di-nearp
cbmix
gdb
val
egl
egl
bmcv
egl-g
cen-if
lpr
cen-if
act-if
mval
gdb
val
lpr
gdb
val
egl
bmcv
egl-g
real
gdb
gdb
egl
val
gdb
val
egl
egl-g
bmcv
cen-if
act-if
mval
gdb
val
lpr
gdb
val
egl
val
val
gdb
val
egl
egl
bmcv
egl-g
gdb
val
bmcv
egl
mval
lpr
val
egl
t
real
cbmix
gdb
val
val
egl
bmcv
alba
madri
alda
mval
val
egl
gdb
mval
altominho
gdb
real
gdb
val
gdb
val
egl
gdb
val
cbmix
gdb
jpr
val
egl
bmcv
gdb
mine
gdb
val
egl
egl-g
real
egl-g
random
gdb
val
egl
real
real
gdb
val
egl
random
random
egl-g
egl
bmcv
egl-g
real
gdb
val
egl
gdb
val
mval
di-nearp
kshs
gdb
val
bmcv
gdb
val
egl
gdb

KeyError: 'the label [117] is not in the [index]'

In [63]:
original_benchmarks = articles_m.loc[articles_m['Benchmarks_used'].notnull()]
original_benchmarks = articles_m_bench.reset_index(drop=True)
bench_news = []
sol_tech2 = []
for i, x in enumerate(articles_m_bench['Benchmarks_used'].str.split(';')):
    for y in x:
        if y:
            #print(articles_m_there.loc[i, 'sol_tech'])
            sol_tech2.append(articles_m_there.loc[i, 'sol_tech'])
            bench_news.append(y.lower())
            print(y.lower())
            #original_benchmarks.add(y.lower())

lpr
mval
cen-if
act-if
lpr-if
mval-if-3l
gdb
val
egl
bmcv
constructive
biased-ranomdised
family
gdb-if-3l
mval-if-3l
lpr-if-3l
real
random
bmcv
egl-g
bhw
tw
random
gdb
val
bmcv
o-gdb
o-val
o-egl
o-a
o-b
val
egl
f
k
n
o
s
real
real
a
b
c
d
e
f
k
n
o
s
s-gdb
pgdb
pval
pegl-g
hefei
beijing
egl-g
lpr
mval
ob-lpr
ob-egl
gdb
val
bmcv
egl
egl-l
mggdb
mgval
cbmix
bhw
di-nearp
p-ggdb
p-gval
gdb
val
egl-l
gdb
val
egl
bmcv
egl-g
egl-l
real
gdb
kshs
bmcv
egl
mine
slpr
seix
cbmix
bhw
di-nearp
cbmix
mgval
mggdb
bhw
egl
bmcv
egl-g
cen-if
lpr-if
cen-if
act-if
gdb
val
egl
bmcv
egl-g
real
mggdb
gdb
egl
val
gdb
val
egl
egl-g
bmcv
cen-if
act-if
lpr-if
mval-if-3l
bccm-if-3l
gdb-if-3l
bccm-if
gdb-if
ugdb
uval
egl
val
rp-val
gdb
val
egl
egl
bmcv
egl-g
gdb
val
bmcv
egl
mval
lpr
val
egl
chic
cord
dach
kyiv
mons
ny
rome
tour
tue
vypas
cbmix
mgval
mggdb
val
egl
bmcv
alba
madri
alda
pmval
lpr
val
gdb-lml
mval-lml
altominho
mggdbsd
real
gdb
val
mdh-gdb
mdh-val
mdh-egl
cbmix
mggdb
mgval
gdbj
jpr
val
egl
bmcv
gdbj
m

KeyError: 'the label [117] is not in the [index]'

In [69]:
banchmarks = pd.DataFrame.from_dict({'sol_tech' : sol_tech, 'orig_benchmark': bench_origs})

In [70]:
new_benchmarks = pd.DataFrame.from_dict({'sol_tech' : sol_tech2, 'orig_benchmark': bench_news})

In [44]:
articles_m_there['sol_tech'].unique()

array(['heuristic', 'exact', 'EA'], dtype=object)

In [72]:
banchmarks.to_csv('../data/sol_tech_orig_bench.csv')

In [73]:
banchmarks.to_csv('../data/sol_tech_new_bench.csv')