In [1]:
import os

from matplotlib.pyplot import figure
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import nxviz.plots as nv
import nxviz as nv
from nxviz import annotate

nxviz has a new API! Version 0.7.3 onwards, the old class-based API is being
deprecated in favour of a new API focused on advancing a grammar of network
graphics. If your plotting code depends on the old API, please consider
pinning nxviz at version 0.7.3, as the new API will break your old code.

To check out the new API, please head over to the docs at
https://ericmjl.github.io/nxviz/ to learn more. We hope you enjoy using it!

(This deprecation message will go away in version 1.0.)



**Loading the Data**
>Loading the data and doing some basic cleaning (renaming the columns).

In [2]:
# Location of the exported challenge records (rectype-63).
# The default is the path used when this notebook was written; set the
# BOXING_CHALLENGES_CSV environment variable to point at another copy of the
# file instead of editing this cell.
CHALLENGES_CSV = os.environ.get(
    'BOXING_CHALLENGES_CSV',
    r'/Users/hernanadasme/Projects/data_challenges/exports_29_11/rectype-63.csv',
)
df = pd.read_csv(CHALLENGES_CSV)

In [3]:
# Map the exported column headers to short snake_case names and apply them
# in a single in-place rename. Headers not present in `df` are ignored.
column_names = {
    'Source': 'source',
    'Source year': 'source_year',
    'Boxers challenging RecordTitle': 'boxer_challenging',
    'Boxer Challenged RecordTitle': 'boxer_challenged',
    'Location Description': 'location_descp',
    'Status': 'status',
    'Conditions': 'condition',
    'City': 'city',
    'Location': 'location',
    'Challenge H-ID': 'challenge_id',
    'Boxers challenging H-ID': 'challenging_id',
    'Boxer Challenged H-ID': 'challenged_id',
}
df.rename(columns=column_names, inplace=True)

In [14]:
# Show only the first row to check the renamed columns.
df.head(n=1)

Unnamed: 0,challenge_id,rec_Title,source,source_year,challenging_id,boxer_challenging,challenged_id,boxer_challenged,status,condition,Basis,location_descp,city,location,weight
0,969,Castillo\nBaeza,Semanario La Prensa. Los Andes,1928-03-02,865,"Castillo, Ernesto",836,"Baeza, Florencio",,,,,,POINT(-70.597218 -32.833799),2


**Making an edgelist**
>The object `D` includes the nodes from the dataframe `df`.

In [19]:
# The edge list below reads a per-pair `weight` column. In this notebook that
# column is only created in a later cell, so on a fresh "Restart & Run All" it
# may not exist yet. Derive it here when it is missing, using the same
# definition: the number of rows sharing the same challenger/challenged pair.
if 'weight' not in df.columns:
    df['weight'] = df.groupby(['challenging_id', 'challenged_id'])['challenging_id'].transform('size')

# Directed graph with one edge challenger -> challenged, carrying `weight`.
D = nx.from_pandas_edgelist(df, 'challenging_id', 'challenged_id', edge_attr='weight', create_using=nx.DiGraph())

# Tag each node with the role it plays in a challenge.
# NOTE: a boxer that appears in both columns ends up tagged 'challenged',
# because the second call overwrites the attribute written by the first.
D.add_nodes_from(df['challenging_id'], bipartite = 'challenger')
D.add_nodes_from(df['challenged_id'], bipartite = 'challenged')

# Number of distinct boxers (nodes) in the graph.
D.number_of_nodes()

180

In [20]:
# Number of distinct directed challenger -> challenged edges.
D.number_of_edges()

120

>Adding the name of each boxer as node metadata

In [23]:
# Attach each boxer's name to the matching node.
# Rows are walked in dataframe order, challenger before challenged, so when an
# id occurs more than once the name from the last occurrence is the one kept.
for challenger_id, challenger_name, challenged_id, challenged_name in zip(
    df['challenging_id'],
    df['boxer_challenging'],
    df['challenged_id'],
    df['boxer_challenged'],
):
    D.nodes[challenger_id]['name'] = challenger_name
    D.nodes[challenged_id]['name'] = challenged_name

>Adding degree centrality as metadata

In [24]:
# Compute the degree centrality of every node once, then store each score
# under the 'centrality' key of that node's attribute dictionary.
dcs = nx.degree_centrality(D)
nx.set_node_attributes(D, dcs, name='centrality')

>Adding a new column with the weight, that is, how many connections (edges) there are between each pair of nodes. This is the column used as the `weight` edge attribute of `D`.

In [21]:
# Weight of a row = number of rows recorded for the same
# (challenger, challenged) pair.
pair_columns = ['challenging_id', 'challenged_id']
df['weight'] = df.groupby(pair_columns)[pair_columns[0]].transform('size')

>Checking the nodes and the edges

In [25]:
# Print every node, then every edge, each with its attribute dictionary.
for graph_view in (D.nodes(data=True), D.edges(data=True)):
    print(graph_view)

[(865, {'bipartite': 'challenger', 'name': 'Castillo, Ernesto', 'centrality': 0.00558659217877095}), (836, {'bipartite': 'challenged', 'name': 'Baeza, Florencio', 'centrality': 0.03910614525139665}), (882, {'bipartite': 'challenged', 'name': 'Araya, Manuel', 'centrality': 0.0111731843575419}), (780, {'bipartite': 'challenged', 'name': 'Bolli, Carlos', 'centrality': 0.00558659217877095}), (833, {'bipartite': 'challenged', 'name': 'Valdes, Mario', 'centrality': 0.0223463687150838}), (272, {'bipartite': 'challenged', 'name': 'Pérez, Amador', 'centrality': 0.00558659217877095}), (976, {'bipartite': 'challenger', 'name': 'Ortiz, Rafael', 'centrality': 0.00558659217877095}), (827, {'bipartite': 'challenged', 'name': 'Gallardo, Pedro', 'centrality': 0.00558659217877095}), (189, {'bipartite': 'challenger', 'name': 'Silva, Carlos', 'centrality': 0.00558659217877095}), (987, {'bipartite': 'challenged', 'name': 'Iñiguez, Mario', 'centrality': 0.00558659217877095}), (302, {'bipartite': 'challenger