<a href="https://colab.research.google.com/github/aaubs/ds-master/blob/main/courses/ds4b-m2-1-nw/notebooks/s4-nw-2mode-exercise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Packaging
import pandas as pd
import seaborn as sns
import networkx as nx
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics.pairwise import cosine_distances

sns.set(color_codes=True, rc={'figure.figsize':(10,8)})

# Additional dataviz
!pip install nxviz
import nxviz as nv

# 2-Mode Case: Crime networks

This bipartite network contains persons who appeared in at least one crime case as a suspect, a victim, a witness, or both a suspect and a victim at the same time. A left node represents a person and a right node represents a crime. An edge between two nodes shows that the person (left node) was involved in the crime represented by the right node.


More information about the network is provided here:
http://konect.uni-koblenz.de/networks/moreno_crime

Files:
* meta.moreno_crime -- Metadata about the network
* out.moreno_crime -- The edge list of the network in space-separated values format, with one edge per line

The meanings of the columns in out.moreno_crime are:
* First column: ID of the from node (person)
* Second column: ID of the to node (crime)

Entity attribute files:
* ent.moreno_crime.person.name -- Contains the attribute `name` of entity `person` of the network
* ent.moreno_crime.person.sex -- Contains the attribute `sex` of entity `person` of the network

In [None]:
# Base URL of the nxviz example datasets. It deliberately has no trailing
# slash: every path appended to it in this notebook starts with "/", so a
# trailing slash here would put "//" into the request URL.
curr_path = "https://raw.githubusercontent.com/ericmjl/nxviz/master/examples/datasets"

In [None]:
# Load the person-crime edge table: space-separated, no header row, with the
# first two lines of the file skipped. Only the first two columns are kept.
edges_url = curr_path + "/moreno_crime/out.moreno_crime_crime"
df = pd.read_csv(edges_url, sep=" ", skiprows=2, header=None)[[0, 1]]
df.columns = ["personID", "crimeID"]
# Shift to a 1-based index so the rows line up with the role table below.
df.index = df.index + 1

# Load the role metadata (one value per line, no header row) and give it the
# same 1-based index as the edge table.
roles_url = curr_path + "/moreno_crime/rel.moreno_crime_crime.person.role"
roles = pd.read_csv(roles_url, header=None)
roles.columns = ["roles"]
roles.index = roles.index + 1

In [None]:
# Preview the first rows of the person-crime edge table.
df.head()

In [None]:
# Build the bipartite graph: one "p<ID>" node per person, one "c<ID>" node per
# crime, and one edge per row of the edge table carrying that row's role.
G = nx.Graph()
edge_rows = df.join(roles)
for person_id, crime_id, role in zip(
    edge_rows["personID"], edge_rows["crimeID"], edge_rows["roles"]
):
    person_node = f"p{person_id}"
    crime_node = f"c{crime_id}"
    G.add_node(person_node, bipartite="person")
    G.add_node(crime_node, bipartite="crime")
    G.add_edge(person_node, crime_node, role=role)

In [None]:
# Load the per-person sex codes (one value per line, no header row).
gender_url = curr_path + "/moreno_crime/ent.moreno_crime_crime.person.sex"
gender = pd.read_csv(gender_url, header=None)

# Row i of the file (1-based after the shift) describes person node "p<i>".
gender.index += 1
for person_number, sex_code in zip(gender.index, gender[0].to_numpy()):
    G.nodes[f"p{person_number}"]["gender"] = sex_code

In [None]:
# Annotate each node with its connectivity score (degree centrality).
# The centrality dict covers every node of G, so it is computed once up front
# instead of being recomputed for the whole graph on every loop iteration.
dcs = nx.degree_centrality(G)
for n, score in dcs.items():
    G.nodes[n]["connectivity"] = score

In [None]:
# Draw a CircosPlot of the full bipartite graph, passing the "bipartite" and
# "connectivity" node-attribute names for grouping/coloring and ordering.
circos_options = {
    "node_grouping": "bipartite",
    "node_order": "connectivity",
    "node_color": "bipartite",
}
c = nv.CircosPlot(G, **circos_options)
c.draw()

In [None]:
# Make the "people" projection of the bipartite graph: keep only the person
# nodes and project the bipartite graph onto them.
person_nodes = [n for n in G.nodes() if G.nodes[n]["bipartite"] == "person"]
pG = nx.bipartite.projection.projected_graph(G, person_nodes)

# Annotate each person with their degree centrality within the projection.
# The centrality dict is computed once for the whole graph rather than being
# recomputed inside the loop for every node.
dcs = nx.degree_centrality(pG)
for n, score in dcs.items():
    pG.nodes[n]["connectivity"] = score

# Draw the projection, passing the "gender" and "connectivity" node-attribute
# names for grouping/coloring and ordering.
c = nv.CircosPlot(
    pG, node_grouping="gender", node_order="connectivity", node_color="gender"
)
c.draw()
plt.show()