In [None]:
import statistics as stats

%matplotlib inline
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import nxviz as nv
import pandas as pd
import seaborn as sns
from networkx.algorithms import bipartite

In [None]:
# Locations of the four input files: edge list, person gender, person name, edge role.
al, gender, name, role = (
    '..//Data//out.moreno_crime_crime',
    '..//Data//ent.moreno_crime_crime.person.sex',
    '..//Data//ent.moreno_crime_crime.person.name',
    '..//Data//rel.moreno_crime_crime.person.role',
)

In [None]:
# Load the person/crime edge list (two space-separated columns) and prefix the
# ids so the two node types get distinct labels: 'p<id>' and 'c<id>'.
al_df = pd.read_csv(al, sep=" ", names=['person', 'crime'], index_col=False)
al_df = al_df.assign(
    person='p' + al_df['person'].astype(str),
    crime='c' + al_df['crime'].astype(str),
)
al_df

In [None]:
# One gender value per row; each row is labelled 'p<index value>'.
# NOTE(review): with a default index these labels start at 'p0' — confirm they
# line up with the ids used in the edge list before joining on this column.
gender_df = pd.read_csv(gender, sep=" ", header=None, names=['gender'])
gender_df = gender_df.assign(person='p' + gender_df.index.astype(str))
gender_df.head(3)

In [None]:
# One name per row; each row is labelled 'p<index value>', mirroring gender_df.
name_df = pd.read_csv(name, sep=" ", header=None, names=['name'])
# Build the label from name_df's own index. It was previously derived from
# gender_df.index, which only worked while both frames had identical indexes.
# NOTE(review): with a default index these labels start at 'p0' — confirm they
# line up with the ids used in the edge list before joining on this column.
name_df['person'] = 'p' + name_df.index.astype(str)
name_df.head(3)

In [None]:
# Read the role file without a header row, so columns get integer labels;
# column 0 is used later as the 'role' attribute of each edge.
# NOTE(review): presumably one role per row, in the same row order as the edge list — confirm.
role_df = pd.read_csv(role, sep=",", header=None)
role_df.head(3)

In [None]:
# Distinct person and crime labels taken from the edge list.
people, crimes = (al_df[column].unique() for column in ('person', 'crime'))

In [None]:
# Number of distinct person labels found in the edge list.
len(people)

In [None]:
# create networkx graph
G=nx.Graph()

# # add nodes
for i in range(len(people)):
    G.add_node(people[i], name=name_df['name'][i], gender=gender_df['gender'][i], bipartite=0)

for i in range(len(crimes)):
    G.add_node(crimes[i], bipartite=1)

# # add edges
for i in range(len(al_df)):
    G.add_edge(al_df['person'][i], al_df['crime'][i], role=role_df[0][i])

In [None]:
# Creating dict with all node degrees to add as attribute
# Degree of every node, to be stored later as a node attribute.
node_degrees = {node: G.degree(node) for node in G.nodes}

# The same mapping split by node type: labels starting with 'p' are people,
# everything else is treated as a crime.
people_degrees = {
    label: degree for label, degree in node_degrees.items() if label.startswith('p')
}
crimes_degrees = {
    label: degree for label, degree in node_degrees.items() if not label.startswith('p')
}


In [None]:
# Three independent, empty accumulators for the flattened edge list.
people_list, crime_list, role_list = [], [], []

In [None]:
# Builds a bipartite graph linking G's nodes to its maximal cliques (per the NetworkX docs).
# The return value is not assigned, so this cell only displays it.
# NOTE(review): no later cell uses this result — consider removing the cell.
nx.make_clique_bipartite(G)

In [None]:
# people_degrees
# crimes_degrees

In [None]:
# Add node degree as node attribute in graph G
# Store each node's degree on the node itself under the key "node_degree".
nx.set_node_attributes(G, values=node_degrees, name="node_degree")

In [None]:
# Adapted from https://stackoverflow.com/a/50775962
# Tabular view of the graph: one row per node, one column per node attribute.
pd.DataFrame.from_dict(
    {node_id: attrs for node_id, attrs in G.nodes(data=True)},
    orient='index',
)

In [None]:
# Read back the 'node_degree' attribute as a {node: value} mapping.
nx.get_node_attributes(G, 'node_degree')

In [None]:
# KDE of the degree distribution over person nodes. The degrees are placed in a
# named 'degree' column so the x-axis is labelled meaningfully instead of "0".
sns.displot(
    data=pd.Series(people_degrees, name='degree').to_frame(),
    x='degree',
    kind="kde",
).set(title='Degree distribution: people')

In [None]:
# KDE of the degree distribution over crime nodes. The degrees are placed in a
# named 'degree' column so the x-axis is labelled meaningfully instead of "0".
sns.displot(
    data=pd.Series(crimes_degrees, name='degree').to_frame(),
    x='degree',
    kind="kde",
).set(title='Degree distribution: crimes')

In [None]:
# Headline facts about the full graph: size, bipartiteness, connectivity.
print(
    'Num. of nodes: {} \nNum. of edges: {} \nIs bipartite? {} \nIs connected? {}'.format(
        *(
            probe(G)
            for probe in (nx.number_of_nodes, nx.number_of_edges, nx.is_bipartite, nx.is_connected)
        )
    )
)

In [None]:
# for e in G.edges(data=True):
#     print(e[0], e[1], e[2]['role'])

In [None]:
# Reset the three accumulators that the next cell fills from the graph's edges.
people_list, crime_list, role_list = [], [], []

In [None]:

# Flatten the graph's edges into three parallel lists (person, crime, role).
# The lists are rebuilt from scratch here so that re-running this cell does not
# append a second copy of every edge to lists left over from an earlier run.
# NOTE(review): assumes every edge tuple comes back as (person, crime) — confirm.
people_list, crime_list, role_list = [], [], []
for person, crime, attrs in G.edges(data=True):
    people_list.append(person)
    crime_list.append(crime)
    role_list.append(attrs['role'])


In [None]:
# One row per edge: who was involved in which crime, and in what role.
zipped = [*zip(people_list, crime_list, role_list)]
df_crimes = pd.DataFrame.from_records(zipped, columns=['person', 'crime', 'role'])
df_crimes

In [None]:
# Per-person number of crime involvements, broken down by role (NaN = none).
# The counted column is 'crime': 'person' is already the row key, so asking
# pivot_table to aggregate 'person' as well was ambiguous.
table = pd.pivot_table(df_crimes, values='crime', index=['person'], columns=['role'], aggfunc='count')
# Row total across roles, added without mutating the pivot result in place.
table = table.assign(total_crimes=table.sum(axis=1))
table

In [None]:
# Pairwise correlation between the columns of `table` as currently bound.
# NOTE(review): a later cell rebinds `table`, so the result depends on which cell ran last.
table.corr()

In [None]:
# Overall number of edges per role (no per-person index), plus their sum.
# NOTE(review): this rebinds `table`, replacing the per-person pivot built earlier.
table = pd.pivot_table(df_crimes, values='person', columns=['role'], aggfunc='count')
table = table.assign(total_crimes=table.sum(axis=1))
table

In [None]:
# df_crimes.groupby(['Role','Person']).size()

From the official Networkx docs: https://networkx.org/documentation/stable/reference/algorithms/bipartite.html

Many algorithms of the bipartite module of NetworkX require, as an argument, a container with all the nodes that belong to one set, in addition to the bipartite graph B. The functions in the bipartite package do not check that the node set is actually correct nor that the input graph is actually bipartite. If B is connected, you can find the two node sets using a two-coloring algorithm:

```py
>>> nx.is_connected(B)
True
>>> bottom_nodes, top_nodes = bipartite.sets(B)
```
However, if the input graph is not connected, there are more than one possible colorations. This is the reason why we require the user to pass a container with all nodes of one bipartite node set as an argument to most bipartite functions. In the face of ambiguity, we refuse the temptation to guess and raise an AmbiguousSolution Exception if the input graph for bipartite.sets is disconnected.

Using the bipartite node attribute, you can easily get the two node sets:

In [None]:
# Split the nodes into the two bipartite sets using the 'bipartite' attribute:
# 0 marks people, every other node is a crime.
people_nodes = set(
    label for label, attrs in G.nodes(data=True) if attrs["bipartite"] == 0
)
crime_nodes = set(G).difference(people_nodes)

# Graphing

In [None]:
# Circos plot of G with nodes grouped and coloured by their bipartite set.
# nxviz (nv) and pyplot (plt) come from the notebook's import cell rather than
# being re-imported in the middle of the notebook.
fig, ax = plt.subplots(figsize=(7, 7))
nv.circos(
    G,
    group_by="bipartite",
    node_color_by="bipartite",
    node_enc_kwargs={"size_scale": 3},
)

In [None]:
# Two-sided bipartite drawing with the people set passed as the reference side.
options = dict(
    node_color='orange',
    node_size=10,
    edge_color='black',
    linewidths=1,
    font_size=8,
    with_labels=False,
)

pos = nx.bipartite_layout(G, people_nodes)
nx.draw(G, pos, **options)

In [None]:
# Force-directed drawing of the whole graph.
options = {
    'node_color': 'orange',
    'node_size': 10,
    'edge_color': 'gray',
    'linewidths': 1,
    'font_size': 8,
    'with_labels': False
}

# Role of every edge keyed by its endpoints; kept available for optional edge labels
# (e.g. nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)).
edge_labels = nx.get_edge_attributes(G, 'role')

# Fixed seed so the layout, and therefore the figure, is the same on every run.
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, **options)

In [None]:
# Values of the 'bipartite' attribute across nodes (0 = person, 1 = crime, as set when the nodes were added).
nx.get_node_attributes(G, 'bipartite').values()

# Stats

In [None]:
# Bipartite density of G, with the people set passed as one of the two node sets.
bipartite.density(G, people_nodes)

In [None]:
# Average bipartite clustering coefficient; no node subset is passed.
bipartite.average_clustering(G)

In [None]:
# Bipartite clustering result for G, kept in `clustering` and displayed.
clustering = bipartite.clustering(G)
clustering

In [None]:
# Bipartite closeness centrality, with the people set passed as the reference node set.
# The result is used below as a {node: value} mapping.
vals_cc = bipartite.closeness_centrality(G, people_nodes)

In [None]:
# Closeness-centrality summary. "Max"/"Min" show the node label that holds the
# extreme value; "Mean"/"Median" are computed over the centrality values.
print(
    'Closeness centrality \nMax : {} \nMin: {} \nMean: {} \nMedian: {}'.format(
        *(pick(vals_cc, key=vals_cc.get) for pick in (max, min)),
        *(average(vals_cc.values()) for average in (stats.mean, stats.median)),
    )
)

# NOTE(review): the minimum can be a crime node because, per the NetworkX docs,
# the returned dict covers both node sets, not only the `people_nodes` passed in — confirm.

In [None]:
# Bipartite betweenness centrality, with the people set passed as the reference node set.
# The result is used below as a {node: value} mapping.
vals_bc = bipartite.betweenness_centrality(G, people_nodes)

In [None]:
# Betweenness-centrality summary. "Max"/"Min" show the node label that holds the
# extreme value; "Mean"/"Median" are computed over the centrality values.
print(
    'Betweenness centrality \nMax : {} \nMin: {} \nMean: {} \nMedian: {}'.format(
        *(pick(vals_bc, key=vals_bc.get) for pick in (max, min)),
        *(average(vals_bc.values()) for average in (stats.mean, stats.median)),
    )
)

In [None]:
# Distinct role labels present on the edges of G.
set(nx.get_edge_attributes(G, 'role').values())

In [None]:
# for i in G.edges():
#     print(i)

In [None]:
# Attribute dict of one specific edge (person 'p21', crime 'c53'), as a spot check.
G.edges[('p21', 'c53')]

In [None]:
# Force-directed drawing of G: people in green, crimes in red, edges coloured by role.
# Fixed seed so the layout, and therefore the figure, is the same on every run.
pos = nx.spring_layout(G, seed=42)
plt.figure(3, figsize=(12, 12))

# One colour per node, in G's iteration order.
node_cmap = ['green' if node.startswith('p') else 'red' for node in G]

# One colour per edge, in G.edges() order. Roles outside the mapping fall back
# to gray so edge_cmap always has exactly one entry per edge; the earlier
# if-chain appended nothing for an unrecognised role, which left the colour
# list shorter than the edge list.
role_colors = {
    'Suspect': 'red',
    'Victim': 'blue',
    'Victim Suspect': 'orange',
    'Witness': 'green',
}
edge_cmap = [role_colors.get(G.edges[edge]['role'], 'gray') for edge in G.edges()]

nx.draw(G, pos, node_color=node_cmap, edge_color=edge_cmap, with_labels=False, node_size=2, width=0.5)
plt.show()

# Projection

In [None]:
# Project G onto the people set with ratio=True, then preview the first five weighted edges.
# NOTE(review): per the NetworkX docs, ratio=True should scale the shared-neighbour count
# by the maximum possible number of shared neighbours — confirm.
weighted_person_graph = bipartite.weighted_projected_graph(G, people_nodes, ratio=True)
list(weighted_person_graph.edges(data=True))[0:5]


In [None]:
# Same projection as for people, but onto the crime set; preview the first five weighted edges.
weighted_crime_graph = bipartite.weighted_projected_graph(G, crime_nodes, ratio=True)
list(weighted_crime_graph.edges(data=True))[0:5]

## Newman collaboration

In [None]:
# Collaboration-weighted projection onto the people set; preview the first five edges.
# NOTE(review): this rebinds `weighted_person_graph`, replacing the ratio-weighted
# projection built earlier under the same name.
weighted_person_graph = bipartite.collaboration_weighted_projected_graph(G, people_nodes)
list(weighted_person_graph.edges(data=True))[0:5]

# Largest connected component

In [None]:
# components in the graph sorted in descendent order
sorted_components = sorted(nx.connected_components(G), key=len, reverse=True)

# selecting the biggest component
BCC = G.subgraph(sorted_components[0])

In [None]:
# People inside the largest component, selected through the 'bipartite' node
# attribute (0 = person). bipartite.sets() only returns a two-colouring of the
# graph, so taking its first element does not guarantee the people side.
BCC_people_nodes = {n for n, d in BCC.nodes(data=True) if d["bipartite"] == 0}

# Two layouts for the component. The spring layout is seeded so the figure is
# the same on every run.
lay_spring = nx.spring_layout(BCC, seed=42)
lay_bip = nx.bipartite_layout(BCC, BCC_people_nodes)

In [None]:
# Draw the largest component using the spring layout computed earlier.
# NOTE(review): nx.draw is documented to return nothing, so `biggest_comp_graph`
# is presumably None — confirm before relying on it.
biggest_comp_graph = nx.draw(BCC, lay_spring, node_size=10)

In [None]:
# number of nodes in the all graph
num_people = len(set(al_df['person']))
num_crime = len(set(al_df['crime']))
f'Number of people: {num_people} and number of crimes: {num_crime}'

In [None]:
# Print every edge of the largest component together with its attribute dict.
# NOTE(review): this prints one line per edge — consider previewing only a few.
for e in BCC.edges(data=True):
    print(e)

In [None]:
# Headline facts about the largest component: size, bipartiteness, connectivity.
print(
    'Num. of nodes: {} \nNum. of edges: {} \nIs bipartite? {} \nIs connected? {}'.format(
        *(
            probe(BCC)
            for probe in (nx.number_of_nodes, nx.number_of_edges, nx.is_bipartite, nx.is_connected)
        )
    )
)

Flattening - weights indicate how many layers a node has in common
    - loses information about the types of connections
    - if the types of nodes are different, you can't even do this

victim - out degree
suspect - in degree

In [None]:
# plot only biggest component
pos = nx.spring_layout(BCC)
posB = nx.bipartite_layout(BCC, BCC_people_nodes)
biggest_comp_graph = nx.draw_spring(BCC,node_size=10)

In [None]:
# Just the People from the LCC
P = bipartite.weighted_projected_graph(BCC, BCC_people_nodes)
nx.draw(P, node_size=10)

In [None]:
# Degree of every node in the projected graph P.
P.degree()

In [None]:
# For the hard-coded node 'p715': list its neighbours in the largest component,
# and for each of them print its own neighbours.
x = list(BCC.neighbors('p715'))
for i in x:
    print(i, list(BCC.neighbors(i)))