# Imports

In [1]:
import pandas as pd
import statistics as stats
import numpy as np

import networkx as nx
from networkx.algorithms import bipartite

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Read data

In [2]:
al = '..//Data//out.moreno_crime_crime'
gender = '..//Data//ent.moreno_crime_crime.person.sex'
name = '..//Data//ent.moreno_crime_crime.person.name'
role = '..//Data//rel.moreno_crime_crime.person.role'

## Adjancency list as DataFrame

In [11]:
df_al = pd.read_csv(al, sep=" ", names=['person', 'crime'], index_col=False)
df_al['person'] = 'p' + df_al['person'].astype(str)
df_al['crime'] = 'c' + df_al['crime'].astype(str)
df_al.head(3)
df_al.shape

(1476, 2)

## Gender Dataframe

In [15]:
df_gender = pd.read_csv(gender, sep=" ", header=None, names=['gender'])
df_gender['person'] = 'p' + df_gender.index.astype(str)
df_gender.head(3)

Unnamed: 0,gender,person
0,1,p0
1,1,p1
2,1,p2


## Name DataFrame

In [14]:
df_name = pd.read_csv(name, sep=" ", header=None, names=['name'])
df_name['person'] = 'p' + df_name.index.astype(str)
df_name.head(3)

Unnamed: 0,name,person
0,AbelDennis,p0
1,AbramsChad,p1
2,AbramsDavid,p2


## Role Dataframe

In [13]:
df_role = pd.read_csv(role, sep=",", header=None, names=['role'])
df_role.head(3)
# df_role.shape

Unnamed: 0,role
0,Suspect
1,Victim
2,Victim


## Join adjancency list with role

In [28]:
df_al_roles = df_al.join(df_role)
df_al_roles.head(3)

Unnamed: 0,person,crime,role
0,p1,c1,Suspect
1,p1,c2,Victim
2,p1,c3,Victim


In [29]:
# group bys gives us the same as degree distribution
# df_al_roles.groupby(by='person', dropna=False).count()
# df_al_roles.groupby(by=['crime', 'role'], dropna=False).count()

## Basic data stats

In [48]:
# the following are used to create the graph
people = df_al['person'].unique()
crimes = df_al['crime'].unique()

# print stats
print('Number of people:', len(people))
print('Number of crimes:', len(crimes))
print('Number of roles:', len(df_role))


Number of people: 829
Number of crimes: 551
Number of roles: 1476


Breakdown of roles

In [49]:
df_role.value_counts()

role          
Suspect           682
Victim            558
Witness           195
Victim Suspect     41
dtype: int64

# Make graph

In [None]:
# G=nx.from_pandas_dataframe(df_al_roles, 0, 'b', ['weight', 'cost'])

In [50]:
# create networkx graph
G=nx.Graph()

# # add nodes
for i in range(len(people)):
    G.add_node(people[i], name=df_name['name'][i], gender=df_gender['gender'][i], bipartite=0)

for i in range(len(crimes)):
    G.add_node(crimes[i], bipartite=1)

# # add edges
for i in range(len(df_al)):
    G.add_edge(df_al_roles['person'][i], df_al_roles['crime'][i], role=df_al_roles['role'][i])

## Get degree of all nodes
Node degreee of **people** nodes if the number of crimes they were involved in.  
Node degree of **crime** nodes is the number of people involved in the crime.

In [51]:
# Creating dict with all node degrees to add as attribute
node_degrees = dict()

# Createing dict for each node type
people_degrees = dict()
crimes_degrees = dict()

# for loop to populate dicts above
for node in G.nodes:
    # print(G.edges(node, data=True))
    node_degrees[node] = G.degree(node)
    if node.startswith('p') == True:
        people_degrees[node] = G.degree(node)
    else:
        crimes_degrees[node] = G.degree(node)

In [54]:
# Add node degree as node attribute in graph G
nx.set_node_attributes(G, node_degrees, "node_degree")

# and check it worked
nx.get_node_attributes(G, 'node_degree')

### Make a dataframe including all node attributes

In [55]:
# code from https://stackoverflow.com/a/50775962
# make pandas dataframe from graph with node attributes
pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index')

Unnamed: 0,name,gender,bipartite,node_degree
p1,AbelDennis,1.0,0,4
p2,AbramsChad,1.0,0,22
p3,AbramsDavid,1.0,0,1
p4,AbramsDon,1.0,0,1
p5,AbramsRichard,1.0,0,1
...,...,...,...,...
c547,,,1,1
c548,,,1,1
c549,,,1,1
c550,,,1,1
