# BACKGROUND 

In the 1990s, Rick Rosenfeld and Norm White used police records to collect data on crime in St. Louis. They began with five homicides and recorded the names of all the individuals who had been involved as victims, suspects or witnesses. They then explored the files and recorded all the other crimes in which those same individuals appeared. This snowball process was continued until they had data on 557 crime events. Those events involved 870 participants, of which 569 appeared as victims, 682 appeared as suspects, 195 appeared as witnesses, and 41 were dual (they were recorded both as victims and suspects in the same crime). Their data appear, then, as an 870 by 557 individual-by-crime-event matrix. Victims are coded as 1, suspects as 2, witnesses as 3, and duals as 4. In addition, Rosenfeld and White recorded the sex of each individual.

Data Source: https://github.com/nderzsy/Network-Analysis-in-Python---Tutorial-JupyterCon18-ODSCEast18/tree/master/datafiles/social/crime

http://moreno.ss.uci.edu/data.html#crime

In [34]:
import pandas as pd
import numpy as np
import math
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import networkx.algorithms.bipartite as bipartite
from pyvis import network as net
import matplotlib.pyplot as plt

In [35]:
# Load the participant table: one name per line, with the sex codes stored in a
# parallel file (same row order).
person = pd.read_csv('ent.moreno_crime_crime.person.name', sep='\t', header=None, names=['Name'])
sex_codes = pd.read_csv('ent.moreno_crime_crime.person.sex', header=None).squeeze("columns")

# Recode 0 -> 'F' and 1 -> 'M' before the column is attached. Writing strings
# into an existing integer column through .loc is an incompatible-dtype
# assignment, which recent pandas versions deprecate. Any other code is left
# unchanged, as before.
person['Sex'] = sex_codes.replace({0: 'F', 1: 'M'})
person.head()

Unnamed: 0,Name,Sex
0,AbelDennis,M
1,AbramsChad,M
2,AbramsDavid,M
3,AbramsDon,M
4,AbramsRichard,M


In [36]:
# Load the person-crime edge list: whitespace-separated (person id, crime id)
# pairs, skipping the two leading header lines of the file.
# sep=r"\s+" is the supported spelling of the deprecated delim_whitespace=True.
crime = pd.read_csv("out.moreno_crime_crime", sep=r"\s+", skiprows=[0, 1], names=['Person', 'Crime'])
crime.head()

Unnamed: 0,Person,Crime
0,1,1
1,1,2
2,1,3
3,1,4
4,2,5


In [37]:
# Attach the role recorded for each (person, crime) row; the role file has one
# entry per line and is matched to `crime` by row index.
role_column = pd.read_csv("rel.moreno_crime_crime.person.role", header=None).squeeze("columns")
crime['Role'] = role_column
crime.head()

Unnamed: 0,Person,Crime,Role
0,1,1,Suspect
1,1,2,Victim
2,1,3,Victim
3,1,4,Suspect
4,2,5,Victim


In [38]:
# Per-person participation counts. `transform('count')` broadcasts each group's
# count back onto its rows, so the frame keeps one row per (person, crime) pair.
# The counts are computed from `crime` itself (the original referenced an
# undefined `crime_file`, which fails on a fresh kernel).

# Number of crime rows each person appears in.
crime['crime_count'] = crime.groupby(['Person'])['Crime'].transform('count')

# Number of rows in which the person appears in the same role as this row.
crime['role_count'] = crime.groupby(['Person', 'Role'])['Crime'].transform('count')

crime.head()

Unnamed: 0,Person,Crime,Role,crime_count,role_count
0,1,1,Suspect,4,2
1,1,2,Victim,4,2
2,1,3,Victim,4,2
3,1,4,Suspect,4,2
4,2,5,Victim,22,4


In [39]:
#Add name and sex

# `Person` ids are 1-based row positions into `person` (row 0 is person 1), so
# build lookup Series keyed by id = row position + 1.
person_ids = pd.RangeIndex(1, len(person) + 1)
name_by_id = pd.Series(person["Name"].to_numpy(), index=person_ids)
sex_by_id = pd.Series(person["Sex"].to_numpy(), index=person_ids)

# One vectorised lookup per column instead of a Python loop over every person.
# Ids with no matching person keep the empty-string placeholder. Name is added
# before Sex so the column positions are unchanged.
crime["Name"] = name_by_id.reindex(crime["Person"], fill_value="").to_numpy()
crime["Sex"] = sex_by_id.reindex(crime["Person"], fill_value="").to_numpy()

#Final dataset
crime.head()

Unnamed: 0,Person,Crime,Role,crime_count,role_count,Name,Sex
0,1,1,Suspect,4,2,AbelDennis,M
1,1,2,Victim,4,2,AbelDennis,M
2,1,3,Victim,4,2,AbelDennis,M
3,1,4,Suspect,4,2,AbelDennis,M
4,2,5,Victim,22,4,AbramsChad,M


In [40]:
# Build the person-crime graph: one node per participant (keyed by name), one
# node per crime event (keyed by crime id), and one edge per row of `crime`.
G = nx.Graph()

# Adding person nodes. Columns are read by name instead of by integer position
# in a row Series, which newer pandas versions no longer treat as positional.
# add_node on an existing node updates its attributes, so a person who appears
# in several rows ends up with the attributes from their last row.
for row in crime.itertuples(index=False):
    G.add_node(row.Name,
               Relation=row.Role,
               rel_count=row.crime_count,
               weight=row.role_count,
               Sex=row.Sex,
               bipartite=1)


# Adding crime nodes.
G.add_nodes_from(crime["Crime"].tolist(), bipartite=0)


# Adding edges.
G.add_edges_from(zip(crime["Name"].tolist(), crime["Crime"].tolist()), weight=1)


# Summary in the layout nx.info() used to print; nx.info was removed in
# NetworkX 3.0, so the figures are computed directly.
n_nodes, n_edges = G.number_of_nodes(), G.number_of_edges()
print(f"Name: {G.name}")
print(f"Type: {type(G).__name__}")
print(f"Number of nodes: {n_nodes}")
print(f"Number of edges: {n_edges}")
if n_nodes:
    # Every edge contributes to the degree of two nodes.
    print(f"Average degree: {2 * n_edges / n_nodes:8.4f}")

Name: 
Type: Graph
Number of nodes: 1380
Number of edges: 1476
Average degree:   2.1391


In [41]:
# Check that the graph is bipartite (its nodes can be split into two sets with
# no edge inside either set).
bipartite.is_bipartite(G)

True