In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from itertools import combinations
import os
import seaborn as sns
import statistics as stat
import community
from collections import Counter

In [2]:
# NETWORK CREATION STAGE (part 1): import the dataset and derive the inputs
# from which the network "g" is generated.
df = pd.read_csv('../Datasets/Complaints_MoreThanOneComplaint.csv')

# Per-CRID count of the non-null entries in each remaining column.
d0 = df.groupby('CRID').count()

# Keep only the CRIDs whose UID count exceeds one (complaints with more than
# one officer, per the original note).
d1 = d0.loc[d0['UID'] > 1]

# The surviving CRIDs as a plain, de-duplicated list.
group_complaints = list(set(d1.index))

# Lookup table mapping each CRID to its no_of_officers value.
df_perCRID = pd.read_csv("../Datasets/OfficersPerComplaint.csv")
dict_PerComplaint = dict(zip(df_perCRID['CRID'], df_perCRID['no_of_officers']))

# Build the network: every UID is a node, and two UIDs are linked whenever
# they appear on the same complaint (CRID) in group_complaints.
g = nx.Graph()

# Collect the UIDs of every complaint in a single pass instead of re-filtering
# the whole complaints table once per complaint. Row order within each
# complaint is preserved, so the generated pairs are the same as before.
uids_by_crid = {crid: uids.tolist() for crid, uids in df.groupby('CRID')['UID']}
total_complaints = len(group_complaints)

# enumerate() supplies the loop position directly; looking it up with
# list.index() on every iteration made the loop quadratic.
for position, incident in enumerate(group_complaints):
    numberPer = dict_PerComplaint.get(incident)
    # Each shared complaint adds 1 + 1/numberPer to the weight of the pair's
    # edge. NOTE(review): a CRID absent from dict_PerComplaint yields None here
    # and fails with a TypeError - confirm the lookup file covers every CRID.
    increment = 1 + 1/numberPer
    for first, second in combinations(uids_by_crid[incident], 2):
        if g.has_edge(first, second):
            g[first][second]['weight'] += increment
        else:
            g.add_edge(first, second, weight=increment)
    # Refresh the in-place progress line once every 100 complaints.
    if position % 100 == 0:
        value = str(position/total_complaints*100)[:5] + "% complete "
        print("\r ", value, end=" \r ")
print("\r 100.00% complete")

# Print a summary of the full network. nx.info() was removed in NetworkX 3.0,
# so fall back to the graph's own string summary when it is not available.
print(nx.info(g) if hasattr(nx, "info") else g)

# Write the full network to disk in both GEXF and GML formats.
nx.write_gexf(g, "../Networks/All_CPD_Network_NewEdgeWeights.gexf")
nx.write_gml(g, "../Networks/All_CPD_Network_NewEdgeWeights.gml")

# Remove every edge whose weight is below the threshold, then remove the nodes
# left without any edge. NOTE: g is modified in place, so everything that runs
# after this point sees only the thresholded network.
MIN_EDGE_WEIGHT = 2
edgeList = list(g.edges)
toRemove = [(u, v) for u, v in edgeList if g[u][v]['weight'] < MIN_EDGE_WEIGHT]
g.remove_edges_from(toRemove)
g.remove_nodes_from(list(nx.isolates(g)))

# Write the thresholded network to disk in both GEXF and GML formats.
nx.write_gexf(g, "../Networks/All_CPD_Network_NewEdgeWeights_NoEdgesBelow2.gexf")
nx.write_gml(g, "../Networks/All_CPD_Network_NewEdgeWeights_NoEdgesBelow2.gml")

# Print a summary of the thresholded network. nx.info() was removed in
# NetworkX 3.0, so fall back to the graph's own string summary when needed.
print(nx.info(g) if hasattr(nx, "info") else g)

 100.00% complete  
Name: 
Type: Graph
Number of nodes: 19524
Number of edges: 218603
Average degree:  22.3933
Name: 
Type: Graph
Number of nodes: 11227
Number of edges: 26331
Average degree:   4.6907


In [None]:
# Export a CSV listing each node's UID, degree and weighted degree.
def getWeightedDegree(UID):
    """Return the weighted degree of node ``UID`` in the global graph ``g``.

    The degree is requested for a one-element node list, using the
    ``"weight"`` edge attribute as the weight. An IndexError is raised when
    that request produces no entry.
    """
    entries = list(g.degree([UID], weight = "weight"))
    _, weighted_degree = entries[0]
    return weighted_degree
def getDegree(UID):
    """Return the degree of node ``UID`` in the global graph ``g``.

    The degree is requested for a one-element node list. An IndexError is
    raised when that request produces no entry.
    """
    entries = list(g.degree([UID]))
    _, node_degree = entries[0]
    return node_degree

# Tabulate every node of g with its degree and weighted degree, then save it.
# NOTE: this rebinds df, replacing the complaints table loaded earlier.
df = pd.DataFrame()
df['UID'] = list(g.nodes)
# Map the UID column straight through the helpers. A row-wise
# DataFrame.apply(axis=1) builds a Series per row just to read one field, and
# does not return a Series when the frame has no rows.
df['Degree'] = df['UID'].map(getDegree)
df['Weighted.Degree'] = df['UID'].map(getWeightedDegree)
df.to_csv('../Datasets/NodeDegrees.csv')