In [2]:
%matplotlib inline
import csv
import networkx as nx
from networkx.algorithms import bipartite
from collections import defaultdict
import matplotlib.pyplot as plt

In [3]:
# Directory prefix that is string-concatenated with the CSV file names below
# (so it must keep its trailing slash).
DATA_DIR = "./data/"
# Case exports, one file per period; both are read by the loading cell.
CASES_2010_2014_CSV = "CRM Cases 2010_2014 Unrestricted.csv"
CASES_2015_2019_CSV = "CRM Cases 2015_2019 Unrestricted.csv"
# NOTE(review): the next two files are not read anywhere in the visible cells.
ECOMETRICS_VERTICAL_CSV = "CRM CBG Ecometrics Vertical.csv"
CASE_TYPES_CSV = "CRM Case Types.csv"
# Shapefile path intended for setup_map(); note it is NOT relative to DATA_DIR.
SHAPEFILE = "./Block+Groups+2010+BARI/Census Block Groups"

In [4]:
# weights[year][reason][block_group] -> number of cases with that TYPE and
# BG_ID_10 whose OPEN_DT starts with that 4-character year.
weights = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
for filename in [CASES_2015_2019_CSV, CASES_2010_2014_CSV]:
    # newline='' is what the csv module expects so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(DATA_DIR + filename, 'r', newline='') as f:
        # The exports are tab-delimited despite the .csv extension.
        for row in csv.DictReader(f, delimiter='\t'):
            open_dt = row.get('OPEN_DT')
            reason = row.get('TYPE')
            bg = row.get('BG_ID_10')
            # Skip rows that cannot be attributed to a year, a case type and a
            # block group: "NA" placeholders as before, plus missing columns /
            # empty open dates, which previously crashed the loop.
            if not open_dt or reason in (None, "NA") or bg in (None, "NA"):
                continue
            # Case types are lower-cased so differently-cased spellings merge.
            weights[open_dt[:4]][reason.lower()][bg] += 1
        

In [5]:
# One bipartite graph per year: case-type ("reason") nodes on one side,
# block-group ("bg") nodes on the other, edges weighted by the case count.
Gs = defaultdict(lambda: nx.Graph())
for year, reasons_for_year in weights.items():
    for reason, bg_counts in reasons_for_year.items():
        graph = Gs[year]
        graph.add_node(reason, bipartite='reason')
        for bg, n_cases in bg_counts.items():
            graph.add_node(bg, bipartite='bg')
            graph.add_edge(reason, bg, weight=n_cases)
Gs

defaultdict(<function __main__.<lambda>()>,
            {'2010': <networkx.classes.graph.Graph at 0x7f46703e5ba8>,
             '2011': <networkx.classes.graph.Graph at 0x7f46703e5fd0>,
             '2012': <networkx.classes.graph.Graph at 0x7f4623e6fb38>,
             '2013': <networkx.classes.graph.Graph at 0x7f46703e5e48>,
             '2014': <networkx.classes.graph.Graph at 0x7f46703e5ef0>,
             '2015': <networkx.classes.graph.Graph at 0x7f46703e5b00>,
             '2016': <networkx.classes.graph.Graph at 0x7f4623e6f940>})

In [6]:
def get_nodes(G, **kwargs):
    """Return the nodes of ``G`` whose attributes match every keyword filter.

    Parameters
    ----------
    G : graph-like
        Object exposing ``G.nodes``: iterable over nodes and indexable by node
        to obtain that node's attribute dict (e.g. a ``networkx.Graph``).
    **kwargs
        ``attribute=value`` pairs; a node is kept only if all of them match.
        With no filters every node is returned.

    Returns
    -------
    list
        Matching nodes, in the iteration order of ``G.nodes``.
    """
    # Unique sentinel so a node that lacks the attribute never matches
    # (not even a filter value of None) instead of raising KeyError.
    missing = object()
    return [
        node
        for node in G.nodes
        if all(
            G.nodes[node].get(key, missing) == wanted
            for key, wanted in kwargs.items()
        )
    ]

In [7]:
# Project every yearly bipartite graph onto its block-group ('bg') nodes,
# giving one weighted block-group graph per year.
projected = {
    year: bipartite.weighted_projected_graph(
        yearly_graph, get_nodes(yearly_graph, bipartite='bg')
    )
    for year, yearly_graph in Gs.items()
}

In [17]:
# Number of greedy-modularity communities in each year's projected graph.
# Years are visited in sorted order and printed next to the count: the dict
# itself is in load order (the 2015-2019 file is read first), so unlabeled
# numbers could not be matched to a year.
for year in sorted(projected):
    communities = nx.community.modularity_max.greedy_modularity_communities(
        projected[year], weight='weight'
    )
    print(year, len(communities))
    

554
554
11
555
553
552
549


In [36]:
def get_frequencies(G, t):
    """Rank the nodes of one bipartite side by weighted degree.

    Parameters
    ----------
    G : networkx.Graph
        Graph whose nodes carry a ``bipartite`` attribute.
    t : str
        Value of the ``bipartite`` attribute selecting the side to rank
        (the notebook uses ``'reason'`` and ``'bg'``).

    Returns
    -------
    list of (node, weighted_degree)
        Sorted by weighted degree, largest first.
    """
    side_nodes = get_nodes(G, bipartite=t)
    ranking = list(G.degree(side_nodes, weight='weight'))
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking

get_frequencies(Gs['2011'], 'reason')

[('schedule a bulk item pickup', 30651),
 ('general request', 26778),
 ('pothole repair (internal)', 10121),
 ('street light outages', 8159),
 ('request for snow plowing', 7274),
 ('missed trash/recycling/yard waste/bulk item', 6252),
 ('request for pothole repair', 4637),
 ('unsatisfactory living conditions', 4408),
 ('graffiti removal', 3928),
 ('request for recycling cart', 3742),
 ('street light outages (internal)', 3652),
 ('tree emergencies', 3494),
 ('tree maintenance requests', 3357),
 ('highway maintenance', 3300),
 ('traffic signal repair', 2576),
 ('abandoned vehicles', 2283),
 ('sticker request', 2233),
 ('snow removal', 2096),
 ('sidewalk repair (make safe)', 2072),
 ('improper storage of trash (barrels)', 1742),
 ('notification', 1593),
 ('misc. snow complaint', 1409),
 ('pick up dead animal', 1388),
 ('sidewalk repair', 1328),
 ('rodent activity', 1248),
 ('sign repair', 1190),
 ('heat - excessive, insufficient', 1037),
 ('poor conditions of property', 1003),
 ('requests

In [37]:

# Block groups of the 2011 graph ranked by weighted degree (total edge weight).
get_frequencies(Gs['2011'], 'bg')

[('250250303003', 1572),
 ('250250612001', 956),
 ('250250606001', 901),
 ('250251104012', 895),
 ('250250801001', 892),
 ('250251101035', 810),
 ('250250709001', 764),
 ('250250701018', 764),
 ('250251401051', 747),
 ('250250303002', 703),
 ('250250302001', 689),
 ('250251202012', 669),
 ('250251102011', 652),
 ('250251105022', 647),
 ('250251204004', 644),
 ('250251203014', 639),
 ('250251201031', 632),
 ('250250705001', 632),
 ('250250406001', 628),
 ('250250701011', 627),
 ('250250106001', 625),
 ('250250406002', 615),
 ('250251010022', 613),
 ('250250304001', 612),
 ('250250404012', 607),
 ('250251302003', 607),
 ('250251204005', 598),
 ('250250201013', 594),
 ('250250107021', 586),
 ('250251106011', 584),
 ('250250803001', 578),
 ('250251203013', 567),
 ('250250814003', 566),
 ('250251106073', 560),
 ('250250820001', 559),
 ('250251304041', 546),
 ('250251106072', 538),
 ('250250921015', 524),
 ('250250008024', 524),
 ('250250711012', 523),
 ('250250001001', 523),
 ('250250701013

In [16]:
# List every case-type ('reason') node present in the 2014 graph.
print(get_nodes(Gs['2014'], bipartite='reason'))

['planting', 'water in gas - high priority', 'undefined noise disturbance', 'unsafe dangerous conditions', 'automotive noise disturbance', 'misc. snow complaint', 'new sign  crosswalk or pavement marking', 'snow removal', 'electrical', 'no utilities - food establishment - electricity', 'ground maintenance', 'alternate transportation', 'pwd graffiti', 'animal found', 'equipment repair', 'no utilities - food establishment - sewer', 'snow/ice control', 'big buildings recycling (internal)', 'missed trash/recycling/yard waste/bulk item', 'bus stop issues', 'major system failure', 'heat/fuel assistance', 'overcrowding', 'cemetery maintenance request', 'illegal occupancy', 'new tree requests', 'graffiti removal', 'improper storage of trash (barrels)', 'news boxes', 'abandoned vehicles', 'building inspection request', 'no utilities - food establishment - flood', 'abandoned vehicles - private tow', 'abandoned bicycle', 'maintenance complaint - residential', 'empty litter basket', 'heat - excess

In [14]:
# Write the 2014 projected block-group graph to a file named '2014'.
# NOTE(review): export_graph is defined in the cell *below* this one, so on a
# fresh kernel this cell raises NameError unless that cell is run first.
export_graph(projected['2014'], '2014')

In [11]:
def export_graph(G, file):
    """Write ``G`` to ``file`` as a weighted edge list.

    Delegates to ``nx.write_weighted_edgelist`` with ``;`` as the field
    delimiter.

    Parameters
    ----------
    G : networkx.Graph
        Graph to export.
    file : str or file-like
        Destination passed through to networkx as the output path.
    """
    nx.write_weighted_edgelist(G, path=file, delimiter=";")
        
#export_graph(bipartite.weighted_projected_graph(G, reason_nodes), "test.txt")

In [13]:
def draw_bipartite(G, top_nodes=None):
    """Draw a bipartite graph with its two node sets in two vertical columns.

    Parameters
    ----------
    G : networkx.Graph
        Bipartite graph to draw.
    top_nodes : container of nodes, optional
        All nodes of one side of the bipartition, forwarded to
        ``bipartite.sets``. Needed when ``G`` is disconnected: without it
        networkx cannot split the nodes unambiguously and raises
        ``AmbiguousSolution``. For the yearly graphs in this notebook,
        ``get_nodes(G, bipartite='reason')`` can be passed.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    left, right = bipartite.sets(G, top_nodes)
    # One column per side: first set at x=1, second set at x=2; the y
    # coordinate is simply the node's position within its set.
    pos = {node: (1, row) for row, node in enumerate(left)}
    pos.update((node, (2, row)) for row, node in enumerate(right))
    nx.draw(G, pos=pos)
    plt.show()


In [1]:
def setup_map(shapefile, name, scale):
    """Create a Basemap of a fixed bounding box and overlay a shapefile.

    Parameters
    ----------
    shapefile : str
        Shapefile path handed to ``Basemap.readshapefile``.
    name : str
        Name under which ``readshapefile`` registers the shapes.
    scale : number
        Multiplier applied to the base 12x6 figure size.

    Returns
    -------
    Basemap
        The configured map, with coastlines, states, boundary and continent
        fill already drawn on a new matplotlib figure.
    """
    # Kept as a function-local import, as in the original cell
    # (presumably so the rest of the notebook runs without basemap installed).
    from mpl_toolkits.basemap import Basemap

    fig_width, fig_height = 12*scale, 6*scale
    plt.figure(figsize=(fig_width, fig_height))

    # Lower-left / upper-right corners of the mapped region, in degrees.
    # NOTE(review): looks like the Boston area - confirm.
    corners = dict(llcrnrlat=42.22, urcrnrlat=42.42,
                   llcrnrlon=-71.2, urcrnrlon=-70.98)
    basemap = Basemap(projection='merc', resolution='h', **corners)

    basemap.readshapefile(shapefile, name, linewidth=0.7)
    basemap.drawcoastlines()
    basemap.drawstates()
    basemap.drawmapboundary(fill_color='aqua')
    basemap.fillcontinents(color='coral', lake_color='aqua')
    return basemap
    

#m = setup_map(SHAPEFILE, "CBGs", 2)
#try:
   # m.plot()
#except Exception as e:
    #pass

