In [173]:
%matplotlib inline
import csv
import networkx as nx
from networkx.algorithms import bipartite
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from operator import itemgetter

In [47]:
DATA_DIR = "./data/"
CASES_2010_2014_CSV = "CRM Cases 2010_2014 Unrestricted.csv"
CASES_2015_2019_CSV = "CRM Cases 2015_2019 Unrestricted.csv"
ECOMETRICS_VERTICAL_CSV = "CRM CBG Ecometrics Vertical.csv"
CASE_TYPES_CSV = "CRM Case Types.csv"
SHAPEFILE = "./Block+Groups+2010+BARI/Census Block Groups"

DEMOGRAPHICS = "./CT_info.csv"

In [117]:
weights = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
for filename in [CASES_2015_2019_CSV, CASES_2010_2014_CSV]:
    with open(DATA_DIR + filename, 'r') as f:
        c = csv.DictReader(f, delimiter='\t')
        count = 0
        for row in c:
            count += 1
            open_dt = row.get('OPEN_DT')
            close_dt = row.get('CLOSED_DT')
            reason = row.get('TYPE')
            ct = row.get('CT_ID_10')
            if "NA" in (reason, ct):
                continue
            weights[open_dt[:4]][ct][reason.lower()] += 1
        

In [195]:
ct_info = defaultdict(dict)
with open(DEMOGRAPHICS, 'r') as f:
    c = csv.DictReader(f, delimiter=',')
    for row in c:
        ct_info[row.get('GEOID10')]['TotalPop'] = row['POP100_']
        ct_info[row.get('GEOID10')]['MedHouseIncome'] = row['MdHsInc']
        ct_info[row.get('GEOID10')]['MedHomeVal'] = row['MedHmVl']

with open("output.csv", "w") as f:
    out = csv.DictWriter(f, ["CT", "Type", "Calls", "TotalPop", "MedHouseIncome", "MedHomeVal", "CallsPerPop"])
    out.writeheader()
    for year in weights:
        for ct in weights[year]:
            for reason in weights[year][ct]:
                count = weights[year][ct][reason]
                
                if ct not in ct_info:
                    print(ct)
                    continue
                
                info = ct_info[ct]
                if int(info['TotalPop']) == 0:
                    print("0 Pop")
                    print(ct)
                    continue
                    
                out.writerow({
                    "CT": ct,
                    "Type": reason,
                    "Calls": count,
                    "TotalPop": info["TotalPop"],
                    "MedHouseIncome": info["MedHouseIncome"],
                    "MedHomeVal": info["MedHomeVal"],
                    "CallsPerPop": float(count) / float(info["TotalPop"])
                })
                
    
    

0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
2502

In [186]:
def build_graphs_from_weights(weights):
    l = defaultdict(lambda: nx.Graph())
    for year in weights:
        for ct in weights[year]:
            total = sum([1 for r in weights[year][ct] if weights[year][ct][r] != 0])
            if total == 0:
                continue
            l[year].add_node(ct, bipartite='ct')
            for reason in weights[year][ct]:
                l[year].add_node(reason, bipartite='reason')
                l[year].add_edge(reason, ct, weight=(float(weights[year][ct][reason]) / float(total)))
    return l
Gs = build_graphs_from_weights(weights)
    

In [187]:
def export_bipartite(G):
    ct_count = sum([1 for n in G.nodes if G.nodes[n]['bipartite'] == 'ct'])
    ordering = sorted(G.nodes)
    mat = nx.to_numpy_matrix(G, ordering, weight='weight')
    return mat[ct_count:len(ordering), 0:ct_count], ordering[:ct_count], ordering[ct_count:]


In [188]:
adjs = {year: export_bipartite(Gs[year]) for year in Gs}

In [189]:
for year, (adj, _, _) in adjs.items():
    np.savetxt(year + ".txt", adj, fmt="%f") # TODO change maybe - this always integer

In [218]:
# After r done processing
for year, (_, cts, reasons) in adjs.items():
    if year != "2010":
        continue
    with open("./r_processing/{}_reasons.txt".format(year), "r") as f:
        groups = f.read().replace("\n", "").split(" ")
        coms = set(groups)
        for c in sorted(coms):
            l = []
            for group, reason in zip(groups, reasons):
                if group == c:
                    l.append(reason)
                    
            print(l)
            print("\n")
            

['boston public health commission (bphc)', 'building inspection request', 'cemetery maintenance request', 'construction debris', 'disability general request', 'exceeding terms of permit', 'illegal occupancy', 'mechanical', 'municipal parking lot complaints', 'new sign, crosswalk or pavement marking', 'occupying w/out a valid co/ci', 'park maintenance requests', 'private parking lot complaints', 'protection of adjoining property', 'recycling cart return', 'request for litter basket installation', 'request for recycling cart', 'requests for street cleaning', 'sidewalk repair (make safe)', 'space savers', 'work w/out permit']


['general comments for a program or policy', 'misc. snow complaint', 'missed trash/recycling/yard waste/bulk item', 'new tree requests', 'no price on gas/wrong price', 'no utilities - food establishment - sewer', 'overcrowding', 'phone bank service inquiry', 'pigeon infestation', 'rat bite', 'request for snow plowing', 'request for snow plowing (emergency responder

In [217]:
# After r done processing
for year, (_, cts, reasons) in adjs.items():
    if year != "2010":
        continue
    with open("./r_processing/{}_cts.txt".format(year), "r") as f:
        groups = f.read().replace("\n", "").split(" ")
        coms = set(groups)
        for c in sorted(coms):
            l = []
            for group, ct in zip(groups, cts):
                if group == c:
                    l.append(ct_info.get(ct).get("MedHouseIncome"))
                    
            print(l)
            print(sum(map(int, l)) / len(l))
            print("\n")

['142688', '37750', '102171', '79750', '123160', '41023', '54652', '52572', '48125', '67222', '55238', '57386', '49241', '57454', '63967', '105606', '92898', '100112', '104970', '101778', '22133', '81324', '0']
71357.39130434782


['69818', '80932', '66875', '86773', '72526', '47619', '60214', '48676', '58750', '110298', '74215', '67150', '67042', '68448', '62583', '71341', '73868', '67027', '77500', '58864', '77243', '78333', '80750', '85735', '105319', '131484', '85321', '137750', '84007', '99911', '85788', '81563', '103438', '114697', '66005', '62208', '46695', '71250', '76285', '33185', '65000', '77832', '67359', '68042']
76720.88636363637


['56522', '25364', '49600', '63036', '30598', '30819', '18275', '34107', '33967', '30563', '35084', '20848', '36685', '18045', '28362', '27045', '30794', '31031', '40950', '65273', '64280', '35938', '42647', '48678', '37154', '40078', '38371', '46397', '46576', '49559', '43729', '29048', '28279', '37203', '50292', '51938', '63271', '40764', '37

In [6]:
def get_nodes(G, **kwargs):
    return [n for n in G.nodes if (all(G.nodes[n][key] == kwargs[key] for key in kwargs))]

In [7]:
projected = {}
for year in Gs:
    projected[year] = bipartite.weighted_projected_graph(Gs[year], get_nodes(Gs[year], bipartite='ct'))

In [17]:
for year in projected:
    print(len(nx.community.modularity_max.greedy_modularity_communities(projected[year], 'weight')))
    

554
554
11
555
553
552
549


In [79]:
def get_frequencies(G, t):
    return sorted(G.degree(get_nodes(G, bipartite=t), weight='weight'), key=lambda x: x[1], reverse=True)

#get_frequencies(Gs['2011'], 'reason')

In [80]:

#get_frequencies(Gs['2011'], 'ct')

In [81]:
#print(get_nodes(Gs['2014'], bipartite='reason'))

In [14]:
export_graph(projected['2014'], '2014')

In [11]:
def export_graph(G, file):
    nx.write_weighted_edgelist(G, file, delimiter=";")
        
#export_graph(bipartite.weighted_projected_graph(G, reason_nodes), "test.txt")

In [13]:
def draw_bipartite(G):
    X, Y = bipartite.sets(G)
    pos = dict()
    pos.update( (n, (1, i)) for i, n in enumerate(X) ) # put nodes from X at x=1
    pos.update( (n, (2, i)) for i, n in enumerate(Y) ) 
    nx.draw(G, pos=pos)
    plt.show()


In [1]:
def setup_map(shapefile, name, scale):
    from mpl_toolkits.basemap import Basemap
    plt.figure(figsize=(12*scale,6*scale))
    m = Basemap(projection='merc', llcrnrlat=42.22, urcrnrlat=42.42,
                   llcrnrlon=-71.2, urcrnrlon=-70.98, resolution='h')
    m.readshapefile(shapefile, name, linewidth=0.7)
    m.drawcoastlines()
    m.drawstates()
    m.drawmapboundary(fill_color='aqua')
    m.fillcontinents(color='coral',lake_color='aqua')
    return m
    

#m = setup_map(SHAPEFILE, "CBGs", 2)
#try:
   # m.plot()
#except Exception as e:
    #pass

