In [1]:
%matplotlib inline
import csv
import networkx as nx
from networkx.algorithms import bipartite
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from operator import itemgetter

In [2]:
DATA_DIR = "./data/"
CASES_2010_2014_CSV = "CRM Cases 2010_2014 Unrestricted.csv"
CASES_2015_2019_CSV = "CRM Cases 2015_2019 Unrestricted.csv"
ECOMETRICS_VERTICAL_CSV = "CRM CBG Ecometrics Vertical.csv"
CASE_TYPES_CSV = "CRM Case Types.csv"
SHAPEFILE = "./Block+Groups+2010+BARI/Census Block Groups"

DEMOGRAPHICS = "./CT_info.csv"

In [30]:
weights = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
for filename in [CASES_2015_2019_CSV, CASES_2010_2014_CSV]:
    with open(DATA_DIR + filename, 'r') as f:
        c = csv.DictReader(f, delimiter='\t')
        count = 0
        for row in c:
            count += 1
            open_dt = row.get('OPEN_DT')
            close_dt = row.get('CLOSED_DT')
            reason = row.get('TYPE')
            
            #ignore all snow related requests
            if "snow" in reason.lower():
                continue
            ct = row.get('CT_ID_10')
            if "NA" in (reason, ct):
                continue
            weights[open_dt[:4]][ct][reason.lower()] += 1
            
ct_info = defaultdict(dict)
with open(DEMOGRAPHICS, 'r') as f:
    c = csv.DictReader(f, delimiter=',')
    for row in c:
        ct_info[row.get('GEOID10')]['TotalPop'] = row['POP100_']
        ct_info[row.get('GEOID10')]['MedHouseIncome'] = row['MdHsInc']
        ct_info[row.get('GEOID10')]['MedHomeVal'] = row['MedHmVl']

        

In [31]:
def get_nodes(G, **kwargs):
    return [n for n in G.nodes if (all(G.nodes[n][key] == kwargs[key] for key in kwargs))]
def get_frequencies(G, t):
    return sorted(G.degree(get_nodes(G, bipartite=t), weight='weight'), key=lambda x: x[1], reverse=True)
def most_frequent(frequencies, reasons, n):
    return list(filter(lambda x: x[0] in reasons, frequencies))[:n]

In [46]:
def build_graphs_from_weights(weights):
    l = defaultdict(lambda: nx.Graph())
    for year in weights:
        for ct in weights[year]:
            total = sum([weights[year][ct][r] for r in weights[year][ct]])
            if total == 0:
                continue
            l[year].add_node(ct, bipartite='ct')
            for reason in weights[year][ct]:
                l[year].add_node(reason, bipartite='reason')
                l[year].add_edge(reason, ct, weight=(weights[year][ct][reason]))
    return l
    
Gs = build_graphs_from_weights(weights)

def export_bipartite(G):
    ct_count = sum([1 for n in G.nodes if G.nodes[n]['bipartite'] == 'ct'])
    ordering = sorted(G.nodes)
    mat = nx.to_numpy_matrix(G, ordering, weight='weight')
    return mat[ct_count:len(ordering), 0:ct_count], ordering[:ct_count], ordering[ct_count:]

adjs = {year: export_bipartite(Gs[year]) for year in Gs}

for year, (adj, _, _) in adjs.items():
    np.savetxt(year + ".txt", adj, fmt="%f") # TODO change maybe - this always integer

In [19]:
with open("normalized_call_data.csv", "w") as f:
    c = csv.writer(f, delimiter=",")
    c.writerow(['YEAR', 'CT', 'REASON', 'VALUE'])
    for year in weights:
        for ct in weights[year]:
            total = sum([weights[year][ct][r] for r in weights[year][ct] if weights[year][ct][r] != 0])
            if total == 0:
                continue
            for reason in weights[year][ct]:
                c.writerow([year, ct, reason, float(weights[year][ct][reason]) / float(total)])
                
            

In [48]:
# After r done processing
for year, (_, cts, reasons) in adjs.items():
    if year != "2011":
        continue
    with open("./r_processing/{}_reasons.txt".format(year), "r") as f:
        groups = f.read().replace("\n", "").split(" ")
        coms = set(groups)
        for c in sorted(coms):
            l = []
            for group, reason in zip(groups, reasons):
                if group == c:
                    l.append(reason)
                    
            #print([x[0] for x in most_frequent(get_frequencies(Gs[year], "reason"), l, 15)])
            print(l)
            print("\n")
            
    with open("./r_processing/{}_cts.txt".format(year), "r") as f:
        groups = f.read().replace("\n", "").split(" ")
        coms = set(groups)
        for c in sorted(coms):
            l = []
            for group, ct in zip(groups, cts):
                if group == c:
                    l.append(ct_info.get(ct).get("MedHouseIncome"))
                    
            
            #Remove all 0 
            zeros = sum([1 for g in l if g == "0"])
            print(zeros)
            l = list(filter(lambda x: x, map(int, l)))
            print(l)
            if len(l) > 0:
                print(sum(l) / len(l))
            print("\n")
            

['carbon monoxide', 'cemetery maintenance request', 'egress', 'general traffic engineering request', 'mosquitoes (west nile)', 'municipal parking lot complaints', 'oil truck - short measure', 'park safety notifications', 'pothole repair (internal)', 'scale not visible', 'short measure - gas', 'test case a', 'water in gas - high priority', 'watermain break']


['contractor complaints', 'contractors complaint', 'exceeding terms of permit', 'illegal posting of signs', 'mechanical', 'miscellaneous event', 'missed trash/recycling/yard waste/bulk item', 'new sign  crosswalk or pavement marking', 'new sign, crosswalk or pavement marking', 'new tree requests', 'new tree warrantee inspection', 'park improvement requests', 'parks lighting issues', 'parks lighting/electrical issues', 'pigeon infestation', 'product short measure', 'protection of adjoining property', 'rat bite', 'recycling cart return', 'request for recycling cart', 'schedule a bulk item pickup', 'school facility issues', 'sidewalk

In [12]:

with open("output.csv", "w") as f:
    out = csv.DictWriter(f, ["CT", "Type", "Calls", "TotalPop", "MedHouseIncome", "MedHomeVal", "CallsPerPop", "Year"])
    out.writeheader()
    for year in weights:
        for ct in weights[year]:
            for reason in weights[year][ct]:
                count = weights[year][ct][reason]
                
                if ct not in ct_info:
                    print(ct)
                    continue
                
                info = ct_info[ct]
                if int(info['TotalPop']) == 0:
                    print("0 Pop")
                    print(ct)
                    continue
                    
                out.writerow({
                    "CT": ct,
                    "Type": reason,
                    "Calls": count,
                    "TotalPop": info["TotalPop"],
                    "MedHouseIncome": info["MedHouseIncome"],
                    "MedHomeVal": info["MedHomeVal"],
                    "CallsPerPop": float(count) / float(info["TotalPop"]),
                    "Year": year
                })
                
    
    

0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981700
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981600
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981201
0 Pop
25025981700
0 Pop
2502

In [None]:

#get_frequencies(Gs['2011'], 'reason')

In [None]:

#get_frequencies(Gs['2011'], 'ct')

In [None]:
#print(get_nodes(Gs['2014'], bipartite='reason'))

In [None]:
export_graph(projected['2014'], '2014')

In [None]:
def export_graph(G, file):
    nx.write_weighted_edgelist(G, file, delimiter=";")
        
#export_graph(bipartite.weighted_projected_graph(G, reason_nodes), "test.txt")

In [None]:
def draw_bipartite(G):
    X, Y = bipartite.sets(G)
    pos = dict()
    pos.update( (n, (1, i)) for i, n in enumerate(X) ) # put nodes from X at x=1
    pos.update( (n, (2, i)) for i, n in enumerate(Y) ) 
    nx.draw(G, pos=pos)
    plt.show()


In [34]:
def setup_map(shapefile, name, scale):
    from mpl_toolkits.basemap import Basemap
    plt.figure(figsize=(12*scale,6*scale))
    m = Basemap(projection='merc', llcrnrlat=42.22, urcrnrlat=42.42,
                   llcrnrlon=-71.2, urcrnrlon=-70.98, resolution='h')
    m.readshapefile(shapefile, name, linewidth=0.7)
    m.drawcoastlines()
    m.drawstates()
    m.drawmapboundary(fill_color='aqua')
    m.fillcontinents(color='coral',lake_color='aqua')
    return m
    

#m = setup_map(SHAPEFILE, "CBGs", 2)
#try:
#    m.plot()
#except Exception as e:
#    pass



In [33]:
for filename in ["./reformatted/2010_2014", "./reformatted/2015_2018"]:
    with open(filename + ".csv", 'r') as f:
        with open(filename + "_reformat.csv", 'w') as o:
            c = csv.reader(f, delimiter='\t')
            out = csv.writer(o, delimiter=',')
            
            count = 0
            for row in c:
                if count == 0:
                    out.writerow(row)
                    count += 1
                else:
                    r = list(row)
                    r[1] = r[1][:4]
                    
                    #Ignore locations with no location
                    if r[9] != "NA":
                        out.writerow(r)
                    else:
                        print("sldkjf")
                