In [1]:
%matplotlib inline
import csv
import networkx as nx
from networkx.algorithms import bipartite
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from operator import itemgetter

In [2]:
# Directory prefix that is prepended to the case file names when they are opened.
DATA_DIR = "./data/"
# Case exports that are read (tab-delimited) to build the per-year request counts.
CASES_2010_2014_CSV = "CRM Cases 2010_2014 Unrestricted.csv"
CASES_2015_2019_CSV = "CRM Cases 2015_2019 Unrestricted.csv"
# NOTE(review): the next three constants are not referenced by any cell visible in this notebook.
ECOMETRICS_VERTICAL_CSV = "CRM CBG Ecometrics Vertical.csv"
CASE_TYPES_CSV = "CRM Case Types.csv"
SHAPEFILE = "./Block+Groups+2010+BARI/Census Block Groups"

# Comma-delimited table read for the GEOID10, POP100_, MdHsInc and MedHmVl columns.
DEMOGRAPHICS = "./CT_info.csv"

In [3]:
# weights[year][ct][reason] -> number of requests of that (lower-cased) type
# opened in that tract during that year.
weights = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
for filename in [CASES_2015_2019_CSV, CASES_2010_2014_CSV]:
    # newline='' lets the csv module do its own line-ending handling, so a
    # line break embedded in a quoted field is not mistaken for a row boundary.
    with open(DATA_DIR + filename, 'r', newline='') as f:
        for row in csv.DictReader(f, delimiter='\t'):
            open_dt = row.get('OPEN_DT')
            reason = row.get('TYPE')
            ct = row.get('CT_ID_10')

            # A row lacking any of the three fields cannot be tallied; skipping
            # it avoids calling .lower() / slicing on None.
            if open_dt is None or reason is None or ct is None:
                continue
            if "NA" in (reason, ct):
                continue

            reason = reason.lower()
            # ignore all snow related requests
            if "snow" in reason:
                continue

            # The first four characters of OPEN_DT are used as the year key
            # (assumes the field starts with a 4-digit year -- TODO confirm).
            weights[open_dt[:4]][ct][reason] += 1
            
# ct_info[GEOID10] -> {'TotalPop', 'MedHouseIncome', 'MedHomeVal'}; the values
# are kept as the raw strings read from the CSV.
ct_info = defaultdict(dict)
# newline='' is the file mode the csv module expects for its input files.
with open(DEMOGRAPHICS, 'r', newline='') as f:
    for row in csv.DictReader(f, delimiter=','):
        info = {
            'TotalPop': row['POP100_'],
            'MedHouseIncome': row['MdHsInc'],
            'MedHomeVal': row['MedHmVl'],
        }
        # update() keeps any keys already stored for a GEOID that appears twice.
        ct_info[row.get('GEOID10')].update(info)

        

In [4]:
def get_nodes(G, **kwargs):
    """Return the nodes of ``G`` whose attributes match every keyword given.

    Args:
        G: graph-like object exposing ``G.nodes`` (iterable of node ids that
            also maps a node id to its attribute dict).
        **kwargs: attribute name -> required value. With no keywords every
            node is returned.

    Returns:
        A list of node ids, in ``G.nodes`` iteration order.

    A node that does not carry one of the requested attributes is treated as
    a non-match rather than raising ``KeyError``.
    """
    matches = []
    for node in G.nodes:
        attrs = G.nodes[node]
        if all(key in attrs and attrs[key] == wanted
               for key, wanted in kwargs.items()):
            matches.append(node)
    return matches
def get_frequencies(G, t):
    """Return ``(node, weighted degree)`` pairs for one side of the graph.

    Only nodes whose ``bipartite`` attribute equals ``t`` are considered. The
    degree sums the ``weight`` attribute of the incident edges, and the pairs
    are ordered from the largest degree to the smallest.
    """
    side = get_nodes(G, bipartite=t)
    pairs = list(G.degree(side, weight='weight'))
    pairs.sort(key=itemgetter(1), reverse=True)
    return pairs
def most_frequent(frequencies, reasons, n):
    """Return the first ``n`` entries of ``frequencies`` whose key is in ``reasons``.

    ``frequencies`` is a sequence of pairs whose first element is the key;
    the input order is preserved.
    """
    kept = [entry for entry in frequencies if entry[0] in reasons]
    return kept[:n]

In [28]:
def build_graphs_from_weights(weights):
    """Build one weighted graph per year from the nested count mapping.

    ``weights`` maps year -> tract -> reason -> count. In each year's graph a
    tract becomes a node tagged ``bipartite='ct'``, a reason becomes a node
    tagged ``bipartite='reason'``, and the count is stored as the ``weight``
    of the edge joining them. Tracts whose counts sum to zero are left out.

    Returns a defaultdict mapping year -> ``nx.Graph``.
    """
    graphs_by_year = defaultdict(lambda: nx.Graph())
    for year, tracts in weights.items():
        for ct, reason_counts in tracts.items():
            if sum(reason_counts.values()) == 0:
                continue
            graph = graphs_by_year[year]
            graph.add_node(ct, bipartite='ct')
            for reason, calls in reason_counts.items():
                graph.add_node(reason, bipartite='reason')
                graph.add_edge(reason, ct, weight=calls)
    return graphs_by_year
    
# One graph per year, keyed by the year string used in `weights`.
Gs = build_graphs_from_weights(weights)

def export_bipartite(G):
    """Return the reason-by-tract weight matrix of ``G`` with its node orderings.

    Args:
        G: graph whose nodes all carry a ``bipartite`` attribute; nodes tagged
            ``'ct'`` form the column side, every other node the row side.

    Returns:
        ``(adjacency, cts, reasons)`` where ``cts`` and ``reasons`` are the
        sorted node lists and ``adjacency`` is a ``numpy.matrix`` with one row
        per entry of ``reasons`` and one column per entry of ``cts``, holding
        the edge ``weight`` (0 where there is no edge).
    """
    # Sort each side on its own instead of sorting all nodes together: the
    # combined sort only works while every tract id happens to sort before
    # every reason string.
    cts = sorted(n for n in G.nodes if G.nodes[n]['bipartite'] == 'ct')
    reasons = sorted(n for n in G.nodes if G.nodes[n]['bipartite'] != 'ct')
    ordering = cts + reasons
    ct_count = len(cts)

    # nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is the
    # supported call. Wrapping in np.asmatrix keeps the return type unchanged.
    mat = np.asmatrix(nx.to_numpy_array(G, nodelist=ordering, weight='weight'))
    return mat[ct_count:, :ct_count], cts, reasons

# Export every year's graph once; each value is the 3-tuple returned by
# export_bipartite (matrix first, then the two node lists).
adjs = {year: export_bipartite(Gs[year]) for year in Gs}

# Write each year's matrix as plain text to ./graphs/<year>.txt
# (presumably the input of the external R step -- confirm). The directory must already exist.
for year, (adj, _, _) in adjs.items():
    np.savetxt("./graphs/" + year + ".txt", adj, fmt="%f") # TODO change maybe - this always integer

In [32]:
# Run only after the external R step has written ./r_processing/<year>_*.txt
def get_communities(year):
    """Group the year's reasons and tracts by their community label.

    Reads ``./r_processing/<year>_reasons.txt`` and
    ``./r_processing/<year>_cts.txt``. Each file holds whitespace-separated
    group labels, which are matched positionally against the reason and tract
    orderings returned by ``export_bipartite(Gs[year])``.

    Args:
        year: year as an int or str.

    Returns:
        A list of ``(reasons, cts, incomes)`` tuples of lists, one per group
        label, sorted ascending by the mean of ``int(income)`` over the
        group's tracts. ``incomes[i]`` is the ``MedHouseIncome`` value stored
        in ``ct_info`` for ``cts[i]``.

    Raises:
        AttributeError: a tract is absent from ``ct_info``.
        ZeroDivisionError: a group contains reasons but no tracts.
        ValueError/TypeError: an income value cannot be converted with ``int``.
    """
    year = str(year)
    with open("./r_processing/{}_reasons.txt".format(year), "r") as r, \
            open("./r_processing/{}_cts.txt".format(year), "r") as c:
        # split() breaks on any run of whitespace. Deleting the newlines and
        # splitting on single spaces fused the last label of one line with the
        # first label of the next and produced empty labels for doubled or
        # trailing spaces, which shifted the positional pairing below.
        r_groups = r.read().split()
        c_groups = c.read().split()

    _, cts, reasons = export_bipartite(Gs[year])

    coms = defaultdict(lambda: ([], [], []))

    for group, reason in zip(r_groups, reasons):
        coms[group][0].append(reason)

    for group, ct in zip(c_groups, cts):
        coms[group][1].append(ct)
        coms[group][2].append(ct_info.get(ct).get("MedHouseIncome"))

    return sorted(coms.values(), key=lambda x: sum(map(int, x[2])) / len(x[2]))

def print_communities(year):
    """Print every community of ``year`` in the order get_communities returns them.

    For each community this prints its reasons, the list of (tract, income)
    pairs, and the mean of the non-zero integer incomes when there is at
    least one, followed by a blank separator.
    """
    for reasons, cts, incomes in get_communities(year):
        print(reasons)
        print(list(zip(cts, incomes)))
        nonzero = [value for value in map(int, incomes) if value != 0]
        if nonzero:
            print(sum(nonzero) / len(nonzero))
        print("\n")

# Show the 2012 communities; needs ./r_processing/2012_reasons.txt and 2012_cts.txt to exist.
print_communities(2012)

['abandoned building', 'abandoned vehicles', 'abandoned vehicles - private tow', 'alternate transportation', 'bed bugs', 'billing complaint', 'breathe easy', 'bus emergency', 'bus stop issues', 'bwsc general request', 'carbon monoxide', 'chronic dampness/mold', 'cross metering - sub-metering', 'electrical', 'fire hydrant', 'general school request', 'general transportation issues', 'heat - excessive, insufficient', 'heat/fuel assistance', 'highway maintenance', 'illegal auto body shop', 'illegal occupancy', 'illegal rooming house', 'illegal use', 'knockdown replacement', 'late bus issues', 'lead', 'maintenance - homeowner', 'maintenance complaint - residential', 'mechanical', 'mice infestation - residential', 'new sign, crosswalk or pavement marking', 'no utilities residential - electricity', 'no utilities residential - gas', 'no utilities residential - water', 'park maintenance requests', 'parking on front/back yards (illegal parking)', 'pest infestation - residential', 'pick up dead a

In [None]:

#get_frequencies(Gs['2011'], 'reason')

In [None]:

#get_frequencies(Gs['2011'], 'ct')

In [None]:
#print(get_nodes(Gs['2014'], bipartite='reason'))

In [None]:
# NOTE(review): `projected` is not defined in any cell visible in this notebook, and
# `export_graph` is only defined in the following cell, so this cell fails on a fresh kernel.
export_graph(projected['2014'], '2014')

In [None]:
def export_graph(G, file):
    """Write ``G`` to ``file`` as a weighted edge list with ";" between fields."""
    field_separator = ";"
    nx.write_weighted_edgelist(G, file, delimiter=field_separator)
        
#export_graph(bipartite.weighted_projected_graph(G, reason_nodes), "test.txt")

In [None]:

# For every year, write one row per tract with the share of its calls that fall in
# the "rich" community's request types, the "poor" community's types, or neither.
for year in Gs:
    # get_communities sorts by mean median household income, ascending, so the
    # first community is used as "poor" and the last as "rich". Resolving them
    # before opening the output avoids leaving a header-only file behind when
    # the community files are missing.
    communities = get_communities(year)
    poor_types = set(communities[0][0])
    rich_types = set(communities[-1][0])
    poor_cts = set(communities[0][1])
    rich_cts = set(communities[-1][1])

    # newline="" is required by the csv module; without it every row is
    # followed by a blank line on platforms that translate "\n" to "\r\n".
    with open("./rich_poor_proportions/{}_rich_poor_proportions.csv".format(year), "w", newline="") as f:
        c = csv.writer(f)
        c.writerow(["CT", "RICH_CALLS", "POOR_CALLS", "OTHER_CALLS", "CT_TYPE"])

        for ct, reason_counts in weights[year].items():
            total = sum(reason_counts.values())
            if total == 0:
                continue

            rich = poor = other = 0
            for reason, calls in reason_counts.items():
                # The poor set is tested first, so a type present in both sets counts as poor.
                if reason in poor_types:
                    poor += calls
                elif reason in rich_types:
                    rich += calls
                else:
                    other += calls

            if ct in poor_cts:
                t = "POOR"
            elif ct in rich_cts:
                t = "RICH"
            else:
                t = "OTHER"

            c.writerow([ct, rich / total, poor / total, other / total, t])


In [30]:
# Write each (year, tract, reason) count as a fraction of that tract's calls in that year.
# newline="" is required by the csv module so rows are not followed by blank
# lines on platforms that translate "\n" to "\r\n".
with open("normalized_call_data.csv", "w", newline="") as f:
    c = csv.writer(f, delimiter=",")
    c.writerow(['YEAR', 'CT', 'REASON', 'VALUE'])
    for year, tracts in weights.items():
        for ct, reason_counts in tracts.items():
            total = sum(reason_counts.values())
            # Nothing to normalize for a tract with no calls.
            if total == 0:
                continue
            for reason, calls in reason_counts.items():
                c.writerow([year, ct, reason, calls / total])
                

In [33]:
# Convert the tab-delimited files to comma-delimited copies, truncating column 1
# to its first four characters and dropping rows whose column 9 is "NA".
for filename in ["./reformatted/2010_2014", "./reformatted/2015_2018"]:
    # newline='' on both files is what the csv module expects: it keeps quoted
    # line breaks intact on input and avoids blank lines between output rows.
    with open(filename + ".csv", 'r', newline='') as f, \
            open(filename + "_reformat.csv", 'w', newline='') as o:
        c = csv.reader(f, delimiter='\t')
        out = csv.writer(o, delimiter=',')

        # The first row is the header and is copied through unchanged.
        header = next(c, None)
        if header is not None:
            out.writerow(header)

        dropped = 0
        for row in c:
            r = list(row)
            # presumably a date whose first four characters are the year -- confirm
            r[1] = r[1][:4]

            # Ignore rows with no location
            if r[9] != "NA":
                out.writerow(r)
            else:
                dropped += 1

        # One summary line per file instead of a debug line per dropped row.
        print("{}: dropped {} rows with no location".format(filename, dropped))
                

In [None]:
# Join the call counts with the tract demographics: one row per (year, tract, reason).
# newline="" is required by the csv module so rows are not followed by blank
# lines on platforms that translate "\n" to "\r\n".
with open("output.csv", "w", newline="") as f:
    out = csv.DictWriter(f, ["CT", "Type", "Calls", "TotalPop", "MedHouseIncome", "MedHomeVal", "CallsPerPop", "Year"])
    out.writeheader()
    for year, tracts in weights.items():
        for ct, reason_counts in tracts.items():
            # Both checks depend only on the tract, so they run once per tract
            # rather than once per reason.
            if ct not in ct_info:
                continue

            info = ct_info[ct]
            # A tract with zero population has no per-capita rate.
            if int(info['TotalPop']) == 0:
                continue
            total_pop = float(info["TotalPop"])

            for reason, count in reason_counts.items():
                out.writerow({
                    "CT": ct,
                    "Type": reason,
                    "Calls": count,
                    "TotalPop": info["TotalPop"],
                    "MedHouseIncome": info["MedHouseIncome"],
                    "MedHomeVal": info["MedHomeVal"],
                    "CallsPerPop": count / total_pop,
                    "Year": year
                })
                
    
    