In [31]:
import csv
import mapreduce as mr
import dateutil
from dateutil import parser
import operator

#1. Weekday

In [10]:
#Calculating sum of complaints per zipcode per weekday

def mapper1(row):
    """Emit one ``((zipcode, weekday), 1)`` pair for a complaint record.

    ``row`` is a mapping with 'Incident Zip' and 'Created Date' entries.
    The weekday is whatever ``.weekday()`` returns on the parsed creation
    date (for datetime objects Monday is 0).
    """
    zipcode = row['Incident Zip']
    created = dateutil.parser.parse(row['Created Date'])
    key = (zipcode, created.weekday())
    yield (key, 1)

def reducer1(k2v2):
    """Collapse a grouped ``(key, counts)`` pair into ``(key, total)``."""
    key, ones = k2v2
    total = sum(ones)
    return (key, total)

with open('311_noise.csv', 'r') as noise_csv:
    # DictReader yields one dict per CSV row, keyed by the header names.
    noise_rows = csv.DictReader(noise_csv)
    # Build the list inside the `with` so the file is still open in case
    # mr.run reads the rows lazily.
    output1 = list(mr.run(noise_rows, mapper1, reducer1))

# Peek at a handful of the aggregated records.
output1[6:11]

[(('', 6), 981),
 (('00083', 0), 15),
 (('00083', 1), 12),
 (('00083', 2), 12),
 (('00083', 3), 16)]

In [95]:
#Getting unique zipcodes

# Each output1 record is ((zipcode, weekday), count): take the zipcode of every
# record and de-duplicate through a set, so the resulting order is arbitrary.
zipcodes = list(set([record[0][0] for record in output1]))

zipcodes[:10]

['',
 '10065',
 '10069',
 '10453',
 '10452',
 '10451',
 '10457',
 '10456',
 '10455',
 '10454']

In [12]:
#Creating a dictionary of per-weekday counts keyed by zipcode

# Every known zipcode starts with its own empty list ...
weekday = {zipcode: [] for zipcode in zipcodes}

# ... and each (weekday, count) pair is filed under the zipcode it belongs to.
for (zipcode, day), total in output1:
    weekday[zipcode].append((day, total))

weekday['11216']

[(0, 999), (1, 1064), (2, 1039), (3, 979), (4, 1221), (5, 1898), (6, 1936)]

#2. Hour

In [13]:
#Calculating sum of complaints per zipcode per hour

def mapper1(row):
    """Emit one ``((zipcode, hour), 1)`` pair for a complaint record.

    ``row`` is a mapping with 'Incident Zip' and 'Created Date' entries; the
    hour is the ``.hour`` attribute of the parsed creation date.

    Note: this reuses the name ``mapper1`` from the weekday section, so it
    replaces that definition once this cell has run.
    """
    zipcode = row['Incident Zip']
    created = dateutil.parser.parse(row['Created Date'])
    key = (zipcode, created.hour)
    yield (key, 1)

def reducer1(k2v2):
    """Collapse a grouped ``(key, counts)`` pair into ``(key, total)``.

    Note: this reuses the name ``reducer1`` from the weekday section.
    """
    key, ones = k2v2
    total = sum(ones)
    return (key, total)

with open('311_noise.csv', 'r') as noise_csv:
    # DictReader yields one dict per CSV row, keyed by the header names.
    noise_rows = csv.DictReader(noise_csv)
    # Build the list inside the `with` so the file is still open in case
    # mr.run reads the rows lazily.
    output2 = list(mr.run(noise_rows, mapper1, reducer1))

# Peek at a handful of the aggregated records.
output2[15:20]

[(('', 15), 212),
 (('', 16), 216),
 (('', 17), 195),
 (('', 18), 217),
 (('', 19), 231)]

In [14]:
#Creating a dictionary of per-hour counts keyed by zipcode

# Every known zipcode starts with its own empty list ...
hour = {zipcode: [] for zipcode in zipcodes}

# ... and each (hour, count) pair is filed under the zipcode it belongs to.
for (zipcode, hour_of_day), total in output2:
    hour[zipcode].append((hour_of_day, total))

hour['11216']

[(0, 890),
 (1, 828),
 (2, 544),
 (3, 273),
 (4, 182),
 (5, 114),
 (6, 113),
 (7, 154),
 (8, 209),
 (9, 237),
 (10, 206),
 (11, 205),
 (12, 200),
 (13, 239),
 (14, 185),
 (15, 217),
 (16, 257),
 (17, 235),
 (18, 345),
 (19, 364),
 (20, 519),
 (21, 622),
 (22, 832),
 (23, 1166)]

#3. Descriptor

In [18]:
#Calculating sum of complaints per zipcode per descriptor

def mapper3(row):
    """Emit one ``((zipcode, descriptor), 1)`` pair for a complaint record.

    ``row`` is a mapping with 'Incident Zip' and 'Descriptor' entries.
    """
    zipcode = row['Incident Zip']
    complaint_type = row['Descriptor']
    yield ((zipcode, complaint_type), 1)

def reducer3(k2v2):
    """Collapse a grouped ``(key, counts)`` pair into ``(key, total)``."""
    key, ones = k2v2
    total = sum(ones)
    return (key, total)

with open('311_noise.csv', 'r') as noise_csv:
    # DictReader yields one dict per CSV row, keyed by the header names.
    noise_rows = csv.DictReader(noise_csv)
    # Build the list inside the `with` so the file is still open in case
    # mr.run reads the rows lazily.
    output3 = list(mr.run(noise_rows, mapper3, reducer3))

# Peek at a handful of the aggregated records.
output3[7:12]

[(('', 'Loud Talking'), 295),
 (('', 'Loud Television'), 1),
 (('', 'NYPD'), 1),
 (('', 'News Gathering'), 2),
 (('', 'Noise, Barking Dog (NR5)'), 517)]

In [60]:
#Creating a dictionary of per-descriptor counts keyed by zipcode

# Every known zipcode starts with its own empty list ...
descriptor = {zipcode: [] for zipcode in zipcodes}

# ... and each (descriptor, count) pair is filed under its zipcode.
for (zipcode, label), total in output3:
    descriptor[zipcode].append((label, total))

#Extracting top 5 complaints per zipcode

for zipcode in zipcodes:
    # Descending by count; only the five largest entries are kept.
    ranked = sorted(descriptor[zipcode], key=operator.itemgetter(1), reverse=True)
    descriptor[zipcode] = ranked[:5]

descriptor['10005']

[('Noise: Construction Before/After Hours (NM1)', 1013),
 ('Loud Music/Party', 584),
 ('Noise: Jack Hammering (NC2)', 256),
 ('Loud Talking', 229),
 ('Noise: Construction Equipment (NC1)', 153)]

#4. Total complaints (+ bottom 10/top 10)

In [62]:
#Calculating total number of complaints per zipcode

def mapper4(row):
    """Emit one ``(zipcode, 1)`` pair for a complaint record.

    ``row`` is a mapping with an 'Incident Zip' entry.
    """
    yield (row['Incident Zip'], 1)

def reducer4(k2v2):
    """Collapse a grouped ``(zipcode, counts)`` pair into ``(zipcode, total)``."""
    key, ones = k2v2
    total = sum(ones)
    return (key, total)

with open('311_noise.csv', 'r') as noise_csv:
    # DictReader yields one dict per CSV row, keyed by the header names.
    noise_rows = csv.DictReader(noise_csv)
    # Build the list inside the `with` so the file is still open in case
    # mr.run reads the rows lazily.
    output4 = list(mr.run(noise_rows, mapper4, reducer4))

# Peek at the first few (zipcode, total) records.
output4[:5]

[('', 7411), ('00083', 292), ('10000', 267), ('10001', 8196), ('10002', 14796)]

In [65]:
#Ordering zipcodes by number of noise complaints (ascending, in place)

output4.sort(key=lambda pair: pair[1])

#Zipcodes with the least complaints

output4[:10]

[('10123', 1),
 ('10803', 1),
 ('11371', 1),
 ('11241', 2),
 ('10103', 3),
 ('10111', 3),
 ('10107', 4),
 ('10112', 4),
 ('10119', 4),
 ('11242', 4)]

In [66]:
#Zipcodes with the most complaints
# (the last ten entries of output4; meaningful once output4 is sorted ascending by count)

output4[-10:]

[('10040', 12178),
 ('10011', 12279),
 ('10019', 13094),
 ('11238', 13181),
 ('10025', 13197),
 ('10032', 13598),
 ('10009', 14341),
 ('10002', 14796),
 ('10003', 15329),
 ('11211', 17897)]

#5. Normalized complaints (+ bottom 10/top 10)

In [76]:
def mapper5(row):
    """Emit one ``(zipcode, population)`` pair for a census record.

    ``row`` is a mapping with 'Zipcode' and 'Total' entries; 'Total' is
    converted with ``int()``.
    """
    zipcode = row['Zipcode']
    headcount = int(row['Total'])
    yield (zipcode, headcount)

def reducer5(k2v2):
    """Collapse a grouped ``(zipcode, populations)`` pair into ``(zipcode, total)``.

    Summing means several census rows for one zipcode are added together.
    """
    zipcode, headcounts = k2v2
    total = sum(headcounts)
    return (zipcode, total)
    
with open('Census_2010.csv', 'r') as census_csv:
    # DictReader yields one dict per CSV row, keyed by the header names.
    census_rows = csv.DictReader(census_csv)
    # Build the list inside the `with` so the file is still open in case
    # mr.run reads the rows lazily.
    output5 = list(mr.run(census_rows, mapper5, reducer5))

# Peek at the first few (zipcode, population) records.
output5[:10]

[('10001', 21102),
 ('10002', 81410),
 ('10003', 56024),
 ('10004', 3089),
 ('10005', 7135),
 ('10006', 3011),
 ('10007', 6988),
 ('10009', 61347),
 ('10010', 31834),
 ('10011', 50984)]

In [79]:
#Creating dictionaries for total complaints and population

# output4 holds (zipcode, complaint count) pairs and output5 holds
# (zipcode, population) pairs, so each list converts directly into a
# zipcode -> value lookup (a later duplicate key overrides an earlier one).
complaints = dict(output4)
population = dict(output5)

# Spot check one zipcode. The parenthesised single-argument form prints the
# same thing on Python 2 and is also valid syntax on Python 3.
print(complaints['11216'])
print(population['11216'])

9136
54316


In [114]:
#Normalizing number of complaints per population

normalized = {}

# Zipcode 10103 is dropped from the shared `zipcodes` list before normalizing
# (the reason is not recorded here - TODO confirm). The membership guard keeps
# this cell re-runnable: an unconditional remove() raises ValueError as soon as
# the entry is already gone, e.g. the second time the cell is executed.
if '10103' in zipcodes:
    zipcodes.remove('10103')

for zipcode in zipcodes:
    # Only zipcodes present in both lookups, with a non-zero population,
    # can be normalized; everything else is skipped.
    if zipcode in complaints and zipcode in population and population[zipcode] > 0:
        # float() forces true division under Python 2.
        normalized[zipcode] = complaints[zipcode] / float(population[zipcode])

normalized['10002']

0.18174671416287924

In [118]:
#Creating list of (zipcode, complaints per resident) tuples, ascending by ratio

normalized_list = sorted(normalized.items(), key=operator.itemgetter(1))

#Bottom 10 zipcodes (normalized)

normalized_list[:10]

[('11697', 0.006128953174797744),
 ('11005', 0.008305647840531562),
 ('10475', 0.00955266179668222),
 ('11239', 0.01060255357276189),
 ('10044', 0.01646514021095961),
 ('11691', 0.019072207878737403),
 ('11429', 0.019956184027086238),
 ('11040', 0.020229885057471263),
 ('11427', 0.020896028483024624),
 ('11360', 0.02097013344630375)]

In [120]:
#Top 10 zipcodes (normalized)
# (the last ten entries of normalized_list; meaningful once it is sorted ascending)

normalized_list[-10:]

[('10014', 0.3307988360086361),
 ('10013', 0.3572924187725632),
 ('10005', 0.38009810791871057),
 ('10001', 0.38839920386693205),
 ('10036', 0.3889361013313909),
 ('10007', 0.42658843732112195),
 ('10012', 0.42743877127438773),
 ('10004', 0.46487536419553255),
 ('10006', 0.5423447359681169),
 ('10018', 0.6550009562057755)]