In [2]:
import numpy as np
import pandas as pd

# Input files, given relative to the notebook's working directory.
COMPLAINTS_CSV = '../data/cleaned/nyc-311-complaints.csv.gz'
POPULATION_CSV = '../data/cleaned/population-by-zip-nyc-2010.csv'

# Read both tables into DataFrames using pandas' default CSV parsing.
complaints = pd.read_csv(COMPLAINTS_CSV)
nyc_population = pd.read_csv(POPULATION_CSV)

## What are the 10 most common complaint types in NYC in 2017?

In [3]:
# Count complaints per type with the built-in `size()` aggregation (one row
# count per group) rather than a Python-level `apply(len)` callback, then keep
# the ten largest counts as a two-column frame: complaint_type, n_complaints.
most_common = (
    complaints.groupby('complaint_type')
              .size()
              .nlargest(n=10)
              .to_frame(name='n_complaints')
              .reset_index()
)
most_common

Unnamed: 0,complaint_type,n_complaints
0,Noise - Residential,228938
1,HEAT/HOT WATER,212700
2,Illegal Parking,144522
3,Blocked Driveway,135007
4,Street Condition,89788
5,UNSANITARY CONDITION,79133
6,Noise - Street/Sidewalk,72423
7,Water System,63182
8,Noise,58791
9,PAINT/PLASTER,56991


## For each borough, how many of each of those 10 types were there in 2017?

In [4]:
# Tally complaints for every (borough, complaint type) pair as a flat frame
# with columns: borough, complaint_type, n_complaints.
by_borough = (
    complaints
    .groupby(['borough', 'complaint_type'])
    .size()
    .reset_index(name='n_complaints')
)

# Keep only the rows whose complaint type appears in `most_common`.
filt = by_borough['complaint_type'].isin(most_common['complaint_type'])
by_borough[filt]

Unnamed: 0,borough,complaint_type,n_complaints
20,Bronx,Blocked Driveway,24592
72,Bronx,HEAT/HOT WATER,68713
86,Bronx,Illegal Parking,16136
104,Bronx,Noise,3056
109,Bronx,Noise - Residential,57687
110,Bronx,Noise - Street/Sidewalk,14028
119,Bronx,PAINT/PLASTER,19493
148,Bronx,Street Condition,11155
163,Bronx,UNSANITARY CONDITION,24557
175,Bronx,Water System,9973


## Considering all complaint types, calculate the number of complaints per person in each borough as an indication of which boroughs make the most complaints (i.e., the "biggest complainers").

In [5]:
# `pd` and `nyc_population` are already provided by the notebook's first cell
# (imports and data loading), so pandas is not re-imported and the CSV is not
# re-read here.
#
# Sum the `population` column per borough. `as_index=False` keeps `borough`
# as a regular column so the result can later be merged on it. The column is
# selected with brackets rather than attribute access so it cannot be confused
# with an attribute of the GroupBy object.
population_by_borough = (
    nyc_population.groupby('borough', as_index=False)['population']
                  .sum()
)
population_by_borough

Unnamed: 0,borough,population
0,Bronx,1382480
1,Brooklyn,2504700
2,Manhattan,1565608
3,Queens,2231553
4,Staten Island,468730


In [14]:
# Per-borough complaint totals joined to the borough populations, plus the
# ratio of the two. The merge uses pandas' default join type, so only boroughs
# present in both frames appear in the result.
complaints_per_borough = (
    complaints
    .groupby('borough')
    .size()
    .reset_index(name='n_complaints')
    .merge(population_by_borough, on='borough')
    .assign(
        complaints_per_person=lambda df: df['n_complaints'] / df['population']
    )
)
complaints_per_borough.sort_values('complaints_per_person', ascending=False)

Unnamed: 0,borough,n_complaints,population,complaints_per_person
0,Bronx,433097,1382480,0.313275
1,Brooklyn,737733,2504700,0.294539
2,Manhattan,460139,1565608,0.293904
4,Staten Island,127860,468730,0.27278
3,Queens,569162,2231553,0.255052
