In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import pyarrow as pa
import joblib
import numpy as np
import pathlib

In [3]:
# Read the parquet data stored at <root_dir>/combined/3 into the working frame.
root_dir = pathlib.Path("../data")
combined_path = root_dir.joinpath("combined", "3")
df = pd.read_parquet(combined_path)

In [4]:
# Narrow the frame to the three text columns and display the result.
text_columns = ['complaint_type', 'descriptor', 'resolution_description']
text_feats = df[text_columns]
text_feats

Unnamed: 0,complaint_type,descriptor,resolution_description
0,Traffic,Congestion/Gridlock,The Police Department responded to the complai...
1,Derelict Vehicle,With License Plate,This complaint does not fall under the Police ...
2,Derelict Vehicle,With License Plate,The Police Department responded to the complai...
3,Derelict Vehicle,With License Plate,The Police Department responded to the complai...
4,Derelict Vehicle,With License Plate,Your request can not be processed at this time...
...,...,...,...
749174,Noise - Street/Sidewalk,Loud Talking,The Police Department responded to the complai...
749175,Noise - Commercial,Banging/Pounding,The Police Department responded to the complai...
749176,Noise - Commercial,Loud Music/Party,The Police Department responded to the complai...
749177,Blocked Driveway,No Access,The Police Department responded and upon arriv...


In [20]:
# Buckets of exact `resolution_description` strings, one set per outcome.
# Long messages are written as adjacent string literals (one sentence per
# fragment); Python joins them into a single string at compile time.

# Police completed the resolution, or judged that no action was needed.
resolved_by_police = set([
    'The Police Department made an arrest in response to the complaint.',
    'The Police Department issued a summons in response to the complaint.',
    'The Police Department responded to the complaint and a report was prepared.',
    'The Police Department responded to the complaint and took action to fix the condition.',
    'The Police Department responded to the complaint and determined that police action was not necessary.',
])

# The condition was already gone by the time police arrived.
resolved_before_police = set([
    'The Police Department responded and upon arrival those responsible for the condition were gone.',
    ('The Police Department responded to the complaint and with the '
     'information available observed no evidence of the violation at that time.'),
])

# Police were unable to respond to the complaint, either because of legal
# constraints on their work or because of user error.
failed_to_respond = set([
    "This complaint does not fall under the Police Department's jurisdiction.",
    'The Police Department responded to the complaint but officers were unable to gain entry into the premises.',
    ('Your request can not be processed at this time because of insufficient contact information. '
     'Please create a new Service Request on NYC.gov and provide more detailed contact information.'),
])

# Censored entries: the text does not say how the complaint ended.
resolution_unknown = set([
    'The Police Department reviewed your complaint and provided additional information below.',
    'Your complaint has been received by the Police Department and additional information will be available later.',
    ('Your complaint has been forwarded to the New York Police Department for a non-emergency response. '
     'Your complaint will take priority over other non-emergency complaints. '
     '311 will have additional information in 8 hours. '
     'Please note your service request number for future reference.'),
    ("Your complaint has been forwarded to the New York Police Department for a non-emergency response. "
     "If the police determine the vehicle is illegally parked, they will ticket the vehicle and then you may "
     "either contact a private towing company to remove the vehicle or ask your local precinct to contact "
     "'rotation tow'. "
     "Any fees charged for towing will have to be paid by the vehicle owner. "
     "311 will have additional information in 8 hours. "
     "Please note your service request number for future reference."),
])

In [25]:
# Row count per complaint descriptor (value_counts sorts most common first).
descriptor_counts = df['descriptor'].value_counts()
descriptor_counts

descriptor
Loud Music/Party                  228643
No Access                          98916
Banging/Pounding                   67788
Blocked Hydrant                    42909
Posted Parking Sign Violation      42233
With License Plate                 38954
Partial Access                     37877
Loud Talking                       37854
Blocked Sidewalk                   24222
Commercial Overnight Parking       21548
Car/Truck Music                    17230
Car/Truck Horn                     10944
Double Parked Blocking Traffic     10285
Other (complaint details)           8543
Engine Idling                       7628
Double Parked Blocking Vehicle      6763
N/A                                 6216
Blocked Bike Lane                   5686
Loud Television                     4494
Neglected                           4145
Parking Permit Improper Use         3142
Congestion/Gridlock                 3013
Unauthorized Bus Layover            2755
In Prohibited Area                  2658
Overn

In [24]:
# Pool every description already assigned to one of the three decided buckets,
# then list the descriptions that fall outside them together with their counts.
categorized = resolved_by_police | resolved_before_police | failed_to_respond
is_categorized = df['resolution_description'].isin(categorized)
uncategorized_counts = df.loc[~is_categorized, 'resolution_description'].value_counts()
list(uncategorized_counts.items())

[('The Police Department reviewed your complaint and provided additional information below.',
  40508),
 ('Your complaint has been forwarded to the New York Police Department for a non-emergency response. Your complaint will take priority over other non-emergency complaints. 311 will have additional information in 8 hours. Please note your service request number for future reference.',
  56),
 ('Your complaint has been received by the Police Department and additional information will be available later.',
  14),
 ("Your complaint has been forwarded to the New York Police Department for a non-emergency response. If the police determine the vehicle is illegally parked, they will ticket the vehicle and then you may either contact a private towing company to remove the vehicle or ask your local precinct to contact 'rotation tow'. Any fees charged for towing will have to be paid by the vehicle owner. 311 will have additional information in 8 hours. Please note your service request number fo

In [13]:
# Complaint-type breakdown of the rows whose resolution says officers could
# not gain entry into the premises.
no_entry_text = 'The Police Department responded to the complaint but officers were unable to gain entry into the premises.'
no_entry_mask = df['resolution_description'] == no_entry_text
df.loc[no_entry_mask, 'complaint_type'].value_counts()

complaint_type
Noise - Residential            12515
Animal Abuse                     462
Noise - Street/Sidewalk          370
Noise - Commercial               301
Illegal Parking                  156
Blocked Driveway                 136
Drug Activity                     74
Non-Emergency Police Matter       72
Derelict Vehicle                  51
Noise - Vehicle                   47
Homeless Encampment               40
Drinking                          11
Noise - Park                       8
Vending                            4
Traffic                            4
Graffiti                           4
Noise - House of Worship           3
Urinating in Public                2
Name: count, dtype: Int64

In [11]:
# (key, count) pairs for every resolution description; keys are 1-tuples
# because value_counts is called on a one-column DataFrame.
resolution_counts = df[['resolution_description']].value_counts()
[(key, n) for key, n in resolution_counts.items()]

[(('The Police Department responded to the complaint and with the information available observed no evidence of the violation at that time.',),
  267500),
 (('The Police Department responded to the complaint and took action to fix the condition.',),
  159121),
 (('The Police Department responded and upon arrival those responsible for the condition were gone.',),
  100197),
 (('The Police Department responded to the complaint and determined that police action was not necessary.',),
  84184),
 (('The Police Department issued a summons in response to the complaint.',),
  54593),
 (('The Police Department reviewed your complaint and provided additional information below.',),
  40508),
 (('Your request can not be processed at this time because of insufficient contact information. Please create a new Service Request on NYC.gov and provide more detailed contact information.',),
  19071),
 (('The Police Department responded to the complaint but officers were unable to gain entry into the premi

In [10]:
# (key, count) pairs for every (resolution_description, complaint_type) combination.
resolution_complaint_counts = df[['resolution_description', 'complaint_type']].value_counts()
[(key, n) for key, n in resolution_complaint_counts.items()]

[(('The Police Department responded to the complaint and with the information available observed no evidence of the violation at that time.',
   'Noise - Residential'),
  102773),
 (('The Police Department responded to the complaint and took action to fix the condition.',
   'Noise - Residential'),
  59545),
 (('The Police Department responded to the complaint and with the information available observed no evidence of the violation at that time.',
   'Illegal Parking'),
  42669),
 (('The Police Department responded and upon arrival those responsible for the condition were gone.',
   'Blocked Driveway'),
  41017),
 (('The Police Department responded and upon arrival those responsible for the condition were gone.',
   'Illegal Parking'),
  35709),
 (('The Police Department responded to the complaint and with the information available observed no evidence of the violation at that time.',
   'Noise - Street/Sidewalk'),
  35510),
 (('The Police Department issued a summons in response to the

In [6]:
# Row count per complaint descriptor (value_counts sorts most common first).
descriptor_col = df['descriptor']
descriptor_col.value_counts()

descriptor
Loud Music/Party                  228643
No Access                          98916
Banging/Pounding                   67788
Blocked Hydrant                    42909
Posted Parking Sign Violation      42233
With License Plate                 38954
Partial Access                     37877
Loud Talking                       37854
Blocked Sidewalk                   24222
Commercial Overnight Parking       21548
Car/Truck Music                    17230
Car/Truck Horn                     10944
Double Parked Blocking Traffic     10285
Other (complaint details)           8543
Engine Idling                       7628
Double Parked Blocking Vehicle      6763
N/A                                 6216
Blocked Bike Lane                   5686
Loud Television                     4494
Neglected                           4145
Parking Permit Improper Use         3142
Congestion/Gridlock                 3013
Unauthorized Bus Layover            2755
In Prohibited Area                  2658
Overn

In [None]:
# Split each resolution description on single spaces, then flatten to one
# token per row (the original row index is repeated for each token).
resolution_tokens = df['resolution_description'].str.split(" ")
resolution_tokens.explode()

In [3]:
# Split each descriptor on single spaces and flatten to one token per row.
descriptor_tokens = df['descriptor'].str.split(" ")
compl_text = descriptor_tokens.explode()


In [4]:
# Display the contents of `compl_text` as this cell's output.
compl_text

0           Tortured
1                 No
1             Access
2              Labor
2          Violation
             ...    
1886367     Flooding
1886367         (SJ)
1886368      Pothole
1886368            -
1886368      Highway
Name: descriptor, Length: 5049022, dtype: object

In [8]:
# Load a pickled frame from disk and tokenise its `descriptor` column on
# single spaces, keeping one list of tokens per row (no explode here).
# NOTE(review): joblib.load unpickles the file — only load files you trust.
# NOTE(review): presumably this is the NYPD-only subset, judging by the name — confirm.
nypdf = joblib.load("../data/nypdf_precinct4.pkl")
nypd_descriptor = nypdf['descriptor']
nypd_compl_text = nypd_descriptor.str.split(" ")
nypd_compl_text

0                                  [Tortured]
1                                [No, Access]
15           [Commercial, Overnight, Parking]
16         [Posted, Parking, Sign, Violation]
20                         [Banging/Pounding]
                          ...                
1886355         [Other, (complaint, details)]
1886363                    [Blocked, Hydrant]
1886364                   [Loud, Music/Party]
1886365                    [Banging/Pounding]
1886366                   [Loud, Music/Party]
Name: descriptor, Length: 573681, dtype: object

In [None]:
# Count every space-separated token across all resolution descriptions.
# sort_values() orders ascending, so the most frequent words end up at the
# bottom of the displayed series.
resolution_words = df['resolution_description'].str.split(" ").explode()
word_counts = resolution_words.value_counts()
most_freq_words = word_counts.sort_values()
most_freq_words

In [None]:
# Keyword probes over `resolution_description`.
# For each substring below, flag the rows whose resolution text contains it,
# then report:
#   (a) the correlation between that flag and `hours_to_complete` within each
#       complaint type, sorted ascending, and
#   (b) the number of flagged rows per agency.
patterns = [
    'responded',
    'completed',
    'insufficient',
    'jurisdiction',
    'review',
    'attempt',
    'investigated',
    ' not ',
    ' no '
]
for pat in patterns:
    # Build the mask once and reuse it below.
    #   regex=False: the patterns are plain substrings, not regular expressions.
    #   na=False: a missing description counts as "no match", which keeps the
    #   mask strictly boolean so it is safe for row selection and for corr().
    has_pat = df['resolution_description'].str.contains(pat, regex=False, na=False)

    # The flag is stored on df under the pattern's own name (as before), since
    # the groupby/corr step below reads it back by that column name.
    df[pat] = has_pat

    # (b) flagged rows per agency, labelled with the pattern.
    srs = df[has_pat].groupby('agency')['created_date'].count().rename(pat)

    # (a) per-complaint-type correlation matrix; its index has one row per
    # (complaint_type, column) pair.
    per_type_corr = df[[pat, 'hours_to_complete', "complaint_type"]].groupby(["complaint_type"]).corr()

    # Pick the flag's row for every complaint type *by label*. Selecting it
    # positionally (filter "< 1" then every second row) either skips half of
    # the complaint types or returns the self-correlation rows, depending on
    # whether the diagonal comes out as exactly 1.0.
    correlation = (
        per_type_corr
        .xs(pat, level=-1, drop_level=False)['hours_to_complete']
        .dropna()  # complaint types where the flag or the duration is constant
        .sort_values()
    )
    print(correlation)
    display(srs)