In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's default styling with font elements scaled by 1.25.
sns.set(font_scale=1.25)
# Seed NumPy's global random number generator so random operations repeat across runs.
np.random.seed(5)

# Raise pandas display limits so long/wide frames are truncated less when shown.
pd.set_option("display.max_rows",10000)
pd.set_option("display.max_columns",100)

In [2]:
# Load the complaint records from nyc1.csv (path is relative to the working directory).
df = pd.read_csv("nyc1.csv")

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head()

Unnamed: 0,unique_key,created_date,closed_date,complaint_type,location_type,incident_zip,incident_address,street_name,address_type,city,status,resolution_description,borough,latitude,longitude
0,45539970,2020-02-03T11:34:00.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,11694.0,180 BEACH 117 STREET,BEACH 117 STREET,ADDRESS,Rockaway Park,Open,The following complaint conditions are still o...,QUEENS,40.578958,-73.837773
1,45540031,2020-02-03T15:42:43.000,,PLUMBING,RESIDENTIAL BUILDING,10035.0,1900 LEXINGTON AVENUE,LEXINGTON AVENUE,ADDRESS,NEW YORK,Open,The following complaint conditions are still o...,MANHATTAN,40.800055,-73.940585
2,45539181,2020-02-03T12:53:54.000,,ELECTRIC,RESIDENTIAL BUILDING,10466.0,4040 CARPENTER AVENUE,CARPENTER AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.890607,-73.862283
3,45538988,2020-02-03T05:53:31.000,2020-02-03T16:54:52.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10461.0,1185 NEILL AVENUE,NEILL AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.855078,-73.852492
4,45540022,2020-02-03T18:01:48.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,10468.0,2523 UNIVERSITY AVENUE,UNIVERSITY AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.866135,-73.902726


In [4]:
# Column dtypes and non-null counts for the full dataset.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 15 columns):
unique_key                100000 non-null int64
created_date              100000 non-null object
closed_date               91976 non-null object
complaint_type            100000 non-null object
location_type             100000 non-null object
incident_zip              99957 non-null float64
incident_address          100000 non-null object
street_name               100000 non-null object
address_type              100000 non-null object
city                      99963 non-null object
status                    100000 non-null object
resolution_description    99956 non-null object
borough                   100000 non-null object
latitude                  99957 non-null float64
longitude                 99957 non-null float64
dtypes: float64(3), int64(1), object(11)
memory usage: 11.4+ MB


In [5]:
# (rows, columns) of the full dataset.
df.shape

(100000, 15)

In [6]:
# Frequency of each complaint type across all boroughs, most common first.
df['complaint_type'].value_counts()

HEAT/HOT WATER          49540
UNSANITARY CONDITION    13477
PLUMBING                 6778
WATER LEAK               5849
PAINT/PLASTER            5735
DOOR/WINDOW              4965
GENERAL                  3621
ELECTRIC                 3240
APPLIANCE                2863
FLOORING/STAIRS          2231
SAFETY                   1339
ELEVATOR                  262
OUTSIDE BUILDING          100
Name: complaint_type, dtype: int64

In [7]:
# Number of complaints recorded per borough value.
df['borough'].value_counts()

BRONX            33772
BROOKLYN         30312
MANHATTAN        21180
QUEENS           13217
STATEN ISLAND     1518
Unspecified          1
Name: borough, dtype: int64

### There are two ways to frame this as a classification problem

Approach 1 (binary): label Heat/Hot Water complaints as '1' and all other complaint types as '0'<br>
Approach 2 (multiclass): use a multiclass classification model to predict the complaint type

### Use the Bronx as the main target area

In [8]:
# Keep only the rows whose borough is exactly 'BRONX'.
df2 = df.loc[df['borough'].eq('BRONX')]

In [9]:
# Preview the first five rows of the Bronx-only subset.
df2.head()

Unnamed: 0,unique_key,created_date,closed_date,complaint_type,location_type,incident_zip,incident_address,street_name,address_type,city,status,resolution_description,borough,latitude,longitude
2,45539181,2020-02-03T12:53:54.000,,ELECTRIC,RESIDENTIAL BUILDING,10466.0,4040 CARPENTER AVENUE,CARPENTER AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.890607,-73.862283
3,45538988,2020-02-03T05:53:31.000,2020-02-03T16:54:52.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10461.0,1185 NEILL AVENUE,NEILL AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.855078,-73.852492
4,45540022,2020-02-03T18:01:48.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,10468.0,2523 UNIVERSITY AVENUE,UNIVERSITY AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.866135,-73.902726
7,45539035,2020-02-03T06:54:49.000,2020-02-03T17:06:54.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10463.0,3150 BAILEY AVENUE,BAILEY AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.87879,-73.901771
9,45540010,2020-02-03T08:28:50.000,,UNSANITARY CONDITION,RESIDENTIAL BUILDING,10467.0,679 WARING AVENUE,WARING AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.861366,-73.868229


In [10]:
# (rows, columns) of the Bronx subset.
df2.shape

(33772, 15)

In [11]:
# Complaint-type frequencies within the Bronx subset, most common first.
df2['complaint_type'].value_counts()

HEAT/HOT WATER          17330
UNSANITARY CONDITION     4290
PLUMBING                 2234
PAINT/PLASTER            2031
WATER LEAK               1960
DOOR/WINDOW              1582
GENERAL                  1077
ELECTRIC                 1036
APPLIANCE                1018
FLOORING/STAIRS           750
SAFETY                    365
ELEVATOR                   75
OUTSIDE BUILDING           24
Name: complaint_type, dtype: int64

In [12]:
# Count missing values in each column of the Bronx subset.
df2.isna().sum()

unique_key                   0
created_date                 0
closed_date               3669
complaint_type               0
location_type                0
incident_zip                12
incident_address             0
street_name                  0
address_type                 0
city                        12
status                       0
resolution_description       1
borough                      0
latitude                    12
longitude                   12
dtype: int64

### Using Approach 1

In [13]:
# Approach 1 target: 1 for HEAT/HOT WATER complaints, 0 for every other type.
# Only complaint_type is rewritten. A frame-wide replace() with a flat dict is
# applied to every column, so any other column that happened to contain one of
# these strings would be silently changed too.
# Any complaint type other than HEAT/HOT WATER maps to 0, so the negative
# classes no longer have to be listed by hand.
df3 = df2.assign(
    complaint_type=df2['complaint_type'].eq('HEAT/HOT WATER').astype('int64')
)

In [14]:
# Check the class balance of the binary target.
df3['complaint_type'].value_counts()

1    17330
0    16442
Name: complaint_type, dtype: int64

In [15]:
# Display the binarised frame to confirm complaint_type now holds 0/1.
df3

Unnamed: 0,unique_key,created_date,closed_date,complaint_type,location_type,incident_zip,incident_address,street_name,address_type,city,status,resolution_description,borough,latitude,longitude
2,45539181,2020-02-03T12:53:54.000,,0,RESIDENTIAL BUILDING,10466.0,4040 CARPENTER AVENUE,CARPENTER AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.890607,-73.862283
3,45538988,2020-02-03T05:53:31.000,2020-02-03T16:54:52.000,1,RESIDENTIAL BUILDING,10461.0,1185 NEILL AVENUE,NEILL AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.855078,-73.852492
4,45540022,2020-02-03T18:01:48.000,,1,RESIDENTIAL BUILDING,10468.0,2523 UNIVERSITY AVENUE,UNIVERSITY AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.866135,-73.902726
7,45539035,2020-02-03T06:54:49.000,2020-02-03T17:06:54.000,1,RESIDENTIAL BUILDING,10463.0,3150 BAILEY AVENUE,BAILEY AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.878790,-73.901771
9,45540010,2020-02-03T08:28:50.000,,0,RESIDENTIAL BUILDING,10467.0,679 WARING AVENUE,WARING AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.861366,-73.868229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99988,45138841,2019-12-11T11:14:16.000,2019-12-13T10:36:44.000,1,RESIDENTIAL BUILDING,10458.0,550 EAST 187 STREET,EAST 187 STREET,ADDRESS,BRONX,Closed,The complaint you filed is a duplicate of a co...,BRONX,40.856385,-73.889152
99991,45138803,2019-12-11T15:07:14.000,2019-12-13T16:53:55.000,1,RESIDENTIAL BUILDING,10459.0,830 REV JAMES POLITE AVENUE,REV JAMES POLITE AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.818750,-73.898886
99995,45138068,2019-12-11T12:31:58.000,2019-12-13T10:36:44.000,1,RESIDENTIAL BUILDING,10460.0,989 EAST 179 STREET,EAST 179 STREET,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.842428,-73.880584
99996,45138035,2019-12-11T11:25:45.000,2019-12-13T17:14:22.000,1,RESIDENTIAL BUILDING,10462.0,1470 PARKCHESTER ROAD,PARKCHESTER ROAD,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.836695,-73.859511


In [16]:
# Export the binary-target Bronx frame (df3) to CSV without the index.
# Currently disabled; uncomment the line below to write nycbronx1.csv.
#df3.to_csv("nycbronx1.csv",index=False)

### Using Approach 2

In [17]:
# Reload the raw data for Approach 2.
# NOTE(review): this reads the same file as the first load cell, and df is not
# modified in between, so the reload looks redundant — confirm before removing.
df = pd.read_csv("nyc1.csv")

In [18]:
# Rebuild the Bronx-only subset (same borough filter as used for Approach 1);
# complaint_type keeps its original string labels for the multiclass target.
df2 = df.loc[df['borough'].eq('BRONX')]

In [19]:
# Preview the first five rows of the Bronx subset with the original labels.
df2.head()

Unnamed: 0,unique_key,created_date,closed_date,complaint_type,location_type,incident_zip,incident_address,street_name,address_type,city,status,resolution_description,borough,latitude,longitude
2,45539181,2020-02-03T12:53:54.000,,ELECTRIC,RESIDENTIAL BUILDING,10466.0,4040 CARPENTER AVENUE,CARPENTER AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.890607,-73.862283
3,45538988,2020-02-03T05:53:31.000,2020-02-03T16:54:52.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10461.0,1185 NEILL AVENUE,NEILL AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.855078,-73.852492
4,45540022,2020-02-03T18:01:48.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,10468.0,2523 UNIVERSITY AVENUE,UNIVERSITY AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.866135,-73.902726
7,45539035,2020-02-03T06:54:49.000,2020-02-03T17:06:54.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10463.0,3150 BAILEY AVENUE,BAILEY AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.87879,-73.901771
9,45540010,2020-02-03T08:28:50.000,,UNSANITARY CONDITION,RESIDENTIAL BUILDING,10467.0,679 WARING AVENUE,WARING AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.861366,-73.868229


In [20]:
# (rows, columns) of the Bronx subset.
df2.shape

(33772, 15)

In [21]:
# Count rows that exactly repeat an earlier row across all columns.
df2.duplicated().sum()

0

In [22]:
# Export the multiclass Bronx frame (df2) to CSV without the index.
# Currently disabled; uncomment the line below to write nycbronx2.csv.
#df2.to_csv("nycbronx2.csv",index=False)