In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
sns.set(font_scale=1.25)
np.random.seed(5)

pd.set_option("display.max_rows",10000)
pd.set_option("display.max_columns",100)

In [2]:
# Load the first dataset (heat/hot-water complaint records) from nycheatbronx.csv
# in the working directory.
df1 = pd.read_csv("nycheatbronx.csv")

In [3]:
# Preview the first rows to see which columns the complaints data provides.
df1.head()

Unnamed: 0,unique_key,created_date,closed_date,complaint_type,location_type,zipcode,incident_address,street_name,address_type,city,status,resolution_description,borough,latitude,longitude
0,45538988,2020-02-03T05:53:31.000,2020-02-03T16:54:52.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10461.0,1185 NEILL AVENUE,NEILL AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.855078,-73.852492
1,45540022,2020-02-03T18:01:48.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,10468.0,2523 UNIVERSITY AVENUE,UNIVERSITY AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.866135,-73.902726
2,45539035,2020-02-03T06:54:49.000,2020-02-03T17:06:54.000,HEAT/HOT WATER,RESIDENTIAL BUILDING,10463.0,3150 BAILEY AVENUE,BAILEY AVENUE,ADDRESS,BRONX,Closed,The Department of Housing Preservation and Dev...,BRONX,40.87879,-73.901771
3,45539982,2020-02-03T12:31:16.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,10455.0,520 TINTON AVENUE,TINTON AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.812037,-73.906544
4,45539989,2020-02-03T17:25:12.000,,HEAT/HOT WATER,RESIDENTIAL BUILDING,10460.0,2112 MAPES AVENUE,MAPES AVENUE,ADDRESS,BRONX,Open,The following complaint conditions are still o...,BRONX,40.846939,-73.885253


In [4]:
# Column dtypes and non-null counts; used to spot columns with missing values.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17330 entries, 0 to 17329
Data columns (total 15 columns):
unique_key                17330 non-null int64
created_date              17330 non-null object
closed_date               17064 non-null object
complaint_type            17330 non-null object
location_type             17330 non-null object
zipcode                   17326 non-null float64
incident_address          17330 non-null object
street_name               17330 non-null object
address_type              17330 non-null object
city                      17326 non-null object
status                    17330 non-null object
resolution_description    17329 non-null object
borough                   17330 non-null object
latitude                  17326 non-null float64
longitude                 17326 non-null float64
dtypes: float64(3), int64(1), object(11)
memory usage: 2.0+ MB


In [5]:
# (rows, columns) of the raw complaints frame.
df1.shape

(17330, 15)

In [6]:
# Count the distinct values in `city` (NaN rows are excluded by value_counts).
df1['city'].value_counts()

BRONX    17326
Name: city, dtype: int64

In [7]:
# Count the distinct values in `borough`.
df1['borough'].value_counts()

BRONX    17330
Name: borough, dtype: int64

### Drop unnecessary features first

In [8]:
# Keep only zipcode, street_name, latitude and longitude; every other column is dropped.
# `city` and `borough` carry a single value ('BRONX') per the value_counts above.
# The result is reassigned rather than using inplace=True, and the redundant axis=1
# is omitted because `columns=` already names the axis.
df1 = df1.drop(columns=[
    'unique_key', 'created_date', 'closed_date', 'complaint_type', 'location_type',
    'incident_address', 'address_type', 'city', 'status', 'resolution_description',
    'borough',
])

In [9]:
# Inspect the frame after dropping the unused columns.
df1

Unnamed: 0,zipcode,street_name,latitude,longitude
0,10461.0,NEILL AVENUE,40.855078,-73.852492
1,10468.0,UNIVERSITY AVENUE,40.866135,-73.902726
2,10463.0,BAILEY AVENUE,40.878790,-73.901771
3,10455.0,TINTON AVENUE,40.812037,-73.906544
4,10460.0,MAPES AVENUE,40.846939,-73.885253
...,...,...,...,...
17325,10458.0,EAST 187 STREET,40.856385,-73.889152
17326,10459.0,REV JAMES POLITE AVENUE,40.818750,-73.898886
17327,10460.0,EAST 179 STREET,40.842428,-73.880584
17328,10462.0,PARKCHESTER ROAD,40.836695,-73.859511


In [10]:
# Build `address` from `street_name` with every run of digits removed.
# - regex=True is passed explicitly: since pandas 2.0 Series.str.replace defaults to
#   regex=False, which would treat the pattern as a literal string and remove nothing.
# - The raw string avoids Python's invalid-escape warning for '\d'.
# - Removing digits leaves doubled/leading spaces behind, so whitespace runs are
#   collapsed to one space and the ends are trimmed.
# NOTE(review): this also removes street numbers ('EAST 187 STREET' and
# 'EAST 179 STREET' both become 'EAST STREET'), so distinct numbered streets
# collapse into one value. Confirm that is intended.
df1['address'] = (
    df1['street_name']
    .str.replace(r'\d+', '', regex=True)
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
)

In [11]:
# Inspect the frame with the new `address` column next to `street_name`.
df1

Unnamed: 0,zipcode,street_name,latitude,longitude,address
0,10461.0,NEILL AVENUE,40.855078,-73.852492,NEILL AVENUE
1,10468.0,UNIVERSITY AVENUE,40.866135,-73.902726,UNIVERSITY AVENUE
2,10463.0,BAILEY AVENUE,40.878790,-73.901771,BAILEY AVENUE
3,10455.0,TINTON AVENUE,40.812037,-73.906544,TINTON AVENUE
4,10460.0,MAPES AVENUE,40.846939,-73.885253,MAPES AVENUE
...,...,...,...,...,...
17325,10458.0,EAST 187 STREET,40.856385,-73.889152,EAST STREET
17326,10459.0,REV JAMES POLITE AVENUE,40.818750,-73.898886,REV JAMES POLITE AVENUE
17327,10460.0,EAST 179 STREET,40.842428,-73.880584,EAST STREET
17328,10462.0,PARKCHESTER ROAD,40.836695,-73.859511,PARKCHESTER ROAD


In [12]:
# `street_name` is superseded by the derived `address` column, so drop it.
# Reassigned instead of inplace=True; `columns=` already implies the axis.
df1 = df1.drop(columns='street_name')

In [13]:
# Inspect the final complaints frame: zipcode, latitude, longitude, address.
df1

Unnamed: 0,zipcode,latitude,longitude,address
0,10461.0,40.855078,-73.852492,NEILL AVENUE
1,10468.0,40.866135,-73.902726,UNIVERSITY AVENUE
2,10463.0,40.878790,-73.901771,BAILEY AVENUE
3,10455.0,40.812037,-73.906544,TINTON AVENUE
4,10460.0,40.846939,-73.885253,MAPES AVENUE
...,...,...,...,...
17325,10458.0,40.856385,-73.889152,EAST STREET
17326,10459.0,40.818750,-73.898886,REV JAMES POLITE AVENUE
17327,10460.0,40.842428,-73.880584,EAST STREET
17328,10462.0,40.836695,-73.859511,PARKCHESTER ROAD


In [14]:
# Checkpoint the cleaned complaints frame as left.csv; the merge section reloads it
# with pd.read_csv("left.csv"). The write is skipped when the file already exists,
# so a previously saved checkpoint is never overwritten.
if not Path("left.csv").exists():
    df1.to_csv("left.csv", index=False)

### Load the second dataset and draw a random sample of 17330 rows

In [15]:
# Load the second dataset (per-lot building attributes) from bronx.csv
# in the working directory.
df2 = pd.read_csv("bronx.csv")

In [16]:
# Preview the first rows of the building data.
df2.head()

Unnamed: 0,borough,lot,zipcode,address,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude,longitude
0,BX,47,10469.0,3310 BRONXWOOD AVENUE,2500.0,2288.0,0.0,2288.0,0.0,0.0,1.0,2.0,100.0,52.0,1960.0,0.0,0.92,1.25,0.0,2.0,40.873572,-73.860952
1,BX,72,10466.0,4031 ELY AVENUE,1629.0,1584.0,0.0,1152.0,0.0,0.0,1.0,2.0,90.5,32.0,1950.0,0.0,0.97,0.75,0.0,2.0,40.890608,-73.845925
2,BX,183,10466.0,964 EAST 229 STREET,1970.0,1485.0,0.0,1080.0,0.0,0.0,1.0,2.0,109.42,32.0,1955.0,0.0,0.75,0.75,0.0,2.0,40.887873,-73.851034
3,BX,33,10469.0,1006 EAST GUN HILL ROAD,2500.0,3933.0,1337.0,2596.0,0.0,1337.0,1.0,3.0,100.0,60.0,1938.0,0.0,1.57,3.0,0.0,3.0,40.874358,-73.857873
4,BX,14,10466.0,3879 AMUNDSON AVENUE,1800.0,1701.0,0.0,1188.0,0.0,0.0,1.0,2.0,100.0,33.0,1957.0,0.0,0.95,0.75,0.0,2.0,40.888725,-73.835676


In [17]:
# Column dtypes and non-null counts for the building data.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86157 entries, 0 to 86156
Data columns (total 22 columns):
borough       86157 non-null object
lot           86157 non-null int64
zipcode       85881 non-null float64
address       86112 non-null object
lotarea       86097 non-null float64
bldgarea      86109 non-null float64
comarea       79299 non-null float64
resarea       79299 non-null float64
officearea    79299 non-null float64
retailarea    79299 non-null float64
numbldgs      86097 non-null float64
numfloors     86097 non-null float64
lotdepth      86097 non-null float64
bldgdepth     86097 non-null float64
yearbuilt     86112 non-null float64
yearalter1    86112 non-null float64
builtfar      86061 non-null float64
residfar      86112 non-null float64
commfar       86112 non-null float64
facilfar      86112 non-null float64
latitude      86034 non-null float64
longitude     86034 non-null float64
dtypes: float64(19), int64(1), object(2)
memory usage: 14.5+ MB


In [18]:
# Build `address2` from `address` with every run of digits removed, which strips
# the leading house number ('3310 BRONXWOOD AVENUE' -> 'BRONXWOOD AVENUE').
# - regex=True is passed explicitly: since pandas 2.0 Series.str.replace defaults to
#   regex=False, which would treat the pattern as a literal string and remove nothing.
# - The raw string avoids Python's invalid-escape warning for '\d'.
# - Removing the house number leaves a leading space (and doubled spaces inside
#   numbered street names), so whitespace runs are collapsed and the ends trimmed.
# NOTE(review): street numbers are removed too ('964 EAST 229 STREET' becomes
# 'EAST STREET'), so distinct numbered streets collapse into one value.
# Confirm that is intended.
df2['address2'] = (
    df2['address']
    .str.replace(r'\d+', '', regex=True)
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
)

In [19]:
# Inspect the frame with the new `address2` column next to the original `address`.
df2

Unnamed: 0,borough,lot,zipcode,address,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude,longitude,address2
0,BX,47,10469.0,3310 BRONXWOOD AVENUE,2500.0,2288.0,0.0,2288.0,0.0,0.0,1.0,2.0,100.00,52.0,1960.0,0.0,0.92,1.25,0.0,2.0,40.873572,-73.860952,BRONXWOOD AVENUE
1,BX,72,10466.0,4031 ELY AVENUE,1629.0,1584.0,0.0,1152.0,0.0,0.0,1.0,2.0,90.50,32.0,1950.0,0.0,0.97,0.75,0.0,2.0,40.890608,-73.845925,ELY AVENUE
2,BX,183,10466.0,964 EAST 229 STREET,1970.0,1485.0,0.0,1080.0,0.0,0.0,1.0,2.0,109.42,32.0,1955.0,0.0,0.75,0.75,0.0,2.0,40.887873,-73.851034,EAST STREET
3,BX,33,10469.0,1006 EAST GUN HILL ROAD,2500.0,3933.0,1337.0,2596.0,0.0,1337.0,1.0,3.0,100.00,60.0,1938.0,0.0,1.57,3.00,0.0,3.0,40.874358,-73.857873,EAST GUN HILL ROAD
4,BX,14,10466.0,3879 AMUNDSON AVENUE,1800.0,1701.0,0.0,1188.0,0.0,0.0,1.0,2.0,100.00,33.0,1957.0,0.0,0.95,0.75,0.0,2.0,40.888725,-73.835676,AMUNDSON AVENUE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86152,BX,58,10462.0,2161 BARNES AVENUE,20008.0,74772.0,0.0,74772.0,0.0,0.0,1.0,6.0,100.00,76.0,1931.0,0.0,3.74,3.44,0.0,4.8,40.855241,-73.864135,BARNES AVENUE
86153,BX,20,10472.0,1272 NOBLE AVENUE,2500.0,3321.0,0.0,3321.0,0.0,0.0,1.0,2.0,100.00,81.0,1926.0,0.0,1.33,1.25,0.0,2.0,40.832030,-73.869990,NOBLE AVENUE
86154,BX,45,10460.0,1726 ADAMS STREET,4913.0,1932.0,1932.0,0.0,0.0,0.0,1.0,2.0,98.25,45.0,1942.0,0.0,0.39,1.25,0.0,2.0,40.841328,-73.871066,ADAMS STREET
86155,BX,17,10461.0,2020 TENBROECK AVENUE,6641.0,1621.0,0.0,1621.0,0.0,0.0,1.0,2.0,121.67,26.0,1940.0,0.0,0.24,0.50,0.0,1.0,40.855852,-73.852111,TENBROECK AVENUE


In [20]:
# Drop `borough` and the original `address`, which `address2` replaces.
# Reassigned instead of inplace=True; `columns=` already implies the axis.
df2 = df2.drop(columns=['borough', 'address'])

In [21]:
# Inspect the frame after dropping `borough` and `address`.
df2

Unnamed: 0,lot,zipcode,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude,longitude,address2
0,47,10469.0,2500.0,2288.0,0.0,2288.0,0.0,0.0,1.0,2.0,100.00,52.0,1960.0,0.0,0.92,1.25,0.0,2.0,40.873572,-73.860952,BRONXWOOD AVENUE
1,72,10466.0,1629.0,1584.0,0.0,1152.0,0.0,0.0,1.0,2.0,90.50,32.0,1950.0,0.0,0.97,0.75,0.0,2.0,40.890608,-73.845925,ELY AVENUE
2,183,10466.0,1970.0,1485.0,0.0,1080.0,0.0,0.0,1.0,2.0,109.42,32.0,1955.0,0.0,0.75,0.75,0.0,2.0,40.887873,-73.851034,EAST STREET
3,33,10469.0,2500.0,3933.0,1337.0,2596.0,0.0,1337.0,1.0,3.0,100.00,60.0,1938.0,0.0,1.57,3.00,0.0,3.0,40.874358,-73.857873,EAST GUN HILL ROAD
4,14,10466.0,1800.0,1701.0,0.0,1188.0,0.0,0.0,1.0,2.0,100.00,33.0,1957.0,0.0,0.95,0.75,0.0,2.0,40.888725,-73.835676,AMUNDSON AVENUE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86152,58,10462.0,20008.0,74772.0,0.0,74772.0,0.0,0.0,1.0,6.0,100.00,76.0,1931.0,0.0,3.74,3.44,0.0,4.8,40.855241,-73.864135,BARNES AVENUE
86153,20,10472.0,2500.0,3321.0,0.0,3321.0,0.0,0.0,1.0,2.0,100.00,81.0,1926.0,0.0,1.33,1.25,0.0,2.0,40.832030,-73.869990,NOBLE AVENUE
86154,45,10460.0,4913.0,1932.0,1932.0,0.0,0.0,0.0,1.0,2.0,98.25,45.0,1942.0,0.0,0.39,1.25,0.0,2.0,40.841328,-73.871066,ADAMS STREET
86155,17,10461.0,6641.0,1621.0,0.0,1621.0,0.0,0.0,1.0,2.0,121.67,26.0,1940.0,0.0,0.24,0.50,0.0,1.0,40.855852,-73.852111,TENBROECK AVENUE


In [22]:
# Rename `address2` to `address` (the original `address` column was dropped above),
# so this frame uses the same column name as df1.
df2 = df2.rename(columns={'address2':'address'})

In [23]:
# Confirm the rename: the last column is now `address`.
df2

Unnamed: 0,lot,zipcode,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude,longitude,address
0,47,10469.0,2500.0,2288.0,0.0,2288.0,0.0,0.0,1.0,2.0,100.00,52.0,1960.0,0.0,0.92,1.25,0.0,2.0,40.873572,-73.860952,BRONXWOOD AVENUE
1,72,10466.0,1629.0,1584.0,0.0,1152.0,0.0,0.0,1.0,2.0,90.50,32.0,1950.0,0.0,0.97,0.75,0.0,2.0,40.890608,-73.845925,ELY AVENUE
2,183,10466.0,1970.0,1485.0,0.0,1080.0,0.0,0.0,1.0,2.0,109.42,32.0,1955.0,0.0,0.75,0.75,0.0,2.0,40.887873,-73.851034,EAST STREET
3,33,10469.0,2500.0,3933.0,1337.0,2596.0,0.0,1337.0,1.0,3.0,100.00,60.0,1938.0,0.0,1.57,3.00,0.0,3.0,40.874358,-73.857873,EAST GUN HILL ROAD
4,14,10466.0,1800.0,1701.0,0.0,1188.0,0.0,0.0,1.0,2.0,100.00,33.0,1957.0,0.0,0.95,0.75,0.0,2.0,40.888725,-73.835676,AMUNDSON AVENUE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86152,58,10462.0,20008.0,74772.0,0.0,74772.0,0.0,0.0,1.0,6.0,100.00,76.0,1931.0,0.0,3.74,3.44,0.0,4.8,40.855241,-73.864135,BARNES AVENUE
86153,20,10472.0,2500.0,3321.0,0.0,3321.0,0.0,0.0,1.0,2.0,100.00,81.0,1926.0,0.0,1.33,1.25,0.0,2.0,40.832030,-73.869990,NOBLE AVENUE
86154,45,10460.0,4913.0,1932.0,1932.0,0.0,0.0,0.0,1.0,2.0,98.25,45.0,1942.0,0.0,0.39,1.25,0.0,2.0,40.841328,-73.871066,ADAMS STREET
86155,17,10461.0,6641.0,1621.0,0.0,1621.0,0.0,0.0,1.0,2.0,121.67,26.0,1940.0,0.0,0.24,0.50,0.0,1.0,40.855852,-73.852111,TENBROECK AVENUE


In [24]:
# Draw 17330 rows at random without replacement; 17330 is the row count that
# df1.info() reports for the complaints frame.
# random_state is given explicitly so the draw does not depend on how many random
# calls ran earlier in the kernel; 5 is the same value passed to np.random.seed
# in the setup cell.
df2 = df2.sample(n=17330, random_state=5)

In [25]:
# Inspect the sampled rows; the index keeps the original (now shuffled) row labels.
df2

Unnamed: 0,lot,zipcode,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude,longitude,address
80661,39,10466.0,2000.0,2080.0,0.0,2080.0,0.0,0.0,1.0,2.0,100.00,52.00,2005.0,0.0,1.04,0.75,0.0,2.0,40.896076,-73.839879,BUSSING AVENUE
83279,149,10465.0,1823.0,1499.0,0.0,1499.0,0.0,0.0,1.0,2.0,50.00,36.00,1940.0,0.0,0.82,0.75,0.0,2.0,40.813435,-73.822838,SCHURZ AVENUE
66128,136,10453.0,1500.0,3423.0,0.0,3423.0,0.0,0.0,1.0,3.0,100.00,37.00,2007.0,0.0,2.28,3.44,0.0,4.8,40.848281,-73.913303,DAVIDSON AVENUE
83266,56,10469.0,1900.0,1911.0,0.0,1344.0,0.0,0.0,1.0,2.0,100.00,40.00,1965.0,1998.0,1.01,0.75,0.0,2.0,40.866312,-73.858935,PAULDING AVENUE
54651,57,10461.0,2500.0,1536.0,0.0,1536.0,0.0,0.0,1.0,2.5,100.00,36.00,1920.0,0.0,0.61,0.75,0.0,2.0,40.847051,-73.852947,HONE AVENUE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44763,76,10465.0,1800.0,1998.0,0.0,1998.0,0.0,0.0,1.0,3.0,100.00,37.00,1960.0,0.0,1.11,0.50,0.0,1.0,40.815113,-73.821479,HUNTINGTON AVENUE
27459,150,10459.0,2147.0,2280.0,0.0,2280.0,0.0,0.0,1.0,3.0,100.00,40.00,1991.0,0.0,1.06,3.44,0.0,6.5,40.828625,-73.895601,EAST STREET
85095,7,10461.0,1676.0,1676.0,1676.0,0.0,0.0,1676.0,1.0,1.0,60.75,60.00,1928.0,0.0,1.00,3.44,0.0,4.8,40.844162,-73.830079,MIDDLETOWN ROAD
14685,11,10455.0,3900.0,15414.0,0.0,15414.0,0.0,0.0,1.0,6.0,100.00,86.67,1906.0,1995.0,3.95,2.43,0.0,4.8,40.820495,-73.915185,EAST STREET


In [26]:
# Checkpoint the sampled building frame as right.csv; the merge section reloads it
# with pd.read_csv("right.csv"). The write is skipped when the file already exists,
# so a previously saved sample is never replaced by a new draw.
if not Path('right.csv').exists():
    df2.to_csv('right.csv', index=False)

### Merge the two datasets

In [27]:
# Reload the complaints checkpoint from left.csv; this replaces the in-memory df1.
df1 = pd.read_csv("left.csv")

In [28]:
# Preview the reloaded complaints frame.
df1.head()

Unnamed: 0,zipcode,latitude,longitude,address
0,10461.0,40.855078,-73.852492,NEILL AVENUE
1,10468.0,40.866135,-73.902726,UNIVERSITY AVENUE
2,10463.0,40.87879,-73.901771,BAILEY AVENUE
3,10455.0,40.812037,-73.906544,TINTON AVENUE
4,10460.0,40.846939,-73.885253,MAPES AVENUE


In [29]:
# Check dtypes and non-null counts of the reloaded complaints frame.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17330 entries, 0 to 17329
Data columns (total 4 columns):
zipcode      17326 non-null float64
latitude     17326 non-null float64
longitude    17326 non-null float64
address      17330 non-null object
dtypes: float64(3), object(1)
memory usage: 541.7+ KB


In [30]:
# Reload the sampled building checkpoint from right.csv; this replaces the in-memory df2
# and gives it a fresh 0..n-1 index.
df2 = pd.read_csv("right.csv")

In [31]:
# Preview the reloaded building sample.
df2.head()

Unnamed: 0,lot,zipcode,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude,longitude,address
0,39,10466.0,2000.0,2080.0,0.0,2080.0,0.0,0.0,1.0,2.0,100.0,52.0,2005.0,0.0,1.04,0.75,0.0,2.0,40.896076,-73.839879,BUSSING AVENUE
1,149,10465.0,1823.0,1499.0,0.0,1499.0,0.0,0.0,1.0,2.0,50.0,36.0,1940.0,0.0,0.82,0.75,0.0,2.0,40.813435,-73.822838,SCHURZ AVENUE
2,136,10453.0,1500.0,3423.0,0.0,3423.0,0.0,0.0,1.0,3.0,100.0,37.0,2007.0,0.0,2.28,3.44,0.0,4.8,40.848281,-73.913303,DAVIDSON AVENUE
3,56,10469.0,1900.0,1911.0,0.0,1344.0,0.0,0.0,1.0,2.0,100.0,40.0,1965.0,1998.0,1.01,0.75,0.0,2.0,40.866312,-73.858935,PAULDING AVENUE
4,57,10461.0,2500.0,1536.0,0.0,1536.0,0.0,0.0,1.0,2.5,100.0,36.0,1920.0,0.0,0.61,0.75,0.0,2.0,40.847051,-73.852947,HONE AVENUE


In [32]:
# Check dtypes and non-null counts of the reloaded building sample.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17330 entries, 0 to 17329
Data columns (total 21 columns):
lot           17330 non-null int64
zipcode       17270 non-null float64
lotarea       17318 non-null float64
bldgarea      17319 non-null float64
comarea       15895 non-null float64
resarea       15895 non-null float64
officearea    15895 non-null float64
retailarea    15895 non-null float64
numbldgs      17318 non-null float64
numfloors     17318 non-null float64
lotdepth      17318 non-null float64
bldgdepth     17318 non-null float64
yearbuilt     17320 non-null float64
yearalter1    17320 non-null float64
builtfar      17313 non-null float64
residfar      17320 non-null float64
commfar       17320 non-null float64
facilfar      17320 non-null float64
latitude      17306 non-null float64
longitude     17306 non-null float64
address       17320 non-null object
dtypes: float64(19), int64(1), object(1)
memory usage: 2.8+ MB


In [33]:
# Combine the two frames column-wise. DataFrame.join with no `on` argument aligns
# rows by index label, so complaint row i is paired with building row i. The suffixes
# disambiguate the column names both frames share (zipcode, latitude, longitude, address).
# NOTE(review): rows are matched by position, not by address or zipcode. The preview
# below pairs e.g. NEILL AVENUE with BUSSING AVENUE. Confirm this pairing is intended.
df3 = df1.join(df2,lsuffix='_left',rsuffix='_right')

In [34]:
# Inspect the combined frame (columns of df1 followed by columns of df2).
df3

Unnamed: 0,zipcode_left,latitude_left,longitude_left,address_left,lot,zipcode_right,lotarea,bldgarea,comarea,resarea,officearea,retailarea,numbldgs,numfloors,lotdepth,bldgdepth,yearbuilt,yearalter1,builtfar,residfar,commfar,facilfar,latitude_right,longitude_right,address_right
0,10461.0,40.855078,-73.852492,NEILL AVENUE,39,10466.0,2000.0,2080.0,0.0,2080.0,0.0,0.0,1.0,2.0,100.00,52.00,2005.0,0.0,1.04,0.75,0.0,2.0,40.896076,-73.839879,BUSSING AVENUE
1,10468.0,40.866135,-73.902726,UNIVERSITY AVENUE,149,10465.0,1823.0,1499.0,0.0,1499.0,0.0,0.0,1.0,2.0,50.00,36.00,1940.0,0.0,0.82,0.75,0.0,2.0,40.813435,-73.822838,SCHURZ AVENUE
2,10463.0,40.878790,-73.901771,BAILEY AVENUE,136,10453.0,1500.0,3423.0,0.0,3423.0,0.0,0.0,1.0,3.0,100.00,37.00,2007.0,0.0,2.28,3.44,0.0,4.8,40.848281,-73.913303,DAVIDSON AVENUE
3,10455.0,40.812037,-73.906544,TINTON AVENUE,56,10469.0,1900.0,1911.0,0.0,1344.0,0.0,0.0,1.0,2.0,100.00,40.00,1965.0,1998.0,1.01,0.75,0.0,2.0,40.866312,-73.858935,PAULDING AVENUE
4,10460.0,40.846939,-73.885253,MAPES AVENUE,57,10461.0,2500.0,1536.0,0.0,1536.0,0.0,0.0,1.0,2.5,100.00,36.00,1920.0,0.0,0.61,0.75,0.0,2.0,40.847051,-73.852947,HONE AVENUE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17325,10458.0,40.856385,-73.889152,EAST STREET,76,10465.0,1800.0,1998.0,0.0,1998.0,0.0,0.0,1.0,3.0,100.00,37.00,1960.0,0.0,1.11,0.50,0.0,1.0,40.815113,-73.821479,HUNTINGTON AVENUE
17326,10459.0,40.818750,-73.898886,REV JAMES POLITE AVENUE,150,10459.0,2147.0,2280.0,0.0,2280.0,0.0,0.0,1.0,3.0,100.00,40.00,1991.0,0.0,1.06,3.44,0.0,6.5,40.828625,-73.895601,EAST STREET
17327,10460.0,40.842428,-73.880584,EAST STREET,7,10461.0,1676.0,1676.0,1676.0,0.0,0.0,1676.0,1.0,1.0,60.75,60.00,1928.0,0.0,1.00,3.44,0.0,4.8,40.844162,-73.830079,MIDDLETOWN ROAD
17328,10462.0,40.836695,-73.859511,PARKCHESTER ROAD,11,10455.0,3900.0,15414.0,0.0,15414.0,0.0,0.0,1.0,6.0,100.00,86.67,1906.0,1995.0,3.95,2.43,0.0,4.8,40.820495,-73.915185,EAST STREET


In [37]:
# Optional export of the combined frame to combine.csv.
# Left disabled, so running this cell writes nothing.
#df3.to_csv("combine.csv",index=False)

In [35]:
# Disabled alternative: build df3 with a column-wise pd.concat of df1 and df2
# instead of DataFrame.join.
#df3 = pd.concat([df1,df2],axis=1)

In [36]:
# Disabled alternative: build df3 with an outer merge of df1 and df2 keyed on
# the `address` column of each frame.
#df3 = pd.merge(left=df1,right=df2,left_on='address',right_on='address',how='outer')