### Data Dictionary

The goal of this exercise is to do Model Development and Validation to find the answer to the Question 4 of the problem statement:

Can a predictive model be built for future prediction of the possibility of complaints of the specific type that you identified in response to Question 1?

Using the best model, you need to predict the number of future complaints (of the Complaint Type that you decided to focus on in Question 1).

### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

import xgboost as xgb
from xgboost import XGBClassifier, XGBRegressor
from xgboost import to_graphviz, plot_importance

%matplotlib inline
sns.set_style('dark')
sns.set(font_scale=1.5)

from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report, mean_absolute_error, mean_squared_error,r2_score
from sklearn.metrics import plot_confusion_matrix, plot_precision_recall_curve, plot_roc_curve, accuracy_score
from sklearn.metrics import auc, f1_score, precision_score, recall_score, roc_auc_score

import feature_engine.missing_data_imputers as mdi
from feature_engine.outlier_removers import Winsorizer

import warnings
warnings.filterwarnings('ignore')

import pickle
from pickle import dump, load

pd.options.display.max_columns= None
#pd.options.display.max_rows = None

### Data Exploration

In [2]:
df = pd.read_csv("heat.csv")

In [3]:
df

Unnamed: 0,ComplaintType,Zipcode,Street,Borough
0,HEAT/HOT WATER,10019.0,WEST 52 STREET,MANHATTAN
1,HEAT/HOT WATER,11372.0,37 AVENUE,QUEENS
2,HEAT/HOT WATER,10458.0,SOUTHERN BOULEVARD,BRONX
3,HEAT/HOT WATER,10456.0,MORRIS AVENUE,BRONX
4,HEAT/HOT WATER,11372.0,81 STREET,QUEENS
...,...,...,...,...
1847750,HEAT/HOT WATER,10029.0,EAST 108 STREET,MANHATTAN
1847751,HEAT/HOT WATER,10029.0,EAST 108 STREET,MANHATTAN
1847752,HEAT/HOT WATER,10461.0,BRUCKNER BOULEVARD,BRONX
1847753,HEAT/HOT WATER,10034.0,SHERMAN AVENUE,MANHATTAN


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1847755 entries, 0 to 1847754
Data columns (total 4 columns):
 #   Column         Dtype  
---  ------         -----  
 0   ComplaintType  object 
 1   Zipcode        float64
 2   Street         object 
 3   Borough        object 
dtypes: float64(1), object(3)
memory usage: 56.4+ MB


In [5]:
df.shape

(1847755, 4)

In [6]:
df.columns

Index(['ComplaintType', 'Zipcode', 'Street', 'Borough'], dtype='object')

### Data Preprocessing

### Treat Missing Values

In [7]:
df.isnull().sum()

ComplaintType    0
Zipcode          0
Street           0
Borough          0
dtype: int64

### Segment Heat/Hot Water Cases for Bronx

In [8]:
df['Borough'].value_counts()

BRONX            600147
BROOKLYN         569310
MANHATTAN        418432
QUEENS           241660
STATEN ISLAND     18206
Name: Borough, dtype: int64

In [9]:
bronx = df[df['Borough'] == 'BRONX']

In [10]:
bronx

Unnamed: 0,ComplaintType,Zipcode,Street,Borough
2,HEAT/HOT WATER,10458.0,SOUTHERN BOULEVARD,BRONX
3,HEAT/HOT WATER,10456.0,MORRIS AVENUE,BRONX
6,HEAT/HOT WATER,10459.0,ALDUS STREET,BRONX
12,HEAT/HOT WATER,10473.0,BOYNTON AVENUE,BRONX
18,HEAT/HOT WATER,10463.0,KINGSBRIDGE TERRACE,BRONX
...,...,...,...,...
1847747,HEAT/HOT WATER,10452.0,WOODYCREST AVENUE,BRONX
1847748,HEAT/HOT WATER,10470.0,EAST 242 STREET,BRONX
1847749,HEAT/HOT WATER,10458.0,BAINBRIDGE AVENUE,BRONX
1847752,HEAT/HOT WATER,10461.0,BRUCKNER BOULEVARD,BRONX


In [11]:
# Save to CSV
#df.to_csv("bronxheat.csv",index=False)

In [12]:
df2 = pd.read_csv("partfive.csv")

In [13]:
df2

Unnamed: 0,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
0,122 BRUCKNER BOULEVARD,2340,46.0,0.90,5.0,6.5,1,15000,200.0,1,2.0,2628,2000,6.02,2458,1935,2000,10454,65,85
1,126 BRUCKNER BOULEVARD,752,16.0,0.05,5.0,6.5,4,13770,100.0,2,1.0,272,2000,6.02,2458,1931,1994,10454,63,89
2,138 BRUCKNER BOULEVARD,39375,200.0,1.13,5.0,6.5,10,35000,200.0,1,2.0,2628,2000,6.02,2458,1931,2000,10454,69,89
3,144 BRUCKNER BOULEVARD,12500,85.0,5.00,5.0,6.5,17,2500,100.0,1,5.0,2628,12500,6.02,2458,1931,2001,10454,70,89
4,148 BRUCKNER BOULEVARD,8595,70.0,4.58,5.0,6.5,18,1875,75.0,1,5.0,2628,6876,6.02,1719,1920,2009,10454,89,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89849,GOLD STREET,2340,46.0,0.90,2.0,2.0,100,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89850,GOLD STREET,2340,46.0,0.90,3.4,6.5,150,2513,100.0,1,2.0,2628,2000,6.02,2458,1935,2000,10465,65,85
89851,GOLD STREET,2340,46.0,0.90,2.0,2.0,200,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89852,GOLD STREET,2340,46.0,0.90,2.0,2.0,8900,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85


In [14]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89854 entries, 0 to 89853
Data columns (total 20 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Address     89854 non-null  object 
 1   BldgArea    89854 non-null  int64  
 2   BldgDepth   89854 non-null  float64
 3   BuiltFAR    89854 non-null  float64
 4   CommFAR     89854 non-null  float64
 5   FacilFAR    89854 non-null  float64
 6   Lot         89854 non-null  int64  
 7   LotArea     89854 non-null  int64  
 8   LotDepth    89854 non-null  float64
 9   NumBldgs    89854 non-null  int64  
 10  NumFloors   89854 non-null  float64
 11  OfficeArea  89854 non-null  int64  
 12  ResArea     89854 non-null  int64  
 13  ResidFAR    89854 non-null  float64
 14  RetailArea  89854 non-null  int64  
 15  YearBuilt   89854 non-null  int64  
 16  YearAlter1  89854 non-null  int64  
 17  ZipCode     89854 non-null  int64  
 18  Age         89854 non-null  int64  
 19  Period      89854 non-nul

In [15]:
bs = bronx.sample(n=89854)

In [16]:
bs

Unnamed: 0,ComplaintType,Zipcode,Street,Borough
418099,HEAT/HOT WATER,10458.0,MARION AVENUE,BRONX
1505604,HEAT/HOT WATER,10452.0,WALTON AVENUE,BRONX
1532844,HEAT/HOT WATER,10462.0,NEWBOLD AVENUE,BRONX
558777,HEAT/HOT WATER,10457.0,EAST 176 STREET,BRONX
622866,HEAT/HOT WATER,10467.0,HOLLAND AVENUE,BRONX
...,...,...,...,...
540782,HEAT/HOT WATER,10458.0,BEAUMONT AVENUE,BRONX
588226,HEAT/HOT WATER,10463.0,BAILEY AVENUE,BRONX
1783443,HEAT/HOT WATER,10452.0,JESUP AVENUE,BRONX
618762,HEAT/HOT WATER,10451.0,EAST 162 STREET,BRONX


In [17]:
bs.reset_index(drop=True,inplace=True)

In [18]:
df3 = pd.concat([bs,df2],axis=1)

In [19]:
df3

Unnamed: 0,ComplaintType,Zipcode,Street,Borough,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
0,HEAT/HOT WATER,10458.0,MARION AVENUE,BRONX,122 BRUCKNER BOULEVARD,2340,46.0,0.90,5.0,6.5,1,15000,200.0,1,2.0,2628,2000,6.02,2458,1935,2000,10454,65,85
1,HEAT/HOT WATER,10452.0,WALTON AVENUE,BRONX,126 BRUCKNER BOULEVARD,752,16.0,0.05,5.0,6.5,4,13770,100.0,2,1.0,272,2000,6.02,2458,1931,1994,10454,63,89
2,HEAT/HOT WATER,10462.0,NEWBOLD AVENUE,BRONX,138 BRUCKNER BOULEVARD,39375,200.0,1.13,5.0,6.5,10,35000,200.0,1,2.0,2628,2000,6.02,2458,1931,2000,10454,69,89
3,HEAT/HOT WATER,10457.0,EAST 176 STREET,BRONX,144 BRUCKNER BOULEVARD,12500,85.0,5.00,5.0,6.5,17,2500,100.0,1,5.0,2628,12500,6.02,2458,1931,2001,10454,70,89
4,HEAT/HOT WATER,10467.0,HOLLAND AVENUE,BRONX,148 BRUCKNER BOULEVARD,8595,70.0,4.58,5.0,6.5,18,1875,75.0,1,5.0,2628,6876,6.02,1719,1920,2009,10454,89,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89849,HEAT/HOT WATER,10458.0,BEAUMONT AVENUE,BRONX,GOLD STREET,2340,46.0,0.90,2.0,2.0,100,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89850,HEAT/HOT WATER,10463.0,BAILEY AVENUE,BRONX,GOLD STREET,2340,46.0,0.90,3.4,6.5,150,2513,100.0,1,2.0,2628,2000,6.02,2458,1935,2000,10465,65,85
89851,HEAT/HOT WATER,10452.0,JESUP AVENUE,BRONX,GOLD STREET,2340,46.0,0.90,2.0,2.0,200,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89852,HEAT/HOT WATER,10451.0,EAST 162 STREET,BRONX,GOLD STREET,2340,46.0,0.90,2.0,2.0,8900,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85


In [20]:
df3['Zipcode'].value_counts()

10467.0    8490
10458.0    8479
10468.0    7608
10453.0    7357
10452.0    6546
10457.0    6164
10456.0    5829
10462.0    5243
10472.0    4540
10463.0    4487
10460.0    4153
10451.0    3516
10466.0    3251
10459.0    2911
10455.0    2301
10461.0    2120
10454.0    1456
10473.0    1329
10469.0    1289
10470.0    1006
10474.0     714
10471.0     561
10465.0     325
10475.0     120
10464.0      59
Name: Zipcode, dtype: int64

In [21]:
df3['ZipCode'].value_counts()

10469    11349
10466     9229
10465     8492
10461     7499
10473     4870
10467     4673
10462     4671
10472     4329
10457     3028
10460     2892
10456     2876
10458     2769
10459     2605
10471     2352
10470     2334
10463     2247
10453     2102
10455     1793
10454     1746
10468     1627
10464     1550
10452     1548
10451     1228
10474     1220
10475      823
11370        2
Name: ZipCode, dtype: int64

In [22]:
df3['Street'].value_counts()

GRAND CONCOURSE             5017
MORRIS AVENUE               2121
BOYNTON AVENUE              1612
BAILEY AVENUE               1416
DR M L KING JR BOULEVARD    1271
                            ... 
SACKETT AVENUE                 1
SAXON AVENUE                   1
WEST  176 STREET               1
SEMINOLE STREET                1
WATERS PLACE                   1
Name: Street, Length: 1061, dtype: int64

In [23]:
df3['Address'].value_counts()

SHORE DRIVE             43
PALISADE AVENUE         37
PARK AVENUE             35
WHITE PLAINS ROAD       35
BRUCKNER BOULEVARD      34
                        ..
3469 BRUNER AVENUE       1
137 ALEXANDER AVENUE     1
1291 CLAY AVENUE         1
2659 HARDING AVENUE      1
775 EAST 236 STREET      1
Name: Address, Length: 87035, dtype: int64

In [24]:
df3.columns

Index(['ComplaintType', 'Zipcode', 'Street', 'Borough', 'Address', 'BldgArea',
       'BldgDepth', 'BuiltFAR', 'CommFAR', 'FacilFAR', 'Lot', 'LotArea',
       'LotDepth', 'NumBldgs', 'NumFloors', 'OfficeArea', 'ResArea',
       'ResidFAR', 'RetailArea', 'YearBuilt', 'YearAlter1', 'ZipCode', 'Age',
       'Period'],
      dtype='object')

In [25]:
df3.drop(['ComplaintType', 'Zipcode','Borough'],axis=1,inplace=True)

In [26]:
df3

Unnamed: 0,Street,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
0,MARION AVENUE,122 BRUCKNER BOULEVARD,2340,46.0,0.90,5.0,6.5,1,15000,200.0,1,2.0,2628,2000,6.02,2458,1935,2000,10454,65,85
1,WALTON AVENUE,126 BRUCKNER BOULEVARD,752,16.0,0.05,5.0,6.5,4,13770,100.0,2,1.0,272,2000,6.02,2458,1931,1994,10454,63,89
2,NEWBOLD AVENUE,138 BRUCKNER BOULEVARD,39375,200.0,1.13,5.0,6.5,10,35000,200.0,1,2.0,2628,2000,6.02,2458,1931,2000,10454,69,89
3,EAST 176 STREET,144 BRUCKNER BOULEVARD,12500,85.0,5.00,5.0,6.5,17,2500,100.0,1,5.0,2628,12500,6.02,2458,1931,2001,10454,70,89
4,HOLLAND AVENUE,148 BRUCKNER BOULEVARD,8595,70.0,4.58,5.0,6.5,18,1875,75.0,1,5.0,2628,6876,6.02,1719,1920,2009,10454,89,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89849,BEAUMONT AVENUE,GOLD STREET,2340,46.0,0.90,2.0,2.0,100,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89850,BAILEY AVENUE,GOLD STREET,2340,46.0,0.90,3.4,6.5,150,2513,100.0,1,2.0,2628,2000,6.02,2458,1935,2000,10465,65,85
89851,JESUP AVENUE,GOLD STREET,2340,46.0,0.90,2.0,2.0,200,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89852,EAST 162 STREET,GOLD STREET,2340,46.0,0.90,2.0,2.0,8900,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85


In [27]:
# Save to CSV
# df3.to_csv("partsix.csv",index=False)

In [28]:
streets = df3.groupby(['Street']).count()

In [29]:
streets

Unnamed: 0_level_0,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
Street,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
229 DRIVE NORTH,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
229 DRIVE SOUTH,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
3 AVENUE,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318
ADAMS PLACE,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36
ADAMS STREET,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WYATT STREET,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33
WYTHE PLACE,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56
YATES AVENUE,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25
YOUNG AVENUE,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12


In [30]:
zipcode = df3.groupby(['ZipCode']).count()

In [31]:
zipcode

Unnamed: 0_level_0,Street,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,Age,Period
ZipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10451,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228
10452,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548,1548
10453,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102,2102
10454,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746,1746
10455,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793
10456,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876,2876
10457,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028,3028
10458,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769,2769
10459,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605,2605
10460,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892,2892


In [32]:
len(zipcode)

26

In [33]:
address = df3.groupby(['Address']).count()

In [34]:
address

Unnamed: 0_level_0,Street,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
Address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1 ALDEN PARK,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
1 ANGELAS PLACE,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1 BANES COURT,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1 BEDFORD PARK BLVD,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1 BLACKSTONE PLACE,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YATES AVENUE,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
YOUNG AVENUE,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
YZNAGA PLACE,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
ZEREGA AVENUE,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7


In [35]:
address['Street']

Address
1 ALDEN PARK           2
1 ANGELAS PLACE        1
1 BANES COURT          1
1 BEDFORD PARK BLVD    1
1 BLACKSTONE PLACE     1
                      ..
YATES AVENUE           6
YOUNG AVENUE           3
YZNAGA PLACE           5
ZEREGA AVENUE          7
ZULETTE AVENUE         1
Name: Street, Length: 87035, dtype: int64

In [36]:
cases = pd.DataFrame(data=address['Street'])

In [37]:
cases

Unnamed: 0_level_0,Street
Address,Unnamed: 1_level_1
1 ALDEN PARK,2
1 ANGELAS PLACE,1
1 BANES COURT,1
1 BEDFORD PARK BLVD,1
1 BLACKSTONE PLACE,1
...,...
YATES AVENUE,6
YOUNG AVENUE,3
YZNAGA PLACE,5
ZEREGA AVENUE,7


In [38]:
cases.reset_index(drop=True,inplace=True)

In [39]:
cases

Unnamed: 0,Street
0,2
1,1
2,1
3,1
4,1
...,...
87030,6
87031,3
87032,5
87033,7


In [40]:
cases.columns = ['Cases']

In [41]:
cases

Unnamed: 0,Cases
0,2
1,1
2,1
3,1
4,1
...,...
87030,6
87031,3
87032,5
87033,7


In [42]:
df3

Unnamed: 0,Street,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
0,MARION AVENUE,122 BRUCKNER BOULEVARD,2340,46.0,0.90,5.0,6.5,1,15000,200.0,1,2.0,2628,2000,6.02,2458,1935,2000,10454,65,85
1,WALTON AVENUE,126 BRUCKNER BOULEVARD,752,16.0,0.05,5.0,6.5,4,13770,100.0,2,1.0,272,2000,6.02,2458,1931,1994,10454,63,89
2,NEWBOLD AVENUE,138 BRUCKNER BOULEVARD,39375,200.0,1.13,5.0,6.5,10,35000,200.0,1,2.0,2628,2000,6.02,2458,1931,2000,10454,69,89
3,EAST 176 STREET,144 BRUCKNER BOULEVARD,12500,85.0,5.00,5.0,6.5,17,2500,100.0,1,5.0,2628,12500,6.02,2458,1931,2001,10454,70,89
4,HOLLAND AVENUE,148 BRUCKNER BOULEVARD,8595,70.0,4.58,5.0,6.5,18,1875,75.0,1,5.0,2628,6876,6.02,1719,1920,2009,10454,89,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89849,BEAUMONT AVENUE,GOLD STREET,2340,46.0,0.90,2.0,2.0,100,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89850,BAILEY AVENUE,GOLD STREET,2340,46.0,0.90,3.4,6.5,150,2513,100.0,1,2.0,2628,2000,6.02,2458,1935,2000,10465,65,85
89851,JESUP AVENUE,GOLD STREET,2340,46.0,0.90,2.0,2.0,200,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85
89852,EAST 162 STREET,GOLD STREET,2340,46.0,0.90,2.0,2.0,8900,2513,100.0,1,2.0,2628,2000,1.25,2458,1935,2000,10465,65,85


In [43]:
df4 = df3.iloc[0:87035,:]

In [44]:
df4

Unnamed: 0,Street,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period
0,MARION AVENUE,122 BRUCKNER BOULEVARD,2340,46.0,0.90,5.0,6.5,1,15000,200.0,1,2.0,2628,2000,6.02,2458,1935,2000,10454,65,85
1,WALTON AVENUE,126 BRUCKNER BOULEVARD,752,16.0,0.05,5.0,6.5,4,13770,100.0,2,1.0,272,2000,6.02,2458,1931,1994,10454,63,89
2,NEWBOLD AVENUE,138 BRUCKNER BOULEVARD,39375,200.0,1.13,5.0,6.5,10,35000,200.0,1,2.0,2628,2000,6.02,2458,1931,2000,10454,69,89
3,EAST 176 STREET,144 BRUCKNER BOULEVARD,12500,85.0,5.00,5.0,6.5,17,2500,100.0,1,5.0,2628,12500,6.02,2458,1931,2001,10454,70,89
4,HOLLAND AVENUE,148 BRUCKNER BOULEVARD,8595,70.0,4.58,5.0,6.5,18,1875,75.0,1,5.0,2628,6876,6.02,1719,1920,2009,10454,89,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87030,FRANKLIN AVENUE,3605 KINGSBRIDGE AVENUE,65250,87.0,5.22,2.0,2.0,775,12500,100.0,1,6.0,2628,59750,0.90,5500,1929,1990,10463,61,91
87031,EAST 188 STREET,3610 CORLEAR AVENUE,1620,36.0,0.90,2.0,2.0,880,1800,100.0,1,2.0,2628,1296,0.90,2458,1940,2000,10463,60,80
87032,EAST 213 STREET,3616 CORLEAR AVENUE,1616,44.0,0.65,2.0,2.0,881,2500,100.0,1,1.0,2628,944,0.90,2458,1951,2000,10463,49,69
87033,SEDGWICK AVENUE,3633 KINGSBRIDGE AVENUE,1880,45.0,0.25,2.0,2.0,882,7500,100.0,2,1.0,2628,2000,0.90,2458,1950,2000,10463,50,70


In [45]:
df5 = pd.concat([df4,cases],axis=1)

In [46]:
df5

Unnamed: 0,Street,Address,BldgArea,BldgDepth,BuiltFAR,CommFAR,FacilFAR,Lot,LotArea,LotDepth,NumBldgs,NumFloors,OfficeArea,ResArea,ResidFAR,RetailArea,YearBuilt,YearAlter1,ZipCode,Age,Period,Cases
0,MARION AVENUE,122 BRUCKNER BOULEVARD,2340,46.0,0.90,5.0,6.5,1,15000,200.0,1,2.0,2628,2000,6.02,2458,1935,2000,10454,65,85,2
1,WALTON AVENUE,126 BRUCKNER BOULEVARD,752,16.0,0.05,5.0,6.5,4,13770,100.0,2,1.0,272,2000,6.02,2458,1931,1994,10454,63,89,1
2,NEWBOLD AVENUE,138 BRUCKNER BOULEVARD,39375,200.0,1.13,5.0,6.5,10,35000,200.0,1,2.0,2628,2000,6.02,2458,1931,2000,10454,69,89,1
3,EAST 176 STREET,144 BRUCKNER BOULEVARD,12500,85.0,5.00,5.0,6.5,17,2500,100.0,1,5.0,2628,12500,6.02,2458,1931,2001,10454,70,89,1
4,HOLLAND AVENUE,148 BRUCKNER BOULEVARD,8595,70.0,4.58,5.0,6.5,18,1875,75.0,1,5.0,2628,6876,6.02,1719,1920,2009,10454,89,100,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87030,FRANKLIN AVENUE,3605 KINGSBRIDGE AVENUE,65250,87.0,5.22,2.0,2.0,775,12500,100.0,1,6.0,2628,59750,0.90,5500,1929,1990,10463,61,91,6
87031,EAST 188 STREET,3610 CORLEAR AVENUE,1620,36.0,0.90,2.0,2.0,880,1800,100.0,1,2.0,2628,1296,0.90,2458,1940,2000,10463,60,80,3
87032,EAST 213 STREET,3616 CORLEAR AVENUE,1616,44.0,0.65,2.0,2.0,881,2500,100.0,1,1.0,2628,944,0.90,2458,1951,2000,10463,49,69,5
87033,SEDGWICK AVENUE,3633 KINGSBRIDGE AVENUE,1880,45.0,0.25,2.0,2.0,882,7500,100.0,2,1.0,2628,2000,0.90,2458,1950,2000,10463,50,70,7


In [47]:
#Save to CSV
#df5.to_csv("regression.csv",index=False)