In [88]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
crime_data_to_load = "data/Boston Crime Data.csv"
zip_data_to_load = "data/Boston_ZipCodes.csv"
dist_data_to_load = "data/districts.csv"
census_data_to_load = "data/Census Race Stats.csv"

# Read Data 
crime_data = pd.read_csv(crime_data_to_load)
# Prevent truncation of zero in Zipcode 
zip_data = pd.io.parsers.read_csv(zip_data_to_load, dtype={'Zip Code ': 'str'}).set_index('Zip Code ')
dist_data = pd.read_csv(dist_data_to_load)
cen_data = pd.io.parsers.read_csv(census_data_to_load, dtype={'Zip Code': 'str'})




# Combine the data into a single dataset
# data_combine = pd.merge(mouse_data,clinical_data, how='left', on=["Mouse ID", "Mouse ID"])
# Display the data table for preview
cen_data.head(2)


Unnamed: 0.1,Unnamed: 0,Zip Code,District,White Pop,Afr Am Pop,Asian Pop,Native Am Pacific Pop,Other Pop,Multi Race Pop
0,0,2108,Boston,3563.0,223.0,204.0,0.0,0.0,165.0
1,1,2109,Boston,3419.0,157.0,343.0,0.0,0.0,68.0


Index(['Zip Code ', 'District'], dtype='object')

#                                    Transformation - Crime Data 

In [8]:
crime_data.columns

Index(['INCIDENT_NUMBER', 'OFFENSE_CODE', 'OFFENSE_CODE_GROUP',
       'OFFENSE_DESCRIPTION', 'DISTRICT', 'REPORTING_AREA', 'SHOOTING',
       'OCCURRED_ON_DATE', 'YEAR', 'MONTH', 'DAY_OF_WEEK', 'HOUR', 'UCR_PART',
       'STREET', 'Lat', 'Long', 'Location'],
      dtype='object')

In [16]:
# kept occurred_on_date because the stripped out date didnt contain "day", easier to parse out the date later into the units needed 
Crime_data_trunc = crime_data[['OFFENSE_CODE','OFFENSE_CODE_GROUP',
       'OFFENSE_DESCRIPTION', 'DISTRICT', 'REPORTING_AREA', 'SHOOTING',
        'OCCURRED_ON_DATE']]

In [17]:
Crime_data_trunc.set_index('DISTRICT')

Unnamed: 0_level_0,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE
DISTRICT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
D14,619,Larceny,LARCENY ALL OTHERS,808,,9/2/2018 13:00
C11,1402,Vandalism,VANDALISM,347,,8/21/2018 0:00
D4,3410,Towed,TOWED MOTOR VEHICLE,151,,9/3/2018 19:27
D4,3114,Investigate Property,INVESTIGATE PROPERTY,272,,9/3/2018 21:16
B3,3114,Investigate Property,INVESTIGATE PROPERTY,421,,9/3/2018 21:05
C11,3820,Motor Vehicle Accident Response,M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY,398,,9/3/2018 21:09
B2,724,Auto Theft,AUTO THEFT,330,,9/3/2018 21:25
B2,3301,Verbal Disputes,VERBAL DISPUTE,584,,9/3/2018 20:39
C6,301,Robbery,ROBBERY - STREET,177,,9/3/2018 20:48
C11,3301,Verbal Disputes,VERBAL DISPUTE,364,,9/3/2018 20:38


In [1]:
# data_combine = pd.merge(Crime_data_trunc,clinical_data, how='left', on=["District", "District"])

#                               Transformation - District 

In [33]:
dist_data_clean[District_Address] =(dist_data_clean['District Names'].str.split(',').str[0] 
    

Unnamed: 0.1,Unnamed: 0,District Codes,District Names
0,0,District A-7,"69 Paris Street, East Boston, MA 02128"
1,1,District B-2,"2400 Washington Street, Roxbury, MA 02119"
2,2,District B-3,"1165 Blue Hill Avenue, Mattapan, MA 02124"
3,3,District C-6,"101 West Broadway, South Boston, MA 02127"
4,4,District C-11,"40 Gibson Street, Dorchester, MA 02122"
5,5,District D-4,"650 Harrison Avenue, Boston, MA 02116"
6,6,District D-14,"301 Washington Street, Brighton, MA 02135"
7,7,District E-5,"1708 Centre Street, West Roxbury, MA 02132"
8,8,District E-13,"3347 Washington Street, Jamaica Plain, MA 02130"
9,9,District E-18,"1249 Hyde Park Avenue, Hyde Park, MA 02136"


In [74]:
dist_data_clean = pd.DataFrame()
dist_data_clean['District_Address'] =(dist_data['District Names'].str.split(',').str[0])
dist_data_clean['District'] =(dist_data['District Names'].str.split(',').str[1])   
# Clean up Dual Precinct for Boston/Charlestown
# dist_data_clean.replace('40 New Sudbury Street Boston', '40 New Sudbury Street')
dist_data_clean.at[10, 'District_Address'] = '40 New Sudbury Street'
dist_data_clean.at[11, 'District_Address'] = '40 New Sudbury Street'
dist_data_clean.at[10, 'District'] = 'Charlestown'
dist_data_clean.at[11, 'District'] = 'Boston'
dist_data_clean

Unnamed: 0,District_Address,District
0,69 Paris Street,East Boston
1,2400 Washington Street,Roxbury
2,1165 Blue Hill Avenue,Mattapan
3,101 West Broadway,South Boston
4,40 Gibson Street,Dorchester
5,650 Harrison Avenue,Boston
6,301 Washington Street,Brighton
7,1708 Centre Street,West Roxbury
8,3347 Washington Street,Jamaica Plain
9,1249 Hyde Park Avenue,Hyde Park


#                               Transformation - Census Data

In [95]:
cen_data.dropna(inplace=True)
cen_data

Unnamed: 0.1,Unnamed: 0,Zip Code,District,White Pop,Afr Am Pop,Asian Pop,Native Am Pacific Pop,Other Pop,Multi Race Pop
0,0,2108,Boston,3563.0,223.0,204.0,0.0,0.0,165.0
1,1,2109,Boston,3419.0,157.0,343.0,0.0,0.0,68.0
2,2,2110,Boston,1896.0,92.0,260.0,0.0,0.0,22.0
3,3,2111,Boston,3250.0,156.0,3673.0,0.0,0.0,301.0
5,5,2113,Boston,7022.0,52.0,161.0,0.0,0.0,112.0
6,6,2114,Boston,10504.0,613.0,1403.0,0.0,0.0,264.0
7,7,2115,Boston,19506.0,2421.0,4531.0,32.0,32.0,948.0
8,8,2116,Boston,17684.0,1297.0,3310.0,0.0,0.0,605.0
10,10,2118,Roxbury,14830.0,4467.0,4363.0,0.0,0.0,1267.0
11,11,2119,Roxbury,4257.0,16232.0,866.0,7.0,7.0,1542.0


In [None]:
# # print(type(dataFF))
# data_frames = [dataFF,dataBB ]

# #merge as needed
# Final_Data2 = reduce(lambda  left,right: pd.merge(left,right,on=['Date', 'Tableau Geo', 'Tableau LOB','TM1 Geography','Account', 'Scenario' ],
#                                             how='outer'), data_frames).fillna(0)