In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API Key
from config import api_key
c = Census(api_key, year=2009)

In [2]:
midwest_data = pd.read_csv('MidwestMurderData.csv')
midwest_data

Unnamed: 0,ID,CNTYFIPS,Ori,State,Agency,Agentype,Source,Solved,Year,StateName,...,OffEthnic,Weapon,Relationship,Circumstance,Subcircum,VicCount,OffCount,FileDate,fstate,MSA
0,197601001IA00801,"Boone, IA",IA00801,Iowa,Boone,Municipal police,FBI,No,1976,IOWA,...,Unknown or not reported,Knife or cutting instrument,Relationship not determined,Other arguments,,0,0,30180.0,Iowa,Rural Iowa
1,197601001IA04400,"Henry, IA",IA04400,Iowa,Henry County,Sheriff,FBI,Yes,1976,IOWA,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Robbery,,0,0,30180.0,Iowa,Rural Iowa
2,197601001IA05202,"Johnson, IA",IA05202,Iowa,Iowa City,Municipal police,FBI,Yes,1976,IOWA,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Other arguments,,0,0,30180.0,Iowa,"Iowa City, IA"
3,197601001IA05701,"Linn, IA",IA05701,Iowa,Cedar Rapids,Municipal police,FBI,Yes,1976,IOWA,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Husband,Other arguments,,0,0,30180.0,Iowa,"Cedar Rapids, IA"
4,197601001IA05703,"Linn, IA",IA05703,Iowa,Hiawatha,Municipal police,FBI,Yes,1976,IOWA,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Stranger,Brawl due to influence of alcohol,,0,0,30180.0,Iowa,"Cedar Rapids, IA"
5,197601001IA06400,"Marshall, IA",IA06400,Iowa,Marshall County,Sheriff,FBI,Yes,1976,IOWA,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Lovers triangle,,0,0,30180.0,Iowa,Rural Iowa
6,197601001IA07800,"Pottawattamie, IA",IA07800,Iowa,Pottawattamie County,Sheriff,FBI,Yes,1976,IOWA,...,Unknown or not reported,Shotgun,Stranger,Other - not specified,,0,4,30180.0,Iowa,"Omaha-Council Bluffs, NE-IA"
7,197601001IL00103,"Adams, IL",IL00103,Illinois,Quincy,Municipal police,FBI,Yes,1976,ILL,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Friend,Brawl due to influence of alcohol,,0,1,30180.0,Illinois,Rural Illinois
8,197601001IL01000,"Champaign, IL",IL01000,Illinois,Champaign County,Sheriff,FBI,No,1976,ILL,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Relationship not determined,Narcotic drug laws,,0,0,30180.0,Illinois,"Champaign-Urbana, IL"
9,197601001IL01600,"Cook, IL",IL01600,Illinois,Cook County,Sheriff,FBI,Yes,1976,ILL,...,Unknown or not reported,"Handgun - pistol, revolver, etc",Stranger,Robbery,,0,1,30180.0,Illinois,"Chicago-Naperville-Joliet, IL-IN-WI"


In [3]:
# From 2009 census, the same year from which MSA codes/labels in midwest_data were taken, grab metro populations
# Filter and clean census metro data to get just the metro areas that match those in midwest_data above

acs_msa = c.acs5.get(('NAME', 'B01003_001E'), \
                    {'for': 'metropolitan statistical area/micropolitan statistical area:*'})
metrodf = pd.DataFrame(acs_msa)
metrodf = metrodf.rename(columns={"B01003_001E": "Population", "NAME": "MSA1",
                                  "metropolitan statistical area/micropolitan statistical area": "MSA Code"})
metro_only = metrodf[metrodf['MSA1'].str.contains('Metro Area')]

# new data frame with split value columns to isolate 'Metro Area'
new_metro = metro_only["MSA1"].str.split(" Metro Area", n = 1, expand = True)  
# making separate MSA column from new data frame 
metro_only["MSA"]= new_metro[0] 
# Dropping old MSA columns 
metro_only.drop(columns =["MSA1"], inplace = True) 

# new data frame with split columns to isolate state abbreviations
metrostates = metro_only["MSA"].str.split(", ", n = 1, expand = True) 
# add separate state column from new df
metro_only['State'] = metrostates[1]

# create list of unique values in state column to identify all possible midwest state labels in MidwestMurderData.csv
stateslist = metro_only['State'].unique().tolist() 
midweststates = ['OH-PA', 'KS', 'WV-OH', 'WI', 'IA', 'OH', 'IN', 'MO', 'IL', 'IN-MI', 'SD', 'IA-NE-SD', \
                  'MO-IL', 'MO-KS', 'MN', 'MI', 'NE-IA', 'MN-WI', 'KY-IN', 'NE', 'WI-MN', 'WV-KY-OH', \
                  'ND-MN', 'AR-MO', 'IN-KY', 'IA-IL', 'OH-KY-IN', 'IL-IN-WI', 'ND']
# new dataframe with just state values in the midweststates list, reset index 
midwest_metros = metro_only[metro_only['State'].isin(midweststates)]
midwest_metros = midwest_metros.reset_index(drop=True)
midwest_metros

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Population,MSA Code,MSA,State
0,571519.0,49660,"Youngstown-Warren-Boardman, OH-PA",OH-PA
1,596643.0,48620,"Wichita, KS",KS
2,145942.0,48540,"Wheeling, WV-OH",WV-OH
3,122778.0,48260,"Weirton-Steubenville, WV-OH",WV-OH
4,129803.0,48140,"Wausau, WI",WI
5,163040.0,47940,"Waterloo-Cedar Falls, IA",IA
6,228761.0,45820,"Topeka, KS",KS
7,673545.0,45780,"Toledo, OH",OH
8,169733.0,45460,"Terre Haute, IN",IN
9,140419.0,44220,"Springfield, OH",OH


In [4]:
# Back in the midwest murder dataframe, get aggregates for each MSA
msa_murdercounts = midwest_data.groupby(['MSA']).count().reset_index()
msa_murdercounts



Unnamed: 0,MSA,ID,CNTYFIPS,Ori,State,Agency,Agentype,Source,Solved,Year,...,OffRace,OffEthnic,Weapon,Relationship,Circumstance,Subcircum,VicCount,OffCount,FileDate,fstate
0,"Akron, OH",1132,1132,1132,1132,1132,1132,1132,1132,1132,...,1132,1132,1132,1132,1132,17,1132,1132,1132,1132
1,"Ames, IA",27,27,27,27,27,27,27,27,27,...,27,27,27,27,27,0,27,27,27,27
2,"Anderson, IN",144,144,144,144,144,144,144,144,144,...,144,144,144,144,144,1,144,144,144,144
3,"Ann Arbor, MI",435,435,435,435,435,435,435,435,435,...,435,435,435,435,435,9,435,435,435,435
4,"Appleton, WI",81,81,81,81,81,81,81,81,81,...,81,81,81,81,81,2,81,81,81,81
5,"Battle Creek, MI",378,378,378,378,378,378,378,378,378,...,378,378,378,378,378,12,378,378,378,378
6,"Bay City, MI",98,98,98,98,98,98,98,98,98,...,98,98,98,98,98,7,98,98,98,98
7,"Bismarck, ND",68,68,68,68,68,68,68,68,68,...,68,68,68,68,68,2,68,68,68,68
8,"Bloomington, IN",139,139,139,139,139,139,139,139,139,...,139,139,139,139,139,2,139,139,139,139
9,"Bloomington-Normal, IL",85,85,85,85,85,85,85,85,85,...,85,85,85,85,85,1,85,85,85,85


In [5]:
# Add murder counts to midwest_metros df
midwest_metros['Murders, 1976-2018'] = msa_murdercounts
midwest_metros

ValueError: Wrong number of items passed 32, placement implies 1

In [None]:
# census_data_cntyfips = c.acs5.get(('NAME', 'B01003_001E'), {'for': 'county:*', 'in': 'state:*'})
# census_df_cntyfips = pd.DataFrame(census_data_cntyfips)
# census_df_cntyfips = census_df_cntyfips.rename(columns={"B01003_001E": "Population",
#                                                         "B19301_001E": "Per Capita Income",
#                                                         "B17001_002E": "Poverty Count",
#                                                         "NAME": "Name",
#                                                         "county": "County"})
# census_df_cntyfips
