In [None]:
# Import libraries
import pandas as pd
import numpy as np
import re

# Keep DataFrame previews short: render at most 7 rows and 10 columns
pd.set_option('display.max_rows', 7)
pd.set_option('display.max_columns', 10)

In [None]:
# Read the scraped US crime statistics from CSV and preview the frame
crime_csv_path = 'UScrime_scraped.csv'
df_crime = pd.read_csv(crime_csv_path)
df_crime

Unnamed: 0.1,Unnamed: 0,city,state,murders,murdersper100k,...,autothefts,autotheftsper100k,arson,arsonper100k,crime index
0,0,Abbeville,Alabama,0.0,0.0,...,6.0,231.7,,,265.3
1,1,Adamsville,Alabama,1.0,23.1,...,15.0,346.1,,,468.4
2,2,Addison,Alabama,0.0,0.0,...,2.0,271.0,,,117.9
...,...,...,...,...,...,...,...,...,...,...,...
9862,9862,Torrington,Wyoming,0.0,0.0,...,3.0,44.6,0.0,0.0,91.5
9863,9863,Wheatland,Wyoming,0.0,0.0,...,7.0,194.3,0.0,0.0,117.2
9864,9864,Worland,Wyoming,0.0,0.0,...,3.0,56.7,0.0,0.0,118.4


In [None]:
# Read the scraped household income data from CSV and preview the frame
income_csv_path = 'economic_data.csv'
df_income = pd.read_csv(income_csv_path)
df_income

Unnamed: 0.1,Unnamed: 0,place,percentile,income,percent
0,0,https://statisticalatlas.com/place/Alabama/Ala...,95th,"0 177,359.000000\rdtype: object",0 241.881\rdtype: object
1,1,https://statisticalatlas.com/place/Alabama/Ala...,80th,"0 122,176.000000\rdtype: object",0 166.623\rdtype: object
2,2,https://statisticalatlas.com/place/Alabama/Ala...,60th,"0 85,277.000000\rdtype: object",0 116.300\rdtype: object
...,...,...,...,...,...
10869,3,https://statisticalatlas.com/place/Wyoming/Pow...,Median,"0 46,971.000000\rdtype: object",0 100.000\rdtype: object
10870,4,https://statisticalatlas.com/place/Wyoming/Pow...,40th,"0 40,583.000000\rdtype: object",0 86.400\rdtype: object
10871,5,https://statisticalatlas.com/place/Wyoming/Pow...,20th,"0 23,336.000000\rdtype: object",0 49.682\rdtype: object


In [None]:
# Pull the "<state>/<city>" portion out of each place URL: the capture group is
# everything between "place/" and the last "/" in the URL
place_urls = df_income['place']
df_income['citystate'] = place_urls.str.extract("place/(.*)/")
df_income['citystate']

0        Alabama/Alabaster
1        Alabama/Alabaster
2        Alabama/Alabaster
               ...        
10869       Wyoming/Powell
10870       Wyoming/Powell
10871       Wyoming/Powell
Name: citystate, Length: 10872, dtype: object

In [None]:
# The state is the part of "<state>/<city>" that comes before the slash
state_and_city = df_income['citystate']
df_income['state'] = state_and_city.str.extract("(.*)/")
df_income['state']

0        Alabama
1        Alabama
2        Alabama
          ...   
10869    Wyoming
10870    Wyoming
10871    Wyoming
Name: state, Length: 10872, dtype: object

In [None]:
# The city is the part of "<state>/<city>" that comes after the slash
state_and_city = df_income['citystate']
df_income['city'] = state_and_city.str.extract("/(.*)")
df_income['city']

0        Alabaster
1        Alabaster
2        Alabaster
           ...    
10869       Powell
10870       Powell
10871       Powell
Name: city, Length: 10872, dtype: object

In [None]:
# The source URL and the intermediate "<state>/<city>" string are no longer
# needed once 'state' and 'city' exist, so drop both columns
df_income = df_income.drop(columns=['place', 'citystate'])
df_income

Unnamed: 0.1,Unnamed: 0,percentile,income,percent,state,city
0,0,95th,"0 177,359.000000\rdtype: object",0 241.881\rdtype: object,Alabama,Alabaster
1,1,80th,"0 122,176.000000\rdtype: object",0 166.623\rdtype: object,Alabama,Alabaster
2,2,60th,"0 85,277.000000\rdtype: object",0 116.300\rdtype: object,Alabama,Alabaster
...,...,...,...,...,...,...
10869,3,Median,"0 46,971.000000\rdtype: object",0 100.000\rdtype: object,Wyoming,Powell
10870,4,40th,"0 40,583.000000\rdtype: object",0 86.400\rdtype: object,Wyoming,Powell
10871,5,20th,"0 23,336.000000\rdtype: object",0 49.682\rdtype: object,Wyoming,Powell


In [None]:
# Put the identifying columns (city, state) ahead of the income figures
column_order = ['Unnamed: 0', 'city', 'state', 'percentile', 'income', 'percent']
df_income = df_income[column_order]
df_income

Unnamed: 0.1,Unnamed: 0,city,state,percentile,income,percent
0,0,Alabaster,Alabama,95th,"0 177,359.000000\rdtype: object",0 241.881\rdtype: object
1,1,Alabaster,Alabama,80th,"0 122,176.000000\rdtype: object",0 166.623\rdtype: object
2,2,Alabaster,Alabama,60th,"0 85,277.000000\rdtype: object",0 116.300\rdtype: object
...,...,...,...,...,...,...
10869,3,Powell,Wyoming,Median,"0 46,971.000000\rdtype: object",0 100.000\rdtype: object
10870,4,Powell,Wyoming,40th,"0 40,583.000000\rdtype: object",0 86.400\rdtype: object
10871,5,Powell,Wyoming,20th,"0 23,336.000000\rdtype: object",0 49.682\rdtype: object


In [None]:
# Strip the "0 " text from the income and percent strings. In the scraped data
# every value starts with a "0 " label (e.g. "0 177,359.000000\rdtype: object"),
# and the pattern removes each occurrence of a zero followed by a space.
for scraped_column in ('income', 'percent'):
    df_income[scraped_column] = df_income[scraped_column].replace(
        to_replace='0 ', value='', regex=True
    )
df_income

Unnamed: 0.1,Unnamed: 0,city,state,percentile,income,percent
0,0,Alabaster,Alabama,95th,"177,359.000000\rdtype: object",241.881\rdtype: object
1,1,Alabaster,Alabama,80th,"122,176.000000\rdtype: object",166.623\rdtype: object
2,2,Alabaster,Alabama,60th,"85,277.000000\rdtype: object",116.300\rdtype: object
...,...,...,...,...,...,...
10869,3,Powell,Wyoming,Median,"46,971.000000\rdtype: object",100.000\rdtype: object
10870,4,Powell,Wyoming,40th,"40,583.000000\rdtype: object",86.400\rdtype: object
10871,5,Powell,Wyoming,20th,"23,336.000000\rdtype: object",49.682\rdtype: object


In [None]:
# Convert the scraped income strings to whole-dollar integers, then pivot so that
# each percentile label (95th, 80th, 60th, Median, 40th, 20th) becomes a column.
#
# Each raw value looks like "177,359.000000\rdtype: object": keep the text before
# the first ".", drop the thousands separators and surrounding whitespace, and
# parse what is left as an integer.
#
# The whole column is converted at once and written back with .assign() instead
# of `df_income['income'][i] = ...` inside a Python loop. That chained assignment
# is what triggers SettingWithCopyWarning, is not guaranteed to write through to
# df_income, and leaves the column with object dtype. Casting to str first also
# makes the cell safe to re-run after the column has already been converted.
income_text = df_income['income'].astype(str)
income_whole = (
    income_text
    .str.split('.', n=1).str[0]         # text before the first "." (all of it if none)
    .str.replace(',', '', regex=False)  # "177,359" -> "177359"
    .str.strip()
)
# Anything that does not parse as a whole number raises here rather than being
# silently mangled.
df_income = df_income.assign(income=pd.to_numeric(income_whole).astype('int64'))

# NOTE(review): 'percent' is part of the row index, so rows are keyed by
# (city, state, percent) rather than by place alone -- confirm that is intended.
percentiles = df_income.pivot_table(
    values='income',
    index=['city', 'state', 'percent'],
    columns='percentile',
)








A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [None]:
# Merge the income and crime data on the place each row describes.
#
# 'Unnamed: 0' is not a shared key: in df_income it only takes the values 0-5
# (one per percentile row of a place), while in df_crime it counts rows from 0 up
# to the last row of the frame. Joining on it pairs income rows with unrelated
# crime rows, so drop it from both frames and join on city + state instead.
# NOTE(review): df_income's city names come from URL path segments -- confirm they
# are spelled the same way as df_crime's (e.g. multi-word names) or normalise
# them before merging, otherwise those places drop out of the inner join.
income_by_place = df_income.drop(columns='Unnamed: 0', errors='ignore')
crime_by_place = df_crime.drop(columns='Unnamed: 0', errors='ignore')
df = pd.merge(income_by_place, crime_by_place, on=['city', 'state'])
df