In [None]:
# some good general info for the census: http://www.opengeocode.org/tutorials/USCensusAPI.php

# census code wrapper: https://github.com/sunlightlabs/census

In [1]:

import pandas as pd 
import numpy as np
#import seaborn as sns
#sns.set(style="whitegrid", color_codes=True)
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.style.use('ggplot')
from pandas import DataFrame
from census import Census # this needs to be installed via $ pip install census
from us import states # this needs to be installed via $ pip install us

In [2]:
ck = Census("642e9046916aeaedd65d2e855468e3d7b66328d0")

county_table_load = pd.read_table("http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt", header=None, names=['State_Abrv','State#','County#','County','Blank'],sep=",")

df_county_table = county_table_load

# variables for us to define which census year to utilize
census_year = 2014

df_county_table.head()

Unnamed: 0,State_Abrv,State#,County#,County,Blank
0,AL,1,1,Autauga County,H1
1,AL,1,3,Baldwin County,H1
2,AL,1,5,Barbour County,H1
3,AL,1,7,Bibb County,H1
4,AL,1,9,Blount County,H1


In [3]:
# load the variables table

variables_table_load = pd.read_json("http://api.census.gov/data/2014/acs5/variables.json", typ="frame",orient="columns")

df_variables_table = variables_table_load

# convert the table to a series to make it a viewable dataframe
df_variables_table = df_variables_table.variables.apply(pd.Series).reset_index()
df_variables_table.columns = ['variables','concept','label','predicateOnly','predicateType']

df_variables_table.head()

Unnamed: 0,variables,concept,label,predicateOnly,predicateType
0,AIANHH,Selectable Geographies,FIPS AIANHH code,,
1,AIHHTLI,Selectable Geographies,American Indian Trust Land/Hawaiian Home Land ...,,
2,AITS,Selectable Geographies,American Indian Tribal Subdivision (FIPS),,
3,AITSCE,Selectable Geographies,American Indian Tribal Subdivision (Census),,
4,ANRC,Selectable Geographies,Alaska Native Regional Corporation (FIPS),,


In [4]:
# use this to explore what's in the labels -- note it is case sensitive
pd.set_option('display.max_rows', 30)
pd.set_option('display.max_colwidth', -1)
df_variables_table_explorer = df_variables_table.copy()
df_variables_table[df_variables_table['label'].str.contains("")]


Unnamed: 0,variables,concept,label,predicateOnly,predicateType
0,AIANHH,Selectable Geographies,FIPS AIANHH code,,
1,AIHHTLI,Selectable Geographies,American Indian Trust Land/Hawaiian Home Land Indicator,,
2,AITS,Selectable Geographies,American Indian Tribal Subdivision (FIPS),,
3,AITSCE,Selectable Geographies,American Indian Tribal Subdivision (Census),,
4,ANRC,Selectable Geographies,Alaska Native Regional Corporation (FIPS),,
5,B00001_001E,B00001. Unweighted Sample Count of the Population,Total,,int
6,B00001_001M,B00001. Unweighted Sample Count of the Population,Margin Of Error For!!Total,,int
7,B00002_001E,B00002. Unweighted Sample Housing Units,Total,,int
8,B00002_001M,B00002. Unweighted Sample Housing Units,Margin Of Error For!!Total,,int
9,B01001A_001E,B01001A. SEX BY AGE (WHITE ALONE),Total:,,int


In [5]:
# total population for California
# male by age
m_cali_byage = pd.DataFrame(ck.acs5.get(('NAME',
                                                'B01001_007E', # male 18 to 19
                                                'B01001_008E', # male 20 years
                                                'B01001_009E', # male 21 years
                                                'B01001_010E', # male 22 to 24 years
                                                'B01001_011E', # male 25 to 29 years
                                                'B01001_012E', # male 30 to 34 years
                                                'B01001_013E', # male 35 to 39 years
                                                'B01001_014E', # male 40 to 44 years
                                                'B01001_015E', # male 45 to 49 years
                                                'B01001_016E', # male 50 to 54 years
                                                'B01001_017E', # male 55 to 59 years
                                                'B01001_018E', # male 60 to 51 years
                                                'B01001_019E', # male 62 to 64 years
                                                'B01001_020E', # male 65 to 66 years
                                                'B01001_021E', # male 67 to 69 years
                                                'B01001_022E'), # male 70 to 74 years 
                                          geo={'for': 'county:*', 
                                               'in': 'state:{}'.format(states.CA.fips)}, year=census_year))

m_cali_byage.head()

Unnamed: 0,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,B01001_013E,B01001_014E,B01001_015E,B01001_016E,B01001_017E,B01001_018E,B01001_019E,B01001_020E,B01001_021E,B01001_022E,NAME,county,state
0,20721,10529,11250,33339,59098,59768,56709,58547,55736,54962,48199,17788,22942,13507,15558,18608,"Alameda County, California",1,6
1,27,15,9,17,31,23,26,85,11,50,61,16,32,12,31,12,"Alpine County, California",3,6
2,453,167,163,653,902,1057,988,1637,1711,1780,1730,695,961,737,814,916,"Amador County, California",5,6
3,4116,2865,2126,8534,7269,6580,5887,5826,6162,7022,7154,2973,4182,2755,2597,3600,"Butte County, California",7,6
4,494,252,176,631,976,829,1074,974,1564,1826,1982,821,1257,757,1115,1352,"Calaveras County, California",9,6


In [6]:
# total population for California
# femmale by age
f_cali_byage = pd.DataFrame(ck.acs5.get(('NAME',
                                                'B01001_031E', # female 18 to 19
                                                'B01001_032E', # female 20 years
                                                'B01001_033E', # female 21 years
                                                'B01001_034E', # female 22 to 24 years
                                                'B01001_035E', # female 25 to 29 years
                                                'B01001_036E', # female 30 to 34 years
                                                'B01001_037E', # female 35 to 39 years
                                                'B01001_038E', # female 40 to 44 years
                                                'B01001_039E', # female 45 to 49 years
                                                'B01001_040E', # female 50 to 54 years
                                                'B01001_041E', # female 55 to 59 years
                                                'B01001_042E', # female 60 to 51 years
                                                'B01001_043E', # female 62 to 64 years
                                                'B01001_044E', # female 65 to 66 years
                                                'B01001_045E', # female 67 to 69 years
                                                'B01001_046E'), # female 70 to 74 years 
                                          geo={'for': 'county:*', 
                                               'in': 'state:{}'.format(states.CA.fips)}, year=census_year))

f_cali_byage.head()

Unnamed: 0,B01001_031E,B01001_032E,B01001_033E,B01001_034E,B01001_035E,B01001_036E,B01001_037E,B01001_038E,B01001_039E,B01001_040E,B01001_041E,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,NAME,county,state
0,20040,10794,11093,32492,59685,61900,58659,58518,56716,57024,50947,18700,26891,14847,17278,23404,"Alameda County, California",1,6
1,6,22,7,22,27,39,11,28,55,45,52,14,11,19,10,43,"Alpine County, California",3,6
2,316,159,83,453,641,694,757,861,1088,1391,1557,817,1003,469,836,1215,"Amador County, California",5,6
3,4060,2331,2790,7403,6575,6078,5331,6020,6533,7363,7593,3287,4278,2343,3433,4337,"Butte County, California",7,6
4,425,151,189,582,838,905,860,1368,1528,2010,2141,717,1490,852,1167,1296,"Calaveras County, California",9,6


In [7]:
m_cali_byage.dtypes

B01001_007E    object
B01001_008E    object
B01001_009E    object
B01001_010E    object
B01001_011E    object
B01001_012E    object
B01001_013E    object
B01001_014E    object
B01001_015E    object
B01001_016E    object
B01001_017E    object
B01001_018E    object
B01001_019E    object
B01001_020E    object
B01001_021E    object
B01001_022E    object
NAME           object
county         object
state          object
dtype: object

In [8]:
# sum the values for males

m_cali_age = m_cali_byage.copy()

# since the values are stored as objects, we are going to convert them to numberic
m_cali_age = m_cali_age.convert_objects(convert_numeric=True)

# perform the sum
m_cali_age['Total Male Population'] =  m_cali_age['B01001_007E'] +  m_cali_age['B01001_008E'] + m_cali_age['B01001_009E'] + m_cali_age['B01001_010E'] + m_cali_age['B01001_011E'] + m_cali_age['B01001_012E'] + m_cali_age['B01001_013E'] + m_cali_age['B01001_014E'] + m_cali_age['B01001_015E'] + m_cali_age['B01001_016E'] + m_cali_age['B01001_017E'] + m_cali_age['B01001_018E'] + m_cali_age['B01001_019E'] + m_cali_age['B01001_020E'] + m_cali_age['B01001_021E'] + m_cali_age['B01001_022E']
                                                
m_cali_age.head()



Unnamed: 0,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,B01001_013E,B01001_014E,B01001_015E,B01001_016E,B01001_017E,B01001_018E,B01001_019E,B01001_020E,B01001_021E,B01001_022E,NAME,county,state,Total Male Population
0,20721,10529,11250,33339,59098,59768,56709,58547,55736,54962,48199,17788,22942,13507,15558,18608,"Alameda County, California",1,6,557261
1,27,15,9,17,31,23,26,85,11,50,61,16,32,12,31,12,"Alpine County, California",3,6,458
2,453,167,163,653,902,1057,988,1637,1711,1780,1730,695,961,737,814,916,"Amador County, California",5,6,15364
3,4116,2865,2126,8534,7269,6580,5887,5826,6162,7022,7154,2973,4182,2755,2597,3600,"Butte County, California",7,6,79648
4,494,252,176,631,976,829,1074,974,1564,1826,1982,821,1257,757,1115,1352,"Calaveras County, California",9,6,16080


In [38]:
# sum the values for females

f_cali_age = f_cali_byage.copy()

# since the values are stored as objects, we are going to convert them to numberic
f_cali_age = f_cali_age.convert_objects(convert_numeric=True)

# perform the sum
f_cali_age['Total Female Population'] =  f_cali_age['B01001_031E'] +  f_cali_age['B01001_032E'] + f_cali_age['B01001_033E'] + f_cali_age['B01001_034E'] + f_cali_age['B01001_035E'] + f_cali_age['B01001_036E'] + f_cali_age['B01001_037E'] + f_cali_age['B01001_038E'] + f_cali_age['B01001_039E'] + f_cali_age['B01001_040E'] + f_cali_age['B01001_041E'] + f_cali_age['B01001_042E'] + f_cali_age['B01001_043E'] + f_cali_age['B01001_044E'] + f_cali_age['B01001_045E'] + f_cali_age['B01001_046E']
                                                
f_cali_age



Unnamed: 0,B01001_031E,B01001_032E,B01001_033E,B01001_034E,B01001_035E,B01001_036E,B01001_037E,B01001_038E,B01001_039E,B01001_040E,B01001_041E,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,NAME,county,state,Total Female Population
0,20040,10794,11093,32492,59685,61900,58659,58518,56716,57024,50947,18700,26891,14847,17278,23404,"Alameda County, California",1,6,578988
1,6,22,7,22,27,39,11,28,55,45,52,14,11,19,10,43,"Alpine County, California",3,6,411
2,316,159,83,453,641,694,757,861,1088,1391,1557,817,1003,469,836,1215,"Amador County, California",5,6,12340
3,4060,2331,2790,7403,6575,6078,5331,6020,6533,7363,7593,3287,4278,2343,3433,4337,"Butte County, California",7,6,79755
4,425,151,189,582,838,905,860,1368,1528,2010,2141,717,1490,852,1167,1296,"Calaveras County, California",9,6,16519
5,238,168,150,341,686,645,680,584,681,697,637,191,324,193,206,295,"Colusa County, California",11,6,6716
6,12898,5838,6551,19395,32757,35030,38157,38305,42130,42451,38038,13653,19834,11716,13493,18704,"Contra Costa County, California",13,6,388950
7,328,115,128,552,603,699,611,511,957,1155,993,388,609,303,403,507,"Del Norte County, California",15,6,8862
8,1950,1109,1153,2220,4161,4346,4719,5906,7069,8374,8120,2843,4025,2621,2724,3614,"El Dorado County, California",17,6,64954
9,14346,8822,8180,21416,35009,32273,29194,28106,28604,28867,26036,9562,12688,7457,9198,12707,"Fresno County, California",19,6,312465


In [40]:
# this code can be used to replace the above aggregation
f_cali_ageT1 = f_cali_byage.copy()
f_cali_ageT1.drop(['NAME','county','state'], inplace=True, axis=1)
f_cali_ageT1 = f_cali_ageT1.astype(int)
f_cali_ageT1['Total Female Population'] = f_cali_ageT1.sum(axis=1)
f_cali_ageT1 = f_cali_ageT1[['Total Female Population']]
f_cali_ageT = f_cali_age.copy()
f_cali_ageT1 = pd.concat([f_cali_ageT, f_cali_ageT1], axis=1)
f_cali_ageT


Unnamed: 0,B01001_031E,B01001_032E,B01001_033E,B01001_034E,B01001_035E,B01001_036E,B01001_037E,B01001_038E,B01001_039E,B01001_040E,B01001_041E,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,NAME,county,state,Total Female Population
0,20040,10794,11093,32492,59685,61900,58659,58518,56716,57024,50947,18700,26891,14847,17278,23404,"Alameda County, California",1,6,578988
1,6,22,7,22,27,39,11,28,55,45,52,14,11,19,10,43,"Alpine County, California",3,6,411
2,316,159,83,453,641,694,757,861,1088,1391,1557,817,1003,469,836,1215,"Amador County, California",5,6,12340
3,4060,2331,2790,7403,6575,6078,5331,6020,6533,7363,7593,3287,4278,2343,3433,4337,"Butte County, California",7,6,79755
4,425,151,189,582,838,905,860,1368,1528,2010,2141,717,1490,852,1167,1296,"Calaveras County, California",9,6,16519
5,238,168,150,341,686,645,680,584,681,697,637,191,324,193,206,295,"Colusa County, California",11,6,6716
6,12898,5838,6551,19395,32757,35030,38157,38305,42130,42451,38038,13653,19834,11716,13493,18704,"Contra Costa County, California",13,6,388950
7,328,115,128,552,603,699,611,511,957,1155,993,388,609,303,403,507,"Del Norte County, California",15,6,8862
8,1950,1109,1153,2220,4161,4346,4719,5906,7069,8374,8120,2843,4025,2621,2724,3614,"El Dorado County, California",17,6,64954
9,14346,8822,8180,21416,35009,32273,29194,28106,28604,28867,26036,9562,12688,7457,9198,12707,"Fresno County, California",19,6,312465


In [10]:
# reindex the headers so they align with the labels from our variables table

m_cali_age.rename(columns=df_variables_table.set_index('variables')['label'], inplace=True)

m_cali_age.head()

Unnamed: 0,Male:!!18 and 19 years,Male:!!20 years,Male:!!21 years,Male:!!22 to 24 years,Male:!!25 to 29 years,Male:!!30 to 34 years,Male:!!35 to 39 years,Male:!!40 to 44 years,Male:!!45 to 49 years,Male:!!50 to 54 years,Male:!!55 to 59 years,Male:!!60 and 61 years,Male:!!62 to 64 years,Male:!!65 and 66 years,Male:!!67 to 69 years,Male:!!70 to 74 years,Geographic Area Name,county,state,Total Male Population
0,20721,10529,11250,33339,59098,59768,56709,58547,55736,54962,48199,17788,22942,13507,15558,18608,"Alameda County, California",1,6,557261
1,27,15,9,17,31,23,26,85,11,50,61,16,32,12,31,12,"Alpine County, California",3,6,458
2,453,167,163,653,902,1057,988,1637,1711,1780,1730,695,961,737,814,916,"Amador County, California",5,6,15364
3,4116,2865,2126,8534,7269,6580,5887,5826,6162,7022,7154,2973,4182,2755,2597,3600,"Butte County, California",7,6,79648
4,494,252,176,631,976,829,1074,974,1564,1826,1982,821,1257,757,1115,1352,"Calaveras County, California",9,6,16080


In [11]:
# reindex the headers so they align with the labels from our variables table

f_cali_age.rename(columns=df_variables_table.set_index('variables')['label'], inplace=True)

f_cali_age.head()

Unnamed: 0,Female:!!18 and 19 years,Female:!!20 years,Female:!!21 years,Female:!!22 to 24 years,Female:!!25 to 29 years,Female:!!30 to 34 years,Female:!!35 to 39 years,Female:!!40 to 44 years,Female:!!45 to 49 years,Female:!!50 to 54 years,Female:!!55 to 59 years,Female:!!60 and 61 years,Female:!!62 to 64 years,Female:!!65 and 66 years,Female:!!67 to 69 years,Female:!!70 to 74 years,Geographic Area Name,county,state,Total Female Population
0,20040,10794,11093,32492,59685,61900,58659,58518,56716,57024,50947,18700,26891,14847,17278,23404,"Alameda County, California",1,6,578988
1,6,22,7,22,27,39,11,28,55,45,52,14,11,19,10,43,"Alpine County, California",3,6,411
2,316,159,83,453,641,694,757,861,1088,1391,1557,817,1003,469,836,1215,"Amador County, California",5,6,12340
3,4060,2331,2790,7403,6575,6078,5331,6020,6533,7363,7593,3287,4278,2343,3433,4337,"Butte County, California",7,6,79755
4,425,151,189,582,838,905,860,1368,1528,2010,2141,717,1490,852,1167,1296,"Calaveras County, California",9,6,16519


In [12]:
# merge the tables together

cali_age = pd.merge(m_cali_age, f_cali_age, on="Geographic Area Name")

cali_age

Unnamed: 0,Male:!!18 and 19 years,Male:!!20 years,Male:!!21 years,Male:!!22 to 24 years,Male:!!25 to 29 years,Male:!!30 to 34 years,Male:!!35 to 39 years,Male:!!40 to 44 years,Male:!!45 to 49 years,Male:!!50 to 54 years,...,Female:!!50 to 54 years,Female:!!55 to 59 years,Female:!!60 and 61 years,Female:!!62 to 64 years,Female:!!65 and 66 years,Female:!!67 to 69 years,Female:!!70 to 74 years,county_y,state_y,Total Female Population
0,20721,10529,11250,33339,59098,59768,56709,58547,55736,54962,...,57024,50947,18700,26891,14847,17278,23404,1,6,578988
1,27,15,9,17,31,23,26,85,11,50,...,45,52,14,11,19,10,43,3,6,411
2,453,167,163,653,902,1057,988,1637,1711,1780,...,1391,1557,817,1003,469,836,1215,5,6,12340
3,4116,2865,2126,8534,7269,6580,5887,5826,6162,7022,...,7363,7593,3287,4278,2343,3433,4337,7,6,79755
4,494,252,176,631,976,829,1074,974,1564,1826,...,2010,2141,717,1490,852,1167,1296,9,6,16519
5,378,252,124,360,729,667,715,681,680,712,...,697,637,191,324,193,206,295,11,6,6716
6,13981,6970,6500,19798,32898,33592,34910,37320,40036,40171,...,42451,38038,13653,19834,11716,13493,18704,13,6,388950
7,442,206,284,677,1137,1333,1184,911,1273,1030,...,1155,993,388,609,303,403,507,15,6,8862
8,2337,1489,975,2939,4352,4490,4828,5474,6844,7959,...,8374,8120,2843,4025,2621,2724,3614,17,6,64954
9,15425,8074,8452,23594,37202,33383,29890,28306,28670,28474,...,28867,26036,9562,12688,7457,9198,12707,19,6,312465


In [13]:
cali_age_summary = cali_age.copy()
cali_age_summary = cali_age[['Geographic Area Name','county_y','Total Female Population','Total Male Population']]
cali_age_summary.columns= ['Geographic Area Name','County #','Total Female Population','Total Male Population']

cali_age_summary['Total Population'] = cali_age_summary['Total Female Population'] + cali_age_summary['Total Male Population']
cali_age_summary['Female %'] = round((cali_age_summary['Total Female Population'] / cali_age_summary['Total Population'] *100),1).astype(str) + '%'
cali_age_summary['Male %'] = round((cali_age_summary['Total Male Population'] / cali_age_summary['Total Population'] *100),1).astype(str) + '%'

cali_age_summary = cali_age_summary.sort(['Geographic Area Name'], ascending=False)

cali_age_summary 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Geographic Area Name,County #,Total Female Population,Total Male Population,Total Population,Female %,Male %
57,"Yuba County, California",115,24283,24695,48978,49.6%,50.4%
56,"Yolo County, California",113,76771,72455,149226,51.4%,48.6%
55,"Ventura County, California",111,291448,289101,580549,50.2%,49.8%
54,"Tuolumne County, California",109,18473,21558,40031,46.1%,53.9%
53,"Tulare County, California",107,143226,144152,287378,49.8%,50.2%
52,"Trinity County, California",105,4768,5167,9935,48.0%,52.0%
51,"Tehama County, California",103,21692,21489,43181,50.2%,49.8%
50,"Sutter County, California",101,31969,31618,63587,50.3%,49.7%
49,"Stanislaus County, California",99,177499,173345,350844,50.6%,49.4%
48,"Sonoma County, California",97,179520,175062,354582,50.6%,49.4%


In [14]:
grouped = cali_age_summary.copy()
grouped = cali_age_summary.groupby(['Geographic Area Name'])

cali_age_group = grouped['Total Female Population','Total Male Population'].aggregate(np.sum)

cali_age_group

Unnamed: 0_level_0,Total Female Population,Total Male Population
Geographic Area Name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Alameda County, California",578988,557261
"Alpine County, California",411,458
"Amador County, California",12340,15364
"Butte County, California",79755,79648
"Calaveras County, California",16519,16080
"Colusa County, California",6716,7312
"Contra Costa County, California",388950,369162
"Del Norte County, California",8862,11583
"El Dorado County, California",64954,65236
"Fresno County, California",312465,313367


In [15]:
cali_age_group.plot.barh(title='Total Gender Distribution by County')
plt.gca().invert_yaxis()


In [16]:
plt.show()

In [17]:
bayarea_age_summary = cali_age_summary.copy()

bayarea_age_summary

bayarea_age_summary =bayarea_age_summary[bayarea_age_summary['Geographic Area Name'].str.contains("Alameda|Contra Costa|Marin|Napa|San Francisco|San Mateo|Santa Clara|Solano|Sonoma")]

grouped_bayarea = bayarea_age_summary.groupby(['Geographic Area Name'])

bayarea_age_summary = grouped_bayarea['Total Female Population','Total Male Population'].aggregate(np.sum)

bayarea_age_summary

Unnamed: 0_level_0,Total Female Population,Total Male Population
Geographic Area Name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Alameda County, California",578988,557261
"Contra Costa County, California",388950,369162
"Marin County, California",93108,90887
"Napa County, California",48722,49134
"San Francisco County, California",319409,341837
"San Mateo County, California",268136,262224
"Santa Clara County, California",647925,662529
"Solano County, California",149772,150195
"Sonoma County, California",179520,175062


In [18]:
bayarea_age_summary.plot.barh(stacked=True, title='Total Gender Distribution by County')
plt.gca().invert_yaxis()
plt.show()

In [19]:
cali_age_summary_percent = cali_age.copy()
cali_age_summary_percent = cali_age[['Geographic Area Name','county_y','Total Female Population','Total Male Population']]
cali_age_summary_percent.columns= ['Geographic Area Name','County #','Total Female Population','Total Male Population']

cali_age_summary_percent['Total Population'] = cali_age_summary_percent['Total Female Population'] + cali_age_summary_percent['Total Male Population']
cali_age_summary_percent['Female %'] = round((cali_age_summary_percent['Total Female Population'] / cali_age_summary_percent['Total Population'] *100),1)
cali_age_summary_percent['Male %'] = round((cali_age_summary_percent['Total Male Population'] / cali_age_summary_percent['Total Population'] *100),1)

cali_age_summary_percent = cali_age_summary_percent.sort(['Geographic Area Name'], ascending=False)

cali_age_summary_percent 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Geographic Area Name,County #,Total Female Population,Total Male Population,Total Population,Female %,Male %
57,"Yuba County, California",115,24283,24695,48978,49.6,50.4
56,"Yolo County, California",113,76771,72455,149226,51.4,48.6
55,"Ventura County, California",111,291448,289101,580549,50.2,49.8
54,"Tuolumne County, California",109,18473,21558,40031,46.1,53.9
53,"Tulare County, California",107,143226,144152,287378,49.8,50.2
52,"Trinity County, California",105,4768,5167,9935,48.0,52.0
51,"Tehama County, California",103,21692,21489,43181,50.2,49.8
50,"Sutter County, California",101,31969,31618,63587,50.3,49.7
49,"Stanislaus County, California",99,177499,173345,350844,50.6,49.4
48,"Sonoma County, California",97,179520,175062,354582,50.6,49.4


In [20]:
grouped_percent = cali_age_summary_percent.copy()
grouped_percent = cali_age_summary_percent.groupby(['Geographic Area Name'])

cali_age_group_percent = grouped_percent['Female %','Male %'].aggregate(np.sum)

cali_age_group_percent

Unnamed: 0_level_0,Female %,Male %
Geographic Area Name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Alameda County, California",51.0,49.0
"Alpine County, California",47.3,52.7
"Amador County, California",44.5,55.5
"Butte County, California",50.0,50.0
"Calaveras County, California",50.7,49.3
"Colusa County, California",47.9,52.1
"Contra Costa County, California",51.3,48.7
"Del Norte County, California",43.3,56.7
"El Dorado County, California",49.9,50.1
"Fresno County, California",49.9,50.1


In [21]:
cali_age_group_percent.plot.barh(stacked=True, title='Total Gender Distribution by County')
plt.gca().invert_yaxis()
plt.show()

In [22]:
bayarea_age_summary_percent = cali_age_summary_percent.copy()

bayarea_age_summary_percent

bayarea_age_summary_percent =bayarea_age_summary_percent[bayarea_age_summary_percent['Geographic Area Name'].str.contains("Alameda|Contra Costa|Marin|Napa|San Francisco|San Mateo|Santa Clara|Solano|Sonoma")]

grouped_bayarea_percent = bayarea_age_summary_percent.groupby(['Geographic Area Name'])

bayarea_age_summary_percent = grouped_bayarea_percent['Female %','Male %'].aggregate(np.sum)

bayarea_age_summary_percent

Unnamed: 0_level_0,Female %,Male %
Geographic Area Name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Alameda County, California",51.0,49.0
"Contra Costa County, California",51.3,48.7
"Marin County, California",50.6,49.4
"Napa County, California",49.8,50.2
"San Francisco County, California",48.3,51.7
"San Mateo County, California",50.6,49.4
"Santa Clara County, California",49.4,50.6
"Solano County, California",49.9,50.1
"Sonoma County, California",50.6,49.4


In [23]:
bayarea_age_summary_percent.plot.barh(stacked=True, title='Total Gender Distribution in SF Bay Area')
plt.gca().invert_yaxis()
plt.show()

In [None]:
m_cali_age.dtypes

In [None]:
""" df= pd.DataFrame(ck.acs5.get(('NAME', 'B01001_002E','NAME', 'B01001_026E', 'NAME','B08013_003E'),
          {'for': 'state:{}'.format(states.CA.fips)}, year=census_year))
df

In [None]:
""""# total population for Yuba County broken out by tract

df3= pd.DataFrame(ck.acs5.get(('NAME','B00001_001E'),geo={'for': 'tract:*', 
               'in': 'state:{} county:115'.format(states.CA.fips)}, year=census_year))
df3


In [None]:
df5= pd.DataFrame(ck.acs5.get(('NAME','B00001_001E'),
          {'for': 'state:{}'.format(states.RI.fips)}, year=census_year))
df5

In [None]:
df2 = pd.DataFrame(ck.acs5.get('B01001_004E', {'for': 'state:*'}))
df2.head

In [None]:
df_variables_table