# Census Data

In [2]:
import os

import pandas as pd
from census import Census
from us import states

## Male population by age bracket in each state (all races)

In [21]:
# The Census API key is a credential, so it is read from the environment rather
# than being committed to the notebook.
# NOTE(review): a key that was previously committed in this cell should be
# treated as compromised and rotated.
census_api_key = os.environ.get("CENSUS_API_KEY")
if not census_api_key:
    raise RuntimeError(
        "Set the CENSUS_API_KEY environment variable to your Census API key."
    )

c = Census(census_api_key, year=2021)

# Data for male only
census_variables = [
    'B01001_003E', 'B01001_004E', 'B01001_005E', 'B01001_006E', 'B01001_007E', 'B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E',
    'B01001_012E', 'B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E', 'B01001_018E', 'B01001_019E', 'B01001_020E',
    'B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_024E', 'B01001_025E'
]

# Human-readable column names, listed in the same order as census_variables.
renames_variables = [
    'Under 5 years old', '5 to 9 years', '10 to 14 years', '15 to 17 years', '18 and 19 years', '20 years', '21 years', '22 to 24 years', '25 to 29 years',
    '30 to 34 years', '35 to 39 years', '40 to 44 years', '45 to 49 years', '50 to 54 years', '55 to 59 years', '60 and 61 years', '62 to 64 years', '65 and 66 years',
    '67 to 69 years', '70 to 74 years', '75 to 79 years', '80 to 84 years', '85 years and over'
]

# The two lists are paired positionally, so a length mismatch would silently
# mislabel or drop columns.
assert len(census_variables) == len(renames_variables), "variable/label lists differ in length"

census_data = c.acs5.get(tuple(census_variables), {'for': 'state:*'})

# Convert to DataFrame and rename every variable code in a single pass
male_age_census_pd = pd.DataFrame(census_data).rename(
    columns=dict(zip(census_variables, renames_variables))
)

# FIPS code of the District of Columbia, which states.lookup may not resolve
# (the original cell relied on the resulting exception to label it 'DC').
DC_FIPS = '11'


def fips_to_abbr(fips):
    """Return the two-letter postal abbreviation for a state FIPS code.

    Falls back to 'DC' for the District of Columbia when the lookup finds
    nothing, and returns the code unchanged for any other unmatched value so
    that an unexpected row is never silently mislabelled.
    """
    match = states.lookup(fips)
    if match is not None:
        return match.abbr
    if fips == DC_FIPS:
        return 'DC'
    return fips


# Replace state FIPS codes with two-letter abbreviations
male_age_census_pd['state'] = male_age_census_pd['state'].map(fips_to_abbr)

# Calculate total population
male_age_census_pd['Total Population'] = male_age_census_pd[renames_variables].sum(axis=1)

male_age_census_pd.sort_values('state')

Unnamed: 0,Under 5 years old,5 to 9 years,10 to 14 years,15 to 17 years,18 and 19 years,20 years,21 years,22 to 24 years,25 to 29 years,30 to 34 years,...,60 and 61 years,62 to 64 years,65 and 66 years,67 to 69 years,70 to 74 years,75 to 79 years,80 to 84 years,85 years and over,state,Total Population
1,25965.0,27379.0,25485.0,14977.0,9878.0,5752.0,6367.0,16982.0,31655.0,29907.0,...,10519.0,13514.0,8773.0,10712.0,12359.0,6297.0,4085.0,2801.0,AK,384749.0
0,151356.0,155081.0,167636.0,99974.0,67361.0,36835.0,33355.0,93427.0,165130.0,154019.0,...,65142.0,94891.0,57161.0,74995.0,105919.0,65847.0,40690.0,27527.0,AL,2429703.0
3,95410.0,99723.0,105077.0,62435.0,41759.0,23469.0,20434.0,57146.0,99040.0,95296.0,...,38021.0,52218.0,33650.0,44903.0,62538.0,42038.0,24260.0,19336.0,AR,1483520.0
2,210607.0,224911.0,244849.0,144036.0,99223.0,51285.0,49589.0,147241.0,256594.0,242974.0,...,85825.0,118151.0,76219.0,107192.0,160666.0,110441.0,68680.0,53740.0,AZ,3533895.0
4,1203859.0,1245934.0,1362100.0,792617.0,529966.0,279345.0,269923.0,800634.0,1538050.0,1518085.0,...,482736.0,643638.0,389220.0,508667.0,683160.0,422793.0,273543.0,272143.0,CA,19714044.0
5,165967.0,176827.0,191554.0,112904.0,78517.0,40101.0,35842.0,118708.0,229868.0,234150.0,...,71058.0,97753.0,61271.0,80442.0,107867.0,60174.0,35468.0,34042.0,CO,2895936.0
6,93734.0,99242.0,115470.0,72468.0,52870.0,25866.0,23950.0,74077.0,113856.0,112538.0,...,52437.0,71499.0,39968.0,54345.0,73549.0,46331.0,28795.0,30915.0,CT,1768860.0
8,21890.0,17335.0,16300.0,7748.0,9435.0,4547.0,3904.0,13869.0,35427.0,37517.0,...,6256.0,8478.0,5345.0,6660.0,9414.0,6039.0,3312.0,3748.0,DC,325490.0
7,27567.0,28646.0,30950.0,18348.0,12680.0,6756.0,6479.0,16265.0,32141.0,31668.0,...,12531.0,19347.0,12072.0,17742.0,23098.0,15122.0,8791.0,7181.0,DE,477219.0
9,571443.0,583755.0,644169.0,379009.0,249648.0,127817.0,129735.0,374472.0,695887.0,694218.0,...,286697.0,390219.0,253050.0,356097.0,528650.0,384361.0,237146.0,217598.0,FL,10489548.0


## Save as CSV

In [33]:
# Write the cleaned table to disk; index=False leaves the row index out of the file.
male_age_census_pd.to_csv(
    path_or_buf='clean_data/clean_male_population_age.csv',
    index=False,
)

## Distribution of population over age brackets

In [31]:
# State to inspect, given as its two-letter abbreviation
state = 'CA'

# Keep only the rows for that state and renumber them from 0,
# discarding the old row labels instead of keeping them as a column.
is_selected_state = male_age_census_pd['state'] == state
state_data = male_age_census_pd[is_selected_state].reset_index(drop=True)

state_data

Unnamed: 0,Under 5 years old,5 to 9 years,10 to 14 years,15 to 17 years,18 and 19 years,20 years,21 years,22 to 24 years,25 to 29 years,30 to 34 years,...,60 and 61 years,62 to 64 years,65 and 66 years,67 to 69 years,70 to 74 years,75 to 79 years,80 to 84 years,85 years and over,state,Total Population
0,1203859.0,1245934.0,1362100.0,792617.0,529966.0,279345.0,269923.0,800634.0,1538050.0,1518085.0,...,482736.0,643638.0,389220.0,508667.0,683160.0,422793.0,273543.0,272143.0,CA,19714044.0


In [32]:
# Work on an independent copy: a plain slice (state_data[:]) can share memory
# with state_data, so writing percentages through it could overwrite the raw
# counts and make this cell give different results when re-run.
state_data_percent = state_data.copy()

# Express every age bracket as a percentage of the row's total population.
# Vectorised over all rows, so it stays correct if more than one row is selected.
state_data_percent[renames_variables] = (
    100 * state_data[renames_variables]
).div(state_data['Total Population'], axis=0)

state_data_percent

Unnamed: 0,Under 5 years old,5 to 9 years,10 to 14 years,15 to 17 years,18 and 19 years,20 years,21 years,22 to 24 years,25 to 29 years,30 to 34 years,...,60 and 61 years,62 to 64 years,65 and 66 years,67 to 69 years,70 to 74 years,75 to 79 years,80 to 84 years,85 years and over,state,Total Population
0,6.106606,6.320033,6.909288,4.02057,2.688266,1.416985,1.369191,4.061237,7.801799,7.700526,...,2.448691,3.26487,1.974329,2.580227,3.465347,2.144628,1.387554,1.380452,CA,19714044.0
