## Create 2008 Age Census Demo CSV

In [1]:
import pandas as pd

In [2]:
# For comma-separated values
age_2008_df = pd.read_csv('../censusoutputcsv/2008_census_age.csv', sep=',')

In [3]:
# Display dataframe head
age_2008_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years\t,25_to_34_years,35_to_44_years\t,45_to_54_years\t,55_to_59_years,60_to_64_years\t,65_to_74_years,75_to_84_years,85_years_and_over\t,median_age(years)\t,state_id,congressional_district,year
0,Oregon,49.8,50.2,5.9,14.7,15.3,15.0,6.7,5.1,5.6,3.3,1.6,36.7,41,1,2008
1,Oregon,49.2,50.8,6.2,13.0,12.3,14.6,6.8,6.2,8.3,5.8,2.2,39.7,41,2,2008
2,Oregon,49.2,50.8,6.1,14.5,15.5,15.7,6.7,5.1,5.5,3.1,1.6,36.9,41,3,2008
3,Oregon,48.6,51.4,7.0,13.3,11.9,14.6,7.2,6.1,8.5,5.5,2.5,40.2,41,4,2008
4,Oregon,49.5,50.5,7.1,13.7,12.4,14.5,7.1,6.0,6.7,4.0,2.6,37.5,41,5,2008


In [4]:
# Remove unnecessary spaces
age_2008_df.columns = age_2008_df.columns.str.strip()

In [5]:
# Display dataframe head to maake sure spaces were removed
age_2008_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),state_id,congressional_district,year
0,Oregon,49.8,50.2,5.9,14.7,15.3,15.0,6.7,5.1,5.6,3.3,1.6,36.7,41,1,2008
1,Oregon,49.2,50.8,6.2,13.0,12.3,14.6,6.8,6.2,8.3,5.8,2.2,39.7,41,2,2008
2,Oregon,49.2,50.8,6.1,14.5,15.5,15.7,6.7,5.1,5.5,3.1,1.6,36.9,41,3,2008
3,Oregon,48.6,51.4,7.0,13.3,11.9,14.6,7.2,6.1,8.5,5.5,2.5,40.2,41,4,2008
4,Oregon,49.5,50.5,7.1,13.7,12.4,14.5,7.1,6.0,6.7,4.0,2.6,37.5,41,5,2008


In [6]:
# Check dataframe columns
age_2008_df.columns

Index(['state', '18_and_over_population_male', '18_and_over_population_female',
       '20_to_24_years', '25_to_34_years', '35_to_44_years', '45_to_54_years',
       '55_to_59_years', '60_to_64_years', '65_to_74_years', '75_to_84_years',
       '85_years_and_over', 'median_age(years)', 'state_id',
       'congressional_district', 'year'],
      dtype='object')

In [7]:
# Convert the 'year' column to integer
age_2008_df['year'] = age_2008_df['year'].astype(int)

In [8]:
# Check dataframe types
age_2008_df.dtypes

state                             object
18_and_over_population_male      float64
18_and_over_population_female    float64
20_to_24_years                   float64
25_to_34_years                   float64
35_to_44_years                   float64
45_to_54_years                   float64
55_to_59_years                   float64
60_to_64_years                   float64
65_to_74_years                   float64
75_to_84_years                   float64
85_years_and_over                float64
median_age(years)                float64
state_id                           int64
congressional_district             int64
year                               int64
dtype: object

In [9]:
# Drop 'state_id' and 'congressional_district' columns
age_2008_df = age_2008_df.drop(columns=['state_id', 'congressional_district'])

# Display the updated DataFrame
age_2008_df

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Oregon,49.8,50.2,5.9,14.7,15.3,15.0,6.7,5.1,5.6,3.3,1.6,36.7,2008
1,Oregon,49.2,50.8,6.2,13.0,12.3,14.6,6.8,6.2,8.3,5.8,2.2,39.7,2008
2,Oregon,49.2,50.8,6.1,14.5,15.5,15.7,6.7,5.1,5.5,3.1,1.6,36.9,2008
3,Oregon,48.6,51.4,7.0,13.3,11.9,14.6,7.2,6.1,8.5,5.5,2.5,40.2,2008
4,Oregon,49.5,50.5,7.1,13.7,12.4,14.5,7.1,6.0,6.7,4.0,2.6,37.5,2008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,Oklahoma,48.2,51.8,6.5,13.7,13.3,14.6,6.2,4.9,6.6,4.1,1.7,36.3,2008
433,Oklahoma,48.7,51.3,7.0,11.9,12.4,13.6,6.2,5.8,8.5,5.0,2.1,38.3,2008
434,Oklahoma,49.6,50.4,8.0,12.9,12.1,14.1,6.1,5.3,7.4,4.8,2.1,36.7,2008
435,Oklahoma,49.1,50.9,7.5,13.5,13.3,14.5,5.9,4.8,6.8,3.9,1.6,35.5,2008


In [10]:
# Check for null values in the entire DataFrame
null_values = age_2008_df.isnull().sum()

# Display columns with null values
print(null_values[null_values > 0])


Series([], dtype: int64)


In [11]:
# List unique state values
print(age_2008_df['state'].unique())

[' Oregon' ' Pennsylvania' ' Rhode Island' ' South Carolina'
 ' South Dakota' ' Tennessee' ' Texas' ' Utah' ' Vermont' ' Virginia'
 ' Washington' ' West Virginia' ' Wisconsin' ' Wyoming' ' Puerto Rico'
 ' Alabama' ' Alaska' ' Arizona' ' Arkansas' ' California' ' Colorado'
 ' Connecticut' ' Delaware' ' District of Columbia' ' Florida' ' Georgia'
 ' Hawaii' ' Idaho' ' Illinois' ' Indiana' ' Iowa' ' Kansas' ' Kentucky'
 ' Louisiana' ' Maine' ' Maryland' ' Massachusetts' ' Michigan'
 ' Minnesota' ' Mississippi' ' Missouri' ' Montana' ' Nebraska' ' Nevada'
 ' New Hampshire' ' New Jersey' ' New Mexico' ' New York'
 ' North Carolina' ' North Dakota' ' Ohio' ' Oklahoma']


In [12]:
# Group by 'State' and calculate the median of 'Median_Age' to reduce rows 
state_medians = age_2008_df.groupby('state')['median_age(years)'].median().reset_index()

state_medians['median_age(years)'] = state_medians['median_age(years)'].astype(int)

# Rename the columns if desired
state_medians.columns = ['state', 'median_age(years)']

# Display the results
print(state_medians)

                    state  median_age(years)
0                 Alabama                 37
1                  Alaska                 33
2                 Arizona                 36
3                Arkansas                 37
4              California                 34
5                Colorado                 35
6             Connecticut                 39
7                Delaware                 38
8    District of Columbia                 34
9                 Florida                 40
10                Georgia                 34
11                 Hawaii                 38
12                  Idaho                 34
13               Illinois                 36
14                Indiana                 36
15                   Iowa                 38
16                 Kansas                 35
17               Kentucky                 38
18              Louisiana                 35
19                  Maine                 41
20               Maryland                 37
21        

In [13]:

# List of columns with percentage data to convert to fractions for averaging
columns_to_convert = [
    '18_and_over_population_male',
    '18_and_over_population_female',
    '20_to_24_years',
    '25_to_34_years',
    '35_to_44_years',
    '45_to_54_years',
    '55_to_59_years',
    '60_to_64_years',
    '65_to_74_years',
    '75_to_84_years',
    '85_years_and_over',
]

# Convert the percentage columns to fractions by dividing by 100
age_2008_df[columns_to_convert] = age_2008_df[columns_to_convert] / 100

# Group by 'state' and calculate the average of all numerical columns
state_averages2008_df = age_2008_df.groupby('state').mean().reset_index()

# Convert the specific columns back to percentages by multiplying by 100
state_averages2008_df[columns_to_convert] = state_averages2008_df[columns_to_convert] * 100

# Display the results without percentage symbols
state_averages2008_df.head()


Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.557143,52.442857,6.928571,12.842857,13.414286,14.385714,6.185714,5.428571,7.385714,4.542857,1.785714,37.371429,2008.0
1,Alaska,52.3,47.7,8.4,14.4,14.0,15.3,6.8,4.3,4.5,2.1,0.5,33.0,2008.0
2,Arizona,49.8625,50.1375,6.6375,14.375,13.425,13.1875,5.6125,4.8,6.775,4.65,1.675,35.45,2008.0
3,Arkansas,48.15,51.85,6.625,12.775,13.175,13.9,5.875,5.725,7.6,4.875,1.775,37.325,2008.0
4,California,49.622642,50.377358,7.379245,14.177358,14.660377,14.045283,5.571698,4.449057,5.79434,3.798113,1.59434,34.930189,2008.0


In [14]:
print("\nstate_averages:")
state_averages2008_df.head()


state_averages:


Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.557143,52.442857,6.928571,12.842857,13.414286,14.385714,6.185714,5.428571,7.385714,4.542857,1.785714,37.371429,2008.0
1,Alaska,52.3,47.7,8.4,14.4,14.0,15.3,6.8,4.3,4.5,2.1,0.5,33.0,2008.0
2,Arizona,49.8625,50.1375,6.6375,14.375,13.425,13.1875,5.6125,4.8,6.775,4.65,1.675,35.45,2008.0
3,Arkansas,48.15,51.85,6.625,12.775,13.175,13.9,5.875,5.725,7.6,4.875,1.775,37.325,2008.0
4,California,49.622642,50.377358,7.379245,14.177358,14.660377,14.045283,5.571698,4.449057,5.79434,3.798113,1.59434,34.930189,2008.0


In [23]:
# Convert the 'year' column to integer
state_averages2008_df['year'] = state_averages2008_df['year'].astype(int)
state_averages2008_df['median_age(years)'] = state_averages2008_df['median_age(years)'].astype(int)

state_averages2008_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.557143,52.442857,6.928571,12.842857,13.414286,14.385714,6.185714,5.428571,7.385714,4.542857,1.785714,37,2008
1,Alaska,52.3,47.7,8.4,14.4,14.0,15.3,6.8,4.3,4.5,2.1,0.5,33,2008
2,Arizona,49.8625,50.1375,6.6375,14.375,13.425,13.1875,5.6125,4.8,6.775,4.65,1.675,35,2008
3,Arkansas,48.15,51.85,6.625,12.775,13.175,13.9,5.875,5.725,7.6,4.875,1.775,37,2008
4,California,49.622642,50.377358,7.379245,14.177358,14.660377,14.045283,5.571698,4.449057,5.79434,3.798113,1.59434,34,2008


In [24]:
state_averages2008_df.dtypes

state                             object
18_and_over_population_male      float64
18_and_over_population_female    float64
20_to_24_years                   float64
25_to_34_years                   float64
35_to_44_years                   float64
45_to_54_years                   float64
55_to_59_years                   float64
60_to_64_years                   float64
65_to_74_years                   float64
75_to_84_years                   float64
85_years_and_over                float64
median_age(years)                  int64
year                               int64
dtype: object

In [25]:
state_averages2008_df.dtypes

state                             object
18_and_over_population_male      float64
18_and_over_population_female    float64
20_to_24_years                   float64
25_to_34_years                   float64
35_to_44_years                   float64
45_to_54_years                   float64
55_to_59_years                   float64
60_to_64_years                   float64
65_to_74_years                   float64
75_to_84_years                   float64
85_years_and_over                float64
median_age(years)                  int64
year                               int64
dtype: object

In [26]:
state_averages2008_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.557143,52.442857,6.928571,12.842857,13.414286,14.385714,6.185714,5.428571,7.385714,4.542857,1.785714,37,2008
1,Alaska,52.3,47.7,8.4,14.4,14.0,15.3,6.8,4.3,4.5,2.1,0.5,33,2008
2,Arizona,49.8625,50.1375,6.6375,14.375,13.425,13.1875,5.6125,4.8,6.775,4.65,1.675,35,2008
3,Arkansas,48.15,51.85,6.625,12.775,13.175,13.9,5.875,5.725,7.6,4.875,1.775,37,2008
4,California,49.622642,50.377358,7.379245,14.177358,14.660377,14.045283,5.571698,4.449057,5.79434,3.798113,1.59434,34,2008


In [27]:
# Export DataFrame as cleaned csv
state_averages2008_df.to_csv('2008_Census_Age_et.csv', index=False)

## Create 2012 Age Census Demo CSV

In [28]:
# For comma-separated values
age_2012_df = pd.read_csv('../censusoutputcsv/2012_census_age.csv', sep=',')

In [29]:
# Display dataframe head
age_2012_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years\t,25_to_34_years,35_to_44_years\t,45_to_54_years\t,55_to_59_years,60_to_64_years\t,65_to_74_years,75_to_84_years,85_years_and_over\t,median_age(years)\t,state_id,congressional_district,year
0,Virginia,49.3,50.7,9.2,15.9,12.1,14.2,6.4,5.2,7.0,3.6,1.5,35.0,51,2,2012
1,Virginia,47.5,52.5,10.4,16.2,11.7,12.8,6.2,5.1,6.2,3.5,1.6,32.9,51,3,2012
2,Virginia,48.9,51.1,6.6,12.0,13.8,16.1,7.0,5.7,7.1,3.6,1.4,38.5,51,4,2012
3,Virginia,47.6,52.4,7.3,11.2,11.8,14.3,7.0,7.1,10.4,5.1,2.0,41.7,51,5,2012
4,Virginia,47.8,52.2,8.4,12.1,11.8,13.8,6.8,6.3,8.7,5.2,2.3,39.2,51,6,2012


In [30]:
# Remove unnecessary spaces
age_2012_df.columns = age_2012_df.columns.str.strip()

In [31]:
# Check dataframe columns
age_2012_df.columns

Index(['state', '18_and_over_population_male', '18_and_over_population_female',
       '20_to_24_years', '25_to_34_years', '35_to_44_years', '45_to_54_years',
       '55_to_59_years', '60_to_64_years', '65_to_74_years', '75_to_84_years',
       '85_years_and_over', 'median_age(years)', 'state_id',
       'congressional_district', 'year'],
      dtype='object')

In [32]:
# Check dataframe types
age_2012_df.dtypes

state                             object
18_and_over_population_male      float64
18_and_over_population_female    float64
20_to_24_years                   float64
25_to_34_years                   float64
35_to_44_years                   float64
45_to_54_years                   float64
55_to_59_years                   float64
60_to_64_years                   float64
65_to_74_years                   float64
75_to_84_years                   float64
85_years_and_over                float64
median_age(years)                float64
state_id                           int64
congressional_district             int64
year                               int64
dtype: object

In [33]:
# Drop 'state_id' and 'congressional_district' columns
age_2012_df = age_2012_df.drop(columns=['state_id', 'congressional_district'])

# Display the updated DataFrame
age_2012_df

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Virginia,49.3,50.7,9.2,15.9,12.1,14.2,6.4,5.2,7.0,3.6,1.5,35.0,2012
1,Virginia,47.5,52.5,10.4,16.2,11.7,12.8,6.2,5.1,6.2,3.5,1.6,32.9,2012
2,Virginia,48.9,51.1,6.6,12.0,13.8,16.1,7.0,5.7,7.1,3.6,1.4,38.5,2012
3,Virginia,47.6,52.4,7.3,11.2,11.8,14.3,7.0,7.1,10.4,5.1,2.0,41.7,2012
4,Virginia,47.8,52.2,8.4,12.1,11.8,13.8,6.8,6.3,8.7,5.2,2.3,39.2,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,Minnesota,49.3,50.7,7.4,13.0,11.6,13.6,6.9,5.7,7.8,5.1,2.6,37.7,2012
433,Minnesota,48.5,51.5,4.9,13.0,13.2,15.9,7.4,6.4,7.4,4.0,1.8,39.7,2012
434,Minnesota,48.1,51.9,7.4,14.7,12.0,14.4,6.8,5.4,6.7,3.8,1.8,35.7,2012
435,Minnesota,50.4,49.6,6.3,13.2,14.4,15.5,6.4,4.8,6.1,3.0,1.3,36.0,2012


In [34]:
# Check for null values in the entire DataFrame
null_values = age_2012_df.isnull().sum()

# Display columns with null values
print(null_values[null_values > 0])

Series([], dtype: int64)


In [35]:
# Group by 'State' and calculate the median of 'Median_Age' to reduce rows 
state_medians_df = age_2012_df.groupby('state')['median_age(years)'].median().reset_index()

state_medians_df['median_age(years)'] = state_medians_df['median_age(years)'].astype(int)

# Rename the columns if desired
state_medians_df.columns = ['state', 'median_age(years)']

# Display the results
print(state_medians_df)

                    state  median_age(years)
0                 Alabama                 38
1                  Alaska                 33
2                 Arizona                 37
3                Arkansas                 37
4              California                 35
5                Colorado                 36
6             Connecticut                 40
7                Delaware                 39
8    District of Columbia                 33
9                 Florida                 40
10                Georgia                 35
11                 Hawaii                 38
12                  Idaho                 35
13               Illinois                 37
14                Indiana                 37
15                   Iowa                 38
16                 Kansas                 35
17               Kentucky                 38
18              Louisiana                 35
19                  Maine                 43
20               Maryland                 37
21        

In [36]:
state_medians_df.columns

Index(['state', 'median_age(years)'], dtype='object')

In [37]:
# List of columns with percentage data to convert to fractions for averaging
columns_to_convert = [
    '18_and_over_population_male',
    '18_and_over_population_female',
    '20_to_24_years',
    '25_to_34_years',
    '35_to_44_years',
    '45_to_54_years',
    '55_to_59_years',
    '60_to_64_years',
    '65_to_74_years',
    '75_to_84_years',
    '85_years_and_over',
]

# Convert the percentage columns to fractions by dividing by 100
age_2012_df[columns_to_convert] = age_2012_df[columns_to_convert] / 100

# Group by 'state' and calculate the average of all numerical columns
state_averages2012_df = age_2012_df.groupby('state').mean().reset_index()

# Convert the specific columns back to percentages by multiplying by 100
state_averages2012_df[columns_to_convert] = state_averages2012_df[columns_to_convert] * 100

# Display the results without percentage symbols
state_averages2012_df.head()


Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.642857,52.357143,7.142857,12.628571,12.785714,13.9,6.728571,5.985714,8.4,4.485714,1.671429,38.2,2012.0
1,Alaska,52.2,47.8,8.2,14.9,13.1,14.2,7.2,5.6,5.5,2.2,0.8,33.8,2012.0
2,Arizona,49.288889,50.711111,7.222222,13.344444,12.655556,12.777778,6.0,5.688889,8.533333,4.555556,1.733333,37.055556,2012.0
3,Arkansas,48.35,51.65,7.0,12.825,12.5,13.425,6.55,5.775,8.6,4.65,1.725,37.825,2012.0
4,California,49.284906,50.715094,7.673585,14.392453,13.607547,13.745283,6.133962,5.184906,6.679245,3.711321,1.711321,35.645283,2012.0


In [39]:
# Convert the 'year' column to integer
state_averages2012_df['year'] = state_averages2012_df['year'].astype(int)
state_averages2012_df['median_age(years)'] = state_averages2012_df['median_age(years)'].astype(int)

state_averages2012_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.642857,52.357143,7.142857,12.628571,12.785714,13.9,6.728571,5.985714,8.4,4.485714,1.671429,38,2012
1,Alaska,52.2,47.8,8.2,14.9,13.1,14.2,7.2,5.6,5.5,2.2,0.8,33,2012
2,Arizona,49.288889,50.711111,7.222222,13.344444,12.655556,12.777778,6.0,5.688889,8.533333,4.555556,1.733333,37,2012
3,Arkansas,48.35,51.65,7.0,12.825,12.5,13.425,6.55,5.775,8.6,4.65,1.725,37,2012
4,California,49.284906,50.715094,7.673585,14.392453,13.607547,13.745283,6.133962,5.184906,6.679245,3.711321,1.711321,35,2012


In [40]:
# Export DataFrame as cleaned csv
state_averages2012_df.to_csv('2012_Census_Age_et.csv', index=False)

## Create 2016 Age Census Demo CSV

In [41]:
# For comma-separated values
age_2016_df = pd.read_csv('../censusoutputcsv/2016_census_age.csv', sep=',')

In [42]:
# Display dataframe head
age_2016_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years\t,25_to_34_years,35_to_44_years\t,45_to_54_years\t,55_to_59_years,60_to_64_years\t,65_to_74_years,75_to_84_years,85_years_and_over\t,median_age(years)\t,state_id,congressional_district,year
0,Alabama,47.3,52.7,6.0,12.8,11.9,13.1,6.9,6.7,10.2,5.0,1.9,39.6,1,1,2016
1,Alabama,48.0,52.0,6.7,13.7,12.0,13.4,7.1,5.8,9.6,4.9,1.6,38.2,1,2,2016
2,Alabama,47.6,52.4,8.1,12.6,12.2,13.3,6.7,6.3,9.3,4.5,1.6,38.3,1,3,2016
3,Alabama,48.3,51.7,5.6,12.0,12.1,13.8,7.2,6.2,10.8,5.4,1.6,40.7,1,4,2016
4,Pennsylvania,44.4,55.6,9.1,18.6,11.2,11.4,6.3,5.7,7.9,4.1,2.0,34.1,42,2,2016


In [43]:
# Remove unnecessary spaces
age_2016_df.columns = age_2016_df.columns.str.strip()

In [44]:
# Check dataframe columns
age_2016_df.columns

Index(['state', '18_and_over_population_male', '18_and_over_population_female',
       '20_to_24_years', '25_to_34_years', '35_to_44_years', '45_to_54_years',
       '55_to_59_years', '60_to_64_years', '65_to_74_years', '75_to_84_years',
       '85_years_and_over', 'median_age(years)', 'state_id',
       'congressional_district', 'year'],
      dtype='object')

In [45]:
# Check dataframe types
age_2016_df.dtypes

state                             object
18_and_over_population_male      float64
18_and_over_population_female    float64
20_to_24_years                   float64
25_to_34_years                   float64
35_to_44_years                   float64
45_to_54_years                   float64
55_to_59_years                   float64
60_to_64_years                   float64
65_to_74_years                   float64
75_to_84_years                   float64
85_years_and_over                float64
median_age(years)                float64
state_id                           int64
congressional_district             int64
year                               int64
dtype: object

In [46]:
# Drop 'state_id' and 'congressional_district' columns
age_2016_df = age_2016_df.drop(columns=['state_id', 'congressional_district'])

# Display the updated DataFrame
age_2016_df

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.3,52.7,6.0,12.8,11.9,13.1,6.9,6.7,10.2,5.0,1.9,39.6,2016
1,Alabama,48.0,52.0,6.7,13.7,12.0,13.4,7.1,5.8,9.6,4.9,1.6,38.2,2016
2,Alabama,47.6,52.4,8.1,12.6,12.2,13.3,6.7,6.3,9.3,4.5,1.6,38.3,2016
3,Alabama,48.3,51.7,5.6,12.0,12.1,13.8,7.2,6.2,10.8,5.4,1.6,40.7,2016
4,Pennsylvania,44.4,55.6,9.1,18.6,11.2,11.4,6.3,5.7,7.9,4.1,2.0,34.1,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,Michigan,47.0,53.0,6.9,13.7,12.4,13.3,6.8,5.8,8.6,4.3,2.5,37.7,2016
433,Minnesota,49.4,50.6,7.7,12.6,11.5,12.3,7.3,6.2,8.8,5.2,2.7,38.3,2016
434,Minnesota,48.1,51.9,4.5,12.9,12.7,14.6,7.5,6.8,8.6,4.2,1.9,40.1,2016
435,Minnesota,50.1,49.9,6.5,12.2,13.6,14.5,7.1,5.6,7.5,3.3,1.5,37.1,2016


In [47]:
# Check for null values in the entire DataFrame
null_values = age_2016_df.isnull().sum()

# Display columns with null values
print(null_values[null_values > 0])

Series([], dtype: int64)


In [48]:
# Group by 'State' and calculate the median of 'Median_Age' to reduce rows 
state_medians_df = age_2016_df.groupby('state')['median_age(years)'].median().reset_index()

state_medians_df['median_age(years)'] = state_medians_df['median_age(years)'].astype(int)

# Rename the columns if desired
state_medians_df.columns = ['state', 'median_age(years)']

# Display the results
print(state_medians_df)

                    state  median_age(years)
0                 Alabama                 39
1                  Alaska                 33
2                 Arizona                 37
3                Arkansas                 38
4              California                 36
5                Colorado                 36
6             Connecticut                 40
7                Delaware                 40
8    District of Columbia                 33
9                 Florida                 41
10                Georgia                 36
11                 Hawaii                 38
12                  Idaho                 36
13               Illinois                 38
14                Indiana                 38
15                   Iowa                 38
16                 Kansas                 36
17               Kentucky                 38
18              Louisiana                 36
19                  Maine                 44
20               Maryland                 38
21        

In [49]:
# List of columns with percentage data to convert to fractions for averaging
columns_to_convert = [
    '18_and_over_population_male',
    '18_and_over_population_female',
    '20_to_24_years',
    '25_to_34_years',
    '35_to_44_years',
    '45_to_54_years',
    '55_to_59_years',
    '60_to_64_years',
    '65_to_74_years',
    '75_to_84_years',
    '85_years_and_over',
]

# Convert the percentage columns to fractions by dividing by 100
age_2016_df[columns_to_convert] = age_2016_df[columns_to_convert] / 100

# Group by 'state' and calculate the average of all numerical columns
state_averages2016_df = age_2016_df.groupby('state').mean().reset_index()

# Convert the specific columns back to percentages by multiplying by 100
state_averages2016_df[columns_to_convert] = state_averages2016_df[columns_to_convert] * 100

# Display the results without percentage symbols
state_averages2016_df.head()


Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.585714,52.414286,6.7,12.942857,12.285714,13.271429,7.014286,6.214286,9.557143,4.771429,1.771429,38.914286,2016.0
1,Alaska,52.5,47.5,7.7,16.4,12.5,12.6,7.3,5.6,7.0,2.3,0.9,33.5,2016.0
2,Arizona,49.277778,50.722222,6.9,13.366667,12.233333,12.2,6.0,6.044444,9.933333,5.188889,1.866667,38.277778,2016.0
3,Arkansas,48.425,51.575,6.5,12.75,12.2,12.7,6.55,6.15,9.55,4.875,1.9,38.225,2016.0
4,California,49.245283,50.754717,7.154717,15.024528,13.154717,13.243396,6.330189,5.624528,7.888679,3.930189,1.830189,36.515094,2016.0


In [50]:
# Convert the 'year' column to integer
state_averages2016_df['year'] = state_averages2016_df['year'].astype(int)
state_averages2016_df['median_age(years)'] = state_averages2016_df['median_age(years)'].astype(int)

state_averages2016_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,47.585714,52.414286,6.7,12.942857,12.285714,13.271429,7.014286,6.214286,9.557143,4.771429,1.771429,38,2016
1,Alaska,52.5,47.5,7.7,16.4,12.5,12.6,7.3,5.6,7.0,2.3,0.9,33,2016
2,Arizona,49.277778,50.722222,6.9,13.366667,12.233333,12.2,6.0,6.044444,9.933333,5.188889,1.866667,38,2016
3,Arkansas,48.425,51.575,6.5,12.75,12.2,12.7,6.55,6.15,9.55,4.875,1.9,38,2016
4,California,49.245283,50.754717,7.154717,15.024528,13.154717,13.243396,6.330189,5.624528,7.888679,3.930189,1.830189,36,2016


In [51]:
# Export DataFrame as cleaned csv
state_averages2016_df.to_csv('2016_Census_Age_et.csv', index=False)

## Create 2020 Age Census Demo CSV

In [3]:
# For comma-separated values
age_2020_df = pd.read_csv('../censusoutputcsv/2020_census_age.csv', sep=',')

In [53]:
# Display dataframe head
age_2020_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years\t,25_to_29_years,30_to_34_years,35_to_39_years,40_to_44_years,45_to_49_years\t,50_to_54_years\t,...,60_to_64_years,65_to_69_years,70_to_74_years,75_to_79_years,80_to_84_years,85_years_and_over,median_age(years),state_id,congressional_district,year
0,Alabama,76.4,78.9,6.2,6.0,6.0,6.1,5.9,6.1,6.2,...,7.0,6.2,5.2,3.5,2.2,1.8,40.8,1,1,2020
1,Alabama,76.5,79.1,6.3,6.2,6.3,6.3,6.0,6.3,6.3,...,6.7,5.8,4.9,3.3,2.1,1.9,40.1,1,2,2020
2,Alabama,77.2,79.6,8.2,6.2,6.0,6.0,5.8,6.2,6.3,...,6.6,5.7,4.8,3.1,1.9,1.6,39.0,1,3,2020
3,Alabama,76.2,78.3,5.7,5.7,5.8,5.8,6.0,6.4,6.6,...,6.8,6.0,5.3,3.6,2.3,1.8,41.3,1,4,2020
4,Alabama,77.3,79.2,6.8,6.4,6.3,6.3,5.9,6.2,6.6,...,6.7,5.4,4.5,3.1,2.1,1.8,39.7,1,5,2020


In [54]:
# Remove unnecessary spaces
age_2020_df.columns = age_2020_df.columns.str.strip()

In [55]:
# Check dataframe columns
age_2020_df.columns

Index(['state', '18_and_over_population_male', '18_and_over_population_female',
       '20_to_24_years', '25_to_29_years', '30_to_34_years', '35_to_39_years',
       '40_to_44_years', '45_to_49_years', '50_to_54_years', '55_to_59_years',
       '60_to_64_years', '65_to_69_years', '70_to_74_years', '75_to_79_years',
       '80_to_84_years', '85_years_and_over', 'median_age(years)', 'state_id',
       'congressional_district', 'year'],
      dtype='object')

In [56]:
# Check dataframe types
age_2020_df.dtypes

state                             object
18_and_over_population_male      float64
18_and_over_population_female    float64
20_to_24_years                   float64
25_to_29_years                   float64
30_to_34_years                   float64
35_to_39_years                   float64
40_to_44_years                   float64
45_to_49_years                   float64
50_to_54_years                   float64
55_to_59_years                   float64
60_to_64_years                   float64
65_to_69_years                   float64
70_to_74_years                   float64
75_to_79_years                   float64
80_to_84_years                   float64
85_years_and_over                float64
median_age(years)                float64
state_id                           int64
congressional_district             int64
year                               int64
dtype: object

In [57]:
# Drop 'state_id' and 'congressional_district' columns
age_2020_df = age_2020_df.drop(columns=['state_id', 'congressional_district'])

# Display the updated DataFrame
age_2020_df

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_29_years,30_to_34_years,35_to_39_years,40_to_44_years,45_to_49_years,50_to_54_years,55_to_59_years,60_to_64_years,65_to_69_years,70_to_74_years,75_to_79_years,80_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,76.4,78.9,6.2,6.0,6.0,6.1,5.9,6.1,6.2,6.9,7.0,6.2,5.2,3.5,2.2,1.8,40.8,2020
1,Alabama,76.5,79.1,6.3,6.2,6.3,6.3,6.0,6.3,6.3,6.9,6.7,5.8,4.9,3.3,2.1,1.9,40.1,2020
2,Alabama,77.2,79.6,8.2,6.2,6.0,6.0,5.8,6.2,6.3,6.7,6.6,5.7,4.8,3.1,1.9,1.6,39.0,2020
3,Alabama,76.2,78.3,5.7,5.7,5.8,5.8,6.0,6.4,6.6,7.0,6.8,6.0,5.3,3.6,2.3,1.8,41.3,2020
4,Alabama,77.3,79.2,6.8,6.4,6.3,6.3,5.9,6.2,6.6,7.3,6.7,5.4,4.5,3.1,2.1,1.8,39.7,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,Wisconsin,78.7,79.5,5.9,5.7,6.1,6.3,5.8,5.8,6.6,7.6,7.5,6.4,5.0,3.4,2.3,2.4,42.3,2020
433,Wisconsin,78.4,79.2,4.6,5.0,5.5,5.9,5.5,5.7,6.5,8.0,8.2,7.2,5.7,3.9,2.6,2.5,45.2,2020
434,Wisconsin,77.1,78.2,5.5,5.8,6.2,6.3,6.0,5.8,6.6,7.6,7.1,6.1,4.7,3.2,2.1,2.0,41.0,2020
435,Wyoming,76.1,76.9,6.1,6.1,6.6,6.8,6.1,5.6,5.4,6.4,7.1,6.3,4.7,3.1,1.9,1.7,38.7,2020


In [65]:
age_2020_df["25_to_34_years"]= age_2020_df["25_to_29_years"] + age_2020_df["30_to_34_years"]
age_2020_df["35_to_44_years"]= age_2020_df["35_to_39_years"] + age_2020_df["40_to_44_years"]
age_2020_df["45_to_54_years"]= age_2020_df["45_to_49_years"] + age_2020_df["50_to_54_years"]
age_2020_df["65_to_74_years"]= age_2020_df["65_to_69_years"] + age_2020_df["70_to_74_years"]
age_2020_df["75_to_84_years"]= age_2020_df["75_to_79_years"] + age_2020_df["80_to_84_years"]

age_2020_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_29_years,30_to_34_years,35_to_39_years,40_to_44_years,45_to_49_years,50_to_54_years,...,75_to_79_years,80_to_84_years,85_years_and_over,median_age(years),year,25_to_34_years,35_to_44_years,45_to_54_years,65_to_74_years,75_to_84_years
0,Alabama,0.764,0.789,0.062,0.06,0.06,0.061,0.059,0.061,0.062,...,0.035,0.022,0.018,40.8,2020,0.12,0.12,0.123,0.114,0.057
1,Alabama,0.765,0.791,0.063,0.062,0.063,0.063,0.06,0.063,0.063,...,0.033,0.021,0.019,40.1,2020,0.125,0.123,0.126,0.107,0.054
2,Alabama,0.772,0.796,0.082,0.062,0.06,0.06,0.058,0.062,0.063,...,0.031,0.019,0.016,39.0,2020,0.122,0.118,0.125,0.105,0.05
3,Alabama,0.762,0.783,0.057,0.057,0.058,0.058,0.06,0.064,0.066,...,0.036,0.023,0.018,41.3,2020,0.115,0.118,0.13,0.113,0.059
4,Alabama,0.773,0.792,0.068,0.064,0.063,0.063,0.059,0.062,0.066,...,0.031,0.021,0.018,39.7,2020,0.127,0.122,0.128,0.099,0.052


In [66]:
age_2020_df.columns

Index(['state', '18_and_over_population_male', '18_and_over_population_female',
       '20_to_24_years', '25_to_29_years', '30_to_34_years', '35_to_39_years',
       '40_to_44_years', '45_to_49_years', '50_to_54_years', '55_to_59_years',
       '60_to_64_years', '65_to_69_years', '70_to_74_years', '75_to_79_years',
       '80_to_84_years', '85_years_and_over', 'median_age(years)', 'year',
       '25_to_34_years', '35_to_44_years', '45_to_54_years', '65_to_74_years',
       '75_to_84_years'],
      dtype='object')

In [77]:
# List of columns to keep in the desired order
desired_columns = [
    'state',
    '18_and_over_population_male',
    '18_and_over_population_female',
    '20_to_24_years',
    '25_to_34_years',
    '35_to_44_years',
    '45_to_54_years',
    '55_to_59_years',
    '60_to_64_years',
    '65_to_74_years',
    '75_to_84_years',
    '85_years_and_over',
    'median_age(years)',
    'year'
]

# Drop columns not in the desired list
columns_to_drop = [col for col in age_2020_df.columns if col not in desired_columns]

# Drop the unwanted columns
age_2020_df_cleaned = age_2020_df.drop(columns=columns_to_drop)

# Reorder the columns to match the desired index
age_2020_df_cleaned = age_2020_df_cleaned[desired_columns]

# Display the cleaned DataFrame
age_2020_df_cleaned.head()


Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,0.00764,0.00789,0.00062,0.12,0.12,0.123,0.00069,0.0007,0.114,0.057,0.00018,40.8,2020
1,Alabama,0.00765,0.00791,0.00063,0.125,0.123,0.126,0.00069,0.00067,0.107,0.054,0.00019,40.1,2020
2,Alabama,0.00772,0.00796,0.00082,0.122,0.118,0.125,0.00067,0.00066,0.105,0.05,0.00016,39.0,2020
3,Alabama,0.00762,0.00783,0.00057,0.115,0.118,0.13,0.0007,0.00068,0.113,0.059,0.00018,41.3,2020
4,Alabama,0.00773,0.00792,0.00068,0.127,0.122,0.128,0.00073,0.00067,0.099,0.052,0.00018,39.7,2020


In [78]:
# Check for null values in the entire DataFrame
null_values = age_2020_df_cleaned.isnull().sum()

# Display columns with null values
print(null_values[null_values > 0])

Series([], dtype: int64)


In [79]:
# Group by 'State' and calculate the median of 'Median_Age' to reduce rows 
state_med_df = age_2020_df.groupby('state')['median_age(years)'].median().reset_index()

state_med_df['median_age(years)'] = state_med_df['median_age(years)'].astype(int)

# Rename the columns if desired
state_med_df.columns = ['state', 'median_age(years)']

# Display the results
print(state_med_df)

                    state  median_age(years)
0                 Alabama                 39
1                  Alaska                 35
2                 Arizona                 39
3                Arkansas                 39
4              California                 37
5                Colorado                 36
6             Connecticut                 41
7                Delaware                 41
8    District of Columbia                 33
9                 Florida                 42
10                Georgia                 37
11                 Hawaii                 40
12                  Idaho                 36
13               Illinois                 39
14                Indiana                 38
15                   Iowa                 39
16                 Kansas                 37
17               Kentucky                 39
18              Louisiana                 38
19                  Maine                 45
20               Maryland                 38
21        

In [80]:
# List of columns with percentage data to convert to fractions for averaging
columns_to_convert = [
    '18_and_over_population_male',
    '18_and_over_population_female',
    '20_to_24_years',
    '25_to_34_years',
    '35_to_44_years',
    '45_to_54_years',
    '55_to_59_years',
    '60_to_64_years',
    '65_to_74_years',
    '75_to_84_years',
    '85_years_and_over',
]

# Convert the percentage columns to fractions by dividing by 100
age_2020_df_cleaned[columns_to_convert] = age_2020_df_cleaned[columns_to_convert] / 100

# Group by 'state' and calculate the average of all numerical columns
state_averages2020_df = age_2020_df_cleaned.groupby('state').mean().reset_index()

# Convert the specific columns back to percentages by multiplying by 100
state_averages2020_df[columns_to_convert] = state_averages2020_df[columns_to_convert] * 100

# Display the results without percentage symbols
state_averages2020_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,0.007669,0.007916,0.000691,0.124286,0.121,0.125,0.000683,0.00067,0.105857,0.052286,0.000179,39.642857,2020.0
1,Alaska,0.00757,0.00753,0.00067,0.153,0.135,0.116,0.00066,0.00064,0.089,0.032,9e-05,35.6,2020.0
2,Arizona,0.007681,0.007821,0.000671,0.131444,0.120667,0.116111,0.000623,0.000627,0.110444,0.057556,0.000187,39.511111,2020.0
3,Arkansas,0.00758,0.007785,0.000652,0.125,0.12275,0.1195,0.00067,0.000643,0.10475,0.054,0.000185,39.1,2020.0
4,California,0.007724,0.007872,0.000692,0.147547,0.13466,0.125906,0.000637,0.000594,0.089679,0.044132,0.000183,37.590566,2020.0


In [81]:
# Convert the 'year' column to integer
state_averages2020_df['year'] = state_averages2020_df['year'].astype(int)
state_averages2020_df['median_age(years)'] = state_averages2020_df['median_age(years)'].astype(int)

state_averages2020_df.head()

Unnamed: 0,state,18_and_over_population_male,18_and_over_population_female,20_to_24_years,25_to_34_years,35_to_44_years,45_to_54_years,55_to_59_years,60_to_64_years,65_to_74_years,75_to_84_years,85_years_and_over,median_age(years),year
0,Alabama,0.007669,0.007916,0.000691,0.124286,0.121,0.125,0.000683,0.00067,0.105857,0.052286,0.000179,39,2020
1,Alaska,0.00757,0.00753,0.00067,0.153,0.135,0.116,0.00066,0.00064,0.089,0.032,9e-05,35,2020
2,Arizona,0.007681,0.007821,0.000671,0.131444,0.120667,0.116111,0.000623,0.000627,0.110444,0.057556,0.000187,39,2020
3,Arkansas,0.00758,0.007785,0.000652,0.125,0.12275,0.1195,0.00067,0.000643,0.10475,0.054,0.000185,39,2020
4,California,0.007724,0.007872,0.000692,0.147547,0.13466,0.125906,0.000637,0.000594,0.089679,0.044132,0.000183,37,2020


In [82]:
state_averages2020_df.columns

Index(['state', '18_and_over_population_male', '18_and_over_population_female',
       '20_to_24_years', '25_to_34_years', '35_to_44_years', '45_to_54_years',
       '55_to_59_years', '60_to_64_years', '65_to_74_years', '75_to_84_years',
       '85_years_and_over', 'median_age(years)', 'year'],
      dtype='object')

In [83]:
# Export DataFrame as cleaned csv
state_averages2020_df.to_csv('2020_Census_Age_et.csv', index=False)

In [87]:
# Merge the DataFrames on 'state'
merged_2008_2012 = pd.merge(state_averages2008_df, state_averages2012_df, on='state', how='outer', suffixes=('_2008', '_2012'))
merged_2016_2020 = pd.merge(state_averages2016_df, state_averages2020_df, on='state', how='outer', suffixes=('_2016', '_2020'))

# Merge the results to combine all years
combined_years_df = pd.merge(merged_2008_2012, merged_2016_2020, on='state', how='outer')

# Read the 2022 DataFrame from CSV
state_averages2022_df = pd.read_csv('../censusoutputcsv/2022_Census_Age_et.csv')

# Rename columns in the 2022 DataFrame
columns_to_rename = {
    '18_and_over_population_male': '18_and_over_population_male_2022',
    '18_and_over_population_female': '18_and_over_population_female_2022',
    '20_to_24_years': '20_to_24_years_2022',
    '25_to_34_years': '25_to_34_years_2022',
    '35_to_44_years': '35_to_44_years_2022',
    '45_to_54_years': '45_to_54_years_2022',
    '55_to_59_years': '55_to_59_years_2022',
    '60_to_64_years': '60_to_64_years_2022',
    '65_to_74_years': '65_to_74_years_2022',
    '75_to_84_years': '75_to_84_years_2022',
    '85_years_and_over': '85_years_and_over_2022',
    'median_age(years)': 'median_age(years)_2022',
    'year': 'year_2022'
}

state_averages2022_df.rename(columns=columns_to_rename, inplace=True)


# Merge the combined DataFrame with the 2022 DataFrame on 'state'
final_merged_df = pd.merge(combined_years_df, state_averages2022_df, on='state', how='outer')

# Export the final merged DataFrame to a new CSV file
final_merged_df.to_csv('age_combined_census_data.csv', index=False)

# Display the first few rows of the final merged DataFrame
final_merged_df.head()

Unnamed: 0,state,18_and_over_population_male_2008,18_and_over_population_female_2008,20_to_24_years_2008,25_to_34_years_2008,35_to_44_years_2008,45_to_54_years_2008,55_to_59_years_2008,60_to_64_years_2008,65_to_74_years_2008,...,25_to_34_years_2022,35_to_44_years_2022,45_to_54_years_2022,55_to_59_years_2022,60_to_64_years_2022,65_to_74_years_2022,75_to_84_years_2022,85_years_and_over_2022,median_age(years)_2022,year_2022
0,Alabama,47.557143,52.442857,6.928571,12.842857,13.414286,14.385714,6.185714,5.428571,7.385714,...,12.86,12.47,12.17,6.27,6.73,10.77,5.57,1.69,39,2022
1,Alaska,52.3,47.7,8.4,14.4,14.0,15.3,6.8,4.3,4.5,...,15.5,14.3,11.2,5.9,5.9,9.4,3.7,0.8,35,2022
2,Arizona,49.8625,50.1375,6.6375,14.375,13.425,13.1875,5.6125,4.8,6.775,...,13.86,12.58,11.61,5.66,6.2,10.63,6.42,1.78,39,2022
3,Arkansas,48.15,51.85,6.625,12.775,13.175,13.9,5.875,5.725,7.6,...,12.62,12.7,11.95,6.0,6.6,10.38,5.58,1.78,39,2022
4,California,49.622642,50.377358,7.379245,14.177358,14.660377,14.045283,5.571698,4.449057,5.79434,...,14.74,13.83,12.51,6.04,5.98,9.14,4.82,1.83,37,2022


In [88]:
final_merged_df.columns

Index(['state', '18_and_over_population_male_2008',
       '18_and_over_population_female_2008', '20_to_24_years_2008',
       '25_to_34_years_2008', '35_to_44_years_2008', '45_to_54_years_2008',
       '55_to_59_years_2008', '60_to_64_years_2008', '65_to_74_years_2008',
       '75_to_84_years_2008', '85_years_and_over_2008',
       'median_age(years)_2008', 'year_2008',
       '18_and_over_population_male_2012',
       '18_and_over_population_female_2012', '20_to_24_years_2012',
       '25_to_34_years_2012', '35_to_44_years_2012', '45_to_54_years_2012',
       '55_to_59_years_2012', '60_to_64_years_2012', '65_to_74_years_2012',
       '75_to_84_years_2012', '85_years_and_over_2012',
       'median_age(years)_2012', 'year_2012',
       '18_and_over_population_male_2016',
       '18_and_over_population_female_2016', '20_to_24_years_2016',
       '25_to_34_years_2016', '35_to_44_years_2016', '45_to_54_years_2016',
       '55_to_59_years_2016', '60_to_64_years_2016', '65_to_74_years_2016'