# Disability Status Script
Run all of these code blocks in order.

### Import Statements

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create a single row for a given year

In [4]:
def _age_group_row(df, rows, total_col, estimate_col, year):
    """Build the record for one age group of a single survey year.

    Parameters
    ----------
    df : pandas.DataFrame
        Parsed table containing the 'Label', 'Estimate', 'Margin of Error'
        and 'Percentage' columns.
    rows : slice
        Positional rows of ``df`` that belong to the age group. The first of
        these rows must carry the group's total population in 'Estimate'.
    total_col : str
        Name of the output column receiving the group's total population.
    estimate_col : str
        Name of the output column receiving percentage * total population.
    year : int
        Value written to the 'Year' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'Year', ``total_col`` and ``estimate_col``, in that order.

    Raises
    ------
    IndexError
        If ``rows`` selects no rows from ``df``.
    """
    group = df.iloc[rows]
    # Move the total population of the segment from its own row to a column.
    total = int(group.iloc[0]['Estimate'].replace(',', ''))

    # Every step below returns a new frame, so nothing is ever assigned onto
    # a slice of ``df`` (the cause of SettingWithCopyWarning).
    return (
        group
        .assign(**{total_col: total})
        # Rows with any missing value are discarded; this removes the total
        # row, whose 'Percentage' is missing because its estimate is a count.
        .dropna()
        .drop(columns=['Estimate', 'Margin of Error'])
        # Calculate the population estimate.
        .assign(**{estimate_col: lambda g: g['Percentage'] * g[total_col]})
        .drop(columns=['Percentage'])
        # The label is replaced by the year, which is the merge key later on.
        .assign(Label=year)
        .rename(columns={'Label': 'Year'})
    )


def disability(file, year):
    """Summarise one year's disability-status CSV as a one-row-per-year frame.

    The CSV must provide 'Label', 'Estimate' and 'Margin of Error' columns.
    Rows are addressed by position: rows 2-3 are used for the under-18
    population, rows 4-5 for the 18-64 population and rows 6 onward for the
    65+ population. In each group the first row supplies the total population
    (a comma-grouped count) and the remaining rows supply percentages
    (strings containing '%').

    Parameters
    ----------
    file : str or path-like
        Location of the CSV file to read.
    year : int
        Year recorded in the ``year`` column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns: ``year``, then total population and estimated population
        with a disability for each age group, then
        ``total_civ_noninst_population`` and
        ``total_noninst_population_with_disability`` (sums across groups).

    Raises
    ------
    FileNotFoundError
        If ``file`` does not exist (raised by ``pandas.read_csv``).
    """
    total_u18 = 'Total Civilian Noninstitutionalized Population Under 18'
    est_u18 = 'Noninstitutionalized Under 18 Population w/ Disability Estimate'
    total_mid = 'Total Civilian Noninstitutionalized Population 18-64'
    est_mid = 'Noninstitutionalized 18-64 Population w/ Disability Estimate'
    total_65 = 'Total Civilian Noninstitutionalized Population 65+'
    est_65 = 'Noninstitutionalized 65+ Population w/ Disability Estimate'
    total_all = 'Total Civilian Noninstitutionalized Population'
    est_all = 'Total Noninstitutionalized Population w/ Disability Estimate'

    # Read in file
    df = pd.read_csv(file)

    # Get rid of leading spaces in Labels
    df['Label'] = df['Label'].str.lstrip()

    # Convert percentage estimates to float fractions; counts become missing
    df['Percentage'] = df['Estimate'].apply(
        lambda x: float(x.rstrip('%')) / 100 if '%' in x else None
    )

    # One frame per age group, each keyed by 'Year'
    under18 = _age_group_row(df, slice(2, 4), total_u18, est_u18, year)
    middle = _age_group_row(df, slice(4, 6), total_mid, est_mid, year)
    above65 = _age_group_row(df, slice(6, None), total_65, est_65, year)

    # Merge dataframes
    merged_df = under18.merge(middle, on='Year').merge(above65, on='Year')

    # Calculate extra columns
    merged_df[total_all] = merged_df[total_u18] + merged_df[total_mid] + merged_df[total_65]
    merged_df[est_all] = merged_df[est_u18] + merged_df[est_mid] + merged_df[est_65]

    # Rename all columns with abbreviated underscored naming conventions
    return merged_df.rename(columns={
        'Year': 'year',
        total_u18: 'total_civ_noninst_population_under_18',
        est_u18: 'noninst_under_18_population_with_disability',
        total_mid: 'total_civ_noninst_population_18_to_64',
        est_mid: 'noninst_18_to_64_population_with_disability',
        total_65: 'total_civ_noninst_population_65_plus',
        est_65: 'noninst_65_plus_population_with_disability',
        total_all: 'total_civ_noninst_population',
        est_all: 'total_noninst_population_with_disability',
    })

### Test that function works

In [5]:
# Single-year smoke test: the resulting one-row frame is displayed as the cell output.
disability(file='data/2015/DISABILITY_STATUS.CSV', year=2015)

Unnamed: 0,year,total_civ_noninst_population_under_18,noninst_under_18_population_with_disability,total_civ_noninst_population_18_to_64,noninst_18_to_64_population_with_disability,total_civ_noninst_population_65_plus,noninst_65_plus_population_with_disability,total_civ_noninst_population,total_noninst_population_with_disability
0,2015,17650,405.95,51888,2801.952,12022,3390.204,81560,6598.106


### Test that function works for each year
Should print only "No file for year '2020'".

In [6]:
# Run the function for every survey year from 2010 through 2022.
for year in range(2010, 2023):
    file = f'data/{year}/DISABILITY_STATUS.CSV'

    try:
        disability(file, year)
    except FileNotFoundError:
        # pandas.read_csv raises this when the year's CSV is absent.
        print(f"No file for year '{year}'")
    except Exception as err:
        # Any other failure is a data/parsing problem, not a missing file;
        # report it separately instead of mislabelling it, and keep checking
        # the remaining years.
        print(f"Error processing year '{year}': {type(err).__name__}: {err}")

No file for year '2020'


### Run function for each year

In [7]:
# One frame per survey year, oldest first. There is no 2020 entry: the
# per-year check earlier in the notebook reports no file for that year.
DISABILITY_STATUS_2010 = disability(file='data/2010/DISABILITY_STATUS.CSV', year=2010)
DISABILITY_STATUS_2011 = disability(file='data/2011/DISABILITY_STATUS.CSV', year=2011)
DISABILITY_STATUS_2012 = disability(file='data/2012/DISABILITY_STATUS.CSV', year=2012)
DISABILITY_STATUS_2013 = disability(file='data/2013/DISABILITY_STATUS.CSV', year=2013)
DISABILITY_STATUS_2014 = disability(file='data/2014/DISABILITY_STATUS.CSV', year=2014)
DISABILITY_STATUS_2015 = disability(file='data/2015/DISABILITY_STATUS.CSV', year=2015)
DISABILITY_STATUS_2016 = disability(file='data/2016/DISABILITY_STATUS.CSV', year=2016)
DISABILITY_STATUS_2017 = disability(file='data/2017/DISABILITY_STATUS.CSV', year=2017)
DISABILITY_STATUS_2018 = disability(file='data/2018/DISABILITY_STATUS.CSV', year=2018)
DISABILITY_STATUS_2019 = disability(file='data/2019/DISABILITY_STATUS.CSV', year=2019)
DISABILITY_STATUS_2021 = disability(file='data/2021/DISABILITY_STATUS.CSV', year=2021)
DISABILITY_STATUS_2022 = disability(file='data/2022/DISABILITY_STATUS.CSV', year=2022)

### Combine each year into single dataframe containing all years

In [8]:
# Stack the yearly frames, newest year first; each keeps its own index here.
DISABILITY_STATUS = pd.concat(
    [
        DISABILITY_STATUS_2022,
        DISABILITY_STATUS_2021,
        DISABILITY_STATUS_2019,
        DISABILITY_STATUS_2018,
        DISABILITY_STATUS_2017,
        DISABILITY_STATUS_2016,
        DISABILITY_STATUS_2015,
        DISABILITY_STATUS_2014,
        DISABILITY_STATUS_2013,
        DISABILITY_STATUS_2012,
        DISABILITY_STATUS_2011,
        DISABILITY_STATUS_2010,
    ]
)
DISABILITY_STATUS

Unnamed: 0,year,total_civ_noninst_population_under_18,noninst_under_18_population_with_disability,total_civ_noninst_population_18_to_64,noninst_18_to_64_population_with_disability,total_civ_noninst_population_65_plus,noninst_65_plus_population_with_disability,total_civ_noninst_population,total_noninst_population_with_disability
0,2022,19453,466.872,55094,3305.64,15582,4565.526,90129,8338.038
0,2021,18035,414.805,53935,4045.125,16319,5515.822,88289,9975.752
0,2019,18859,584.629,54497,2561.359,16790,4953.05,90146,8099.038
0,2018,18357,532.353,54856,2303.952,15609,3902.25,88822,6738.555
0,2017,17674,459.524,51163,1637.216,14504,3916.08,83341,6012.82
0,2016,14237,597.954,50018,2200.792,10736,3746.864,74991,6545.61
0,2015,17650,405.95,51888,2801.952,12022,3390.204,81560,6598.106
0,2014,15589,374.136,52904,3174.24,10916,3820.6,79409,7368.976
0,2013,18242,383.082,53803,2044.514,11427,4022.304,83472,6449.9
0,2012,16591,663.64,51062,2246.728,10440,3476.52,78093,6386.888


### Reset index on combined dataframe

In [9]:
# Replace the index inherited from the concatenated frames with a fresh
# 0..n-1 index, discarding the old one instead of keeping it as a column.
DISABILITY_STATUS = DISABILITY_STATUS.reset_index(drop=True)
DISABILITY_STATUS

Unnamed: 0,year,total_civ_noninst_population_under_18,noninst_under_18_population_with_disability,total_civ_noninst_population_18_to_64,noninst_18_to_64_population_with_disability,total_civ_noninst_population_65_plus,noninst_65_plus_population_with_disability,total_civ_noninst_population,total_noninst_population_with_disability
0,2022,19453,466.872,55094,3305.64,15582,4565.526,90129,8338.038
1,2021,18035,414.805,53935,4045.125,16319,5515.822,88289,9975.752
2,2019,18859,584.629,54497,2561.359,16790,4953.05,90146,8099.038
3,2018,18357,532.353,54856,2303.952,15609,3902.25,88822,6738.555
4,2017,17674,459.524,51163,1637.216,14504,3916.08,83341,6012.82
5,2016,14237,597.954,50018,2200.792,10736,3746.864,74991,6545.61
6,2015,17650,405.95,51888,2801.952,12022,3390.204,81560,6598.106
7,2014,15589,374.136,52904,3174.24,10916,3820.6,79409,7368.976
8,2013,18242,383.082,53803,2044.514,11427,4022.304,83472,6449.9
9,2012,16591,663.64,51062,2246.728,10440,3476.52,78093,6386.888


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [10]:
# Write the combined table without the row index.
DISABILITY_STATUS.to_csv(
    path_or_buf='data/final/DISABILITY_STATUS.CSV',
    index=False,
)

### Check that dataframe was saved properly as CSV

In [11]:
# Read the saved file back and display it to confirm the write succeeded.
df = pd.read_csv(filepath_or_buffer='data/final/DISABILITY_STATUS.CSV')
df

Unnamed: 0,year,total_civ_noninst_population_under_18,noninst_under_18_population_with_disability,total_civ_noninst_population_18_to_64,noninst_18_to_64_population_with_disability,total_civ_noninst_population_65_plus,noninst_65_plus_population_with_disability,total_civ_noninst_population,total_noninst_population_with_disability
0,2022,19453,466.872,55094,3305.64,15582,4565.526,90129,8338.038
1,2021,18035,414.805,53935,4045.125,16319,5515.822,88289,9975.752
2,2019,18859,584.629,54497,2561.359,16790,4953.05,90146,8099.038
3,2018,18357,532.353,54856,2303.952,15609,3902.25,88822,6738.555
4,2017,17674,459.524,51163,1637.216,14504,3916.08,83341,6012.82
5,2016,14237,597.954,50018,2200.792,10736,3746.864,74991,6545.61
6,2015,17650,405.95,51888,2801.952,12022,3390.204,81560,6598.106
7,2014,15589,374.136,52904,3174.24,10916,3820.6,79409,7368.976
8,2013,18242,383.082,53803,2044.514,11427,4022.304,83472,6449.9
9,2012,16591,663.64,51062,2246.728,10440,3476.52,78093,6386.888
