# Responsibility for Grandchildren Under 18 Years Script
Run all of these code blocks in order to create the RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS data table.

### Import Statements

In [38]:
import pandas as pd 
import warnings
warnings.filterwarnings("ignore")

### Function to create row for each year

In [39]:
def read_file(file,year):
    # Read in file
    df = pd.read_csv(file)

    # Get rid of spaces in Labels
    df['Label'] = df['Label'].apply(lambda x: x.lstrip())

    # Convert percentage estimates to float
    df['Percentage'] = df['Estimate'].apply(lambda x: float(x.rstrip('%'))/100 if '%' in x else 0)

    # Move Total Population 30+ from row to column
    df['total_pop_30_plus'] = int(df.iloc[0]['Estimate'].replace(',',''))
    df = df.drop(df.index[0])

    # Drop unnecessary columns
    df.drop(columns=['Estimate','Margin of Error'], inplace=True)

    # Calculate first population estimate
    living_with = df.iloc[[0]]
    living_with['estimate'] = living_with['Percentage']*living_with['total_pop_30_plus']
    living_with.drop(columns=['Percentage'], inplace=True)
    lw_est = living_with.iat[0, 2]  # Row index 0, Column index 1

    # Calculate second population estimate
    responsible_for = df.iloc[[1]]
    responsible_for['estimate'] = responsible_for['Percentage'].apply(lambda x: x*lw_est)
    responsible_for.drop(columns=['Percentage'], inplace=True)

    # Combine estimate dataframes back together
    df = pd.concat([living_with,responsible_for])

    # Move Total Popuation 30+ back to row from column
    column_as_df = df[['total_pop_30_plus']]
    row = column_as_df.transpose()
    row.reset_index(inplace=True)
    row.drop(columns=[2], inplace=True)
    new_col_names = ['Label','estimate']
    row.columns = new_col_names
    df.drop(columns=['total_pop_30_plus'], inplace=True)
    df = pd.concat([df,row])

    # Change index and transpose table
    df.set_index('Label', inplace=True)
    df = df.T

    # Change index to Year
    df.index = [year]

    # Rename all columns with abbreviated underscored naming conventions
    col_names = ['living_with','responsible_for','total_pop_30_plus']
    df.columns = col_names

    return df

### Test that function works

In [40]:
read_file('data/2014/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV', 2010)

Unnamed: 0,living_with,responsible_for,total_pop_30_plus
2010,2993.896,0.0,50744.0


### Test that function works for each year
Should return only "No file for year '2020'"

In [41]:
for year in range(2010,2023):
    file = 'data/'+str(year)+'/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV'

    try:
        read_file(file,year)
    except:
        print(f"No file for year '{year}'")

No file for year '2020'


### Run function for each year

In [42]:
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2022 = read_file('data/2022/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2022)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2021 = read_file('data/2021/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2021)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2019 = read_file('data/2019/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2019)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2018 = read_file('data/2018/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2018)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2017 = read_file('data/2017/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2017)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2016 = read_file('data/2016/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2016)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2015 = read_file('data/2015/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2015)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2014 = read_file('data/2014/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2014)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2013 = read_file('data/2013/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2013)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2012 = read_file('data/2012/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2012)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2011 = read_file('data/2011/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2011)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2010 = read_file('data/2010/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV',2010)

### Combine each year into single dataframe containing all years

In [43]:
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS = pd.concat([RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2022,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2021,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2019,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2018,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2017,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2016,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2015,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2014,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2013,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2012,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2011,RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS_2010])
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS

Unnamed: 0,living_with,responsible_for,total_pop_30_plus
2022,2531.55,349.3539,60275.0
2021,2183.024,0.0,57448.0
2019,3995.952,623.368512,58764.0
2018,3066.792,0.0,57864.0
2017,3475.458,524.794158,55166.0
2016,2968.565,584.807305,48665.0
2015,3936.525,0.0,52487.0
2014,2993.896,0.0,50744.0
2013,1703.712,0.0,53241.0
2012,2098.488,0.0,49964.0


### Reset index on combined dataframe

In [44]:
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.reset_index(inplace=True)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.rename(columns={'index':'year'}, inplace=True)
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS

Unnamed: 0,year,living_with,responsible_for,total_pop_30_plus
0,2022,2531.55,349.3539,60275.0
1,2021,2183.024,0.0,57448.0
2,2019,3995.952,623.368512,58764.0
3,2018,3066.792,0.0,57864.0
4,2017,3475.458,524.794158,55166.0
5,2016,2968.565,584.807305,48665.0
6,2015,3936.525,0.0,52487.0
7,2014,2993.896,0.0,50744.0
8,2013,1703.712,0.0,53241.0
9,2012,2098.488,0.0,49964.0


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [45]:
RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.to_csv("data/final/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV", index=False)

### Check that dataframe was saved properly as CSV

In [46]:
df = pd.read_csv('data/final/RESPONSIBILITY_FOR_GRANDCHILDREN_UNDER_18_YEARS.CSV')
df

Unnamed: 0,year,living_with,responsible_for,total_pop_30_plus
0,2022,2531.55,349.3539,60275.0
1,2021,2183.024,0.0,57448.0
2,2019,3995.952,623.368512,58764.0
3,2018,3066.792,0.0,57864.0
4,2017,3475.458,524.794158,55166.0
5,2016,2968.565,584.807305,48665.0
6,2015,3936.525,0.0,52487.0
7,2014,2993.896,0.0,50744.0
8,2013,1703.712,0.0,53241.0
9,2012,2098.488,0.0,49964.0
