### Chart of the Day

##### Childcare costs as a percentage of income

In [101]:
import pandas as pd, altair as alt, requests, country_converter as coco
import sys, importlib

sys.path.append('../')
import api_wrapper.api_hub as api_hub

In [118]:
# Load the OECD childcare-costs CSV and discard the columns that are not used
# in this notebook: `TIME`, `Flag Codes` and `Flags`.
df = pd.read_csv('oecd_childcare_costs.csv').drop(
    columns=['TIME', 'Flag Codes', 'Flags']
)
df

Unnamed: 0,LOCATION,Country,TYPE,Type of indicator,COMPONENTS,Net childcares cost by item,FAMILY,Family type,EARNINGS,Earnings of the first adult,SATOPUPS,Include social assistance benefits,HBTOPUPS,Include housing benefits,Year,Value
0,AUS,Australia,0,National currency,1,Gross childcare fees,SINGLE2C,Single person with 2 children,MIN,Minimum Wage,1,Yes,1,Yes,2004,17472.0
1,AUS,Australia,0,National currency,1,Gross childcare fees,SINGLE2C,Single person with 2 children,MIN,Minimum Wage,1,Yes,1,Yes,2008,21632.0
2,AUS,Australia,0,National currency,1,Gross childcare fees,SINGLE2C,Single person with 2 children,MIN,Minimum Wage,1,Yes,1,Yes,2012,28353.0
3,AUS,Australia,0,National currency,1,Gross childcare fees,SINGLE2C,Single person with 2 children,MIN,Minimum Wage,1,Yes,1,Yes,2015,33280.0
4,AUS,Australia,0,National currency,1,Gross childcare fees,SINGLE2C,Single person with 2 children,MIN,Minimum Wage,1,Yes,1,Yes,2018,38272.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221755,OECD,OECD - Total,2,Percentage of average wage,5,Total,2EARNERC2C_MIN,Couple with 2 children - partner's earnings: M...,67AW,67% of the Average Wage,0,No,0,No,2018,13.0
221756,OECD,OECD - Total,2,Percentage of average wage,5,Total,2EARNERC2C_MIN,Couple with 2 children - partner's earnings: M...,67AW,67% of the Average Wage,0,No,0,No,2019,11.0
221757,OECD,OECD - Total,2,Percentage of average wage,5,Total,2EARNERC2C_MIN,Couple with 2 children - partner's earnings: M...,67AW,67% of the Average Wage,0,No,0,No,2020,11.0
221758,OECD,OECD - Total,2,Percentage of average wage,5,Total,2EARNERC2C_MIN,Couple with 2 children - partner's earnings: M...,67AW,67% of the Average Wage,0,No,0,No,2021,10.0


Let's explore the indicators available

In [103]:
# Distinct units in which the cost is reported (`Type of indicator` column).
df['Type of indicator'].unique()

array(['National currency', 'Percentage of net household income',
       'Percentage of average wage'], dtype=object)

In [104]:
# Distinct cost components; note the column name is spelled "childcares" in the data.
df['Net childcares cost by item'].unique()

array(['Gross childcare fees', 'Childcare benefits', 'Change in taxes',
       'Changes in other benefits', 'Total'], dtype=object)

In [105]:
# Distinct household configurations (`Family type` column).
df['Family type'].unique()

array(['Single person with 2 children',
       "Couple with 2 children - partner's earnings: AW",
       "Couple with 2 children - partner's earnings: 67% of the AW",
       "Couple with 2 children - partner's earnings: Minimum Wage"],
      dtype=object)

In [106]:
# Distinct earnings levels assumed for the first adult.
df['Earnings of the first adult'].unique()

array(['Minimum Wage', 'Average Wage', '67% of the Average Wage'],
      dtype=object)

In [107]:
# Distinct values of the social-assistance flag.
df['Include social assistance benefits'].unique()

array(['Yes', 'No'], dtype=object)

In [108]:
# Distinct values of the housing-benefits flag.
df['Include housing benefits'].unique()

array(['Yes', 'No'], dtype=object)

In [109]:
# Years covered by the data.
df['Year'].unique()

array([2004, 2008, 2012, 2015, 2018, 2019, 2020, 2021, 2022])

#### Filter dataset

Let's filter to:
- `Type of indicator`: Percentage of net household income
- `Net childcares cost by item`: Total
- `Family type`:
    - Single person with 2 children
    - Couple with 2 children - partner's earnings: 67% of the AW
- `Earnings of the first adult`: 67% of the Average Wage
- `Include social assistance benefits`: Yes
- `Include housing benefits`: No
- `Year`: 2022

In [119]:
# Narrow the data to a single indicator, cost item, earnings level, benefit
# configuration and year, keeping two family types for comparison.
family_types = ['Single person with 2 children', "Couple with 2 children - partner's earnings: 67% of the AW"]

# All conditions are combined into one boolean mask; a row must satisfy every one.
selection_mask = (
    (df['Type of indicator'] == 'Percentage of net household income')
    & (df['Net childcares cost by item'] == 'Total')
    & df['Family type'].isin(family_types)
    & (df['Earnings of the first adult'] == '67% of the Average Wage')
    & (df['Include social assistance benefits'] == 'Yes')
    & (df['Include housing benefits'] == 'No')
    & (df['Year'] == 2022)
)
df = df[selection_mask].copy()

In [120]:
# Preview the first rows of the filtered frame.
df.head()

Unnamed: 0,LOCATION,Country,TYPE,Type of indicator,COMPONENTS,Net childcares cost by item,FAMILY,Family type,EARNINGS,Earnings of the first adult,SATOPUPS,Include social assistance benefits,HBTOPUPS,Include housing benefits,Year,Value
208121,AUS,Australia,1,Percentage of net household income,5,Total,SINGLE2C,Single person with 2 children,67AW,67% of the Average Wage,1,Yes,0,No,2022,10.0
208175,AUT,Austria,1,Percentage of net household income,5,Total,SINGLE2C,Single person with 2 children,67AW,67% of the Average Wage,1,Yes,0,No,2022,5.0
208229,BEL,Belgium,1,Percentage of net household income,5,Total,SINGLE2C,Single person with 2 children,67AW,67% of the Average Wage,1,Yes,0,No,2022,9.0
208283,CAN,Canada,1,Percentage of net household income,5,Total,SINGLE2C,Single person with 2 children,67AW,67% of the Average Wage,1,Yes,0,No,2022,-2.0
208337,CZE,Czech Republic,1,Percentage of net household income,5,Total,SINGLE2C,Single person with 2 children,67AW,67% of the Average Wage,1,Yes,0,No,2022,41.0


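Now let's validate that we have reduced the dataset to two measures: apart from `LOCATION`, `Country`, `Value` and the family type (`FAMILY` / `Family type`), every column should hold a single value.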

In [121]:
# For each column, print unique values
for col in df.columns:
    if col != 'LOCATION' and col != 'Country':
        print(col, df[col].unique())

TYPE [1]
Type of indicator ['Percentage of net household income']
COMPONENTS [5]
Net childcares cost by item ['Total']
FAMILY ['SINGLE2C' '2EARNERC2C_67AW']
Family type ['Single person with 2 children'
 "Couple with 2 children - partner's earnings: 67% of the AW"]
EARNINGS ['67AW']
Earnings of the first adult ['67% of the Average Wage']
SATOPUPS [1]
Include social assistance benefits ['Yes']
HBTOPUPS [0]
Include housing benefits ['No']
Year [2022]
Value [10.  5.  9. -2. 41.  3.  4.  1.  0. 11.  7.  6.  2. 12. 14. nan  8. 50.
 33. 17. 15. 22. 24. 32. 21. 23. 29.]


In [134]:
# Build the plotting frame. Every column dropped here is either constant after
# the filtering step or (FAMILY) a code that duplicates `Family type`, so only
# LOCATION, Country, `Family type` and Value remain.
df_subset = df.drop(
    columns=[
        'TYPE', 'Type of indicator',
        'COMPONENTS', 'Net childcares cost by item',
        'FAMILY',
        'EARNINGS', 'Earnings of the first adult',
        'SATOPUPS', 'Include social assistance benefits',
        'HBTOPUPS', 'Include housing benefits',
        'Year',
    ]
).reset_index(drop=True)

# Clean indicator names: map the verbose OECD family-type labels to short
# legend labels. Both couple variants map to the same short label.
replace = {
    "Single person with 2 children": "Single - 2 children",
    "Couple with 2 children - partner's earnings: AW": "Couple - 2 children",
    "Couple with 2 children - partner's earnings: 67% of the AW": "Couple - 2 children"
}

# Replace `Family type` values.
# Assign the result back instead of calling `.replace(..., inplace=True)` on the
# selected column: the in-place form acts on an intermediate object and does not
# update `df_subset` under pandas Copy-on-Write (pandas 2.x warns about it).
df_subset['Family type'] = df_subset['Family type'].replace(replace)
df_subset

Unnamed: 0,LOCATION,Country,Family type,Value
0,AUS,Australia,Single - 2 children,10.0
1,AUT,Austria,Single - 2 children,5.0
2,BEL,Belgium,Single - 2 children,9.0
3,CAN,Canada,Single - 2 children,-2.0
4,CZE,Czech Republic,Single - 2 children,41.0
...,...,...,...,...
75,LTU,Lithuania,Couple - 2 children,8.0
76,MLT,Malta,Couple - 2 children,0.0
77,ROU,Romania,Couple - 2 children,10.0
78,SVN,Slovenia,Couple - 2 children,8.0


Filter countries

In [135]:
# List the locations present after filtering. This inspects `df`; `df_subset`
# was derived from it by dropping columns only, so it holds the same rows.
# The list includes the 'OECD - Total' aggregate alongside individual countries.
df['Country'].unique()

array(['Australia', 'Austria', 'Belgium', 'Canada', 'Czech Republic',
       'Denmark', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
       'Iceland', 'Ireland', 'Italy', 'Japan', 'Korea', 'Luxembourg',
       'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal',
       'Slovak Republic', 'Spain', 'Sweden', 'Switzerland', 'Türkiye',
       'United Kingdom', 'United States', 'Bulgaria', 'Croatia', 'Cyprus',
       'Estonia', 'Israel', 'Latvia', 'Lithuania', 'Malta', 'Romania',
       'Slovenia', 'OECD - Total'], dtype=object)

In [136]:
# LOCATION codes to exclude from the chart data.
to_drop = ['LUX']

# Keep only the rows whose LOCATION is not in the exclusion list.
is_excluded = df_subset['LOCATION'].isin(to_drop)
df_subset = df_subset.loc[~is_excluded].copy()

Add a rank of countries by `Value` within each `Family type` (`Couple - 2 children` and `Single - 2 children`); the lowest value gets rank 0.

In [137]:
# Preview `df_subset` after the location filter.
df_subset.head()

Unnamed: 0,LOCATION,Country,Family type,Value
0,AUS,Australia,Single - 2 children,10.0
1,AUT,Austria,Single - 2 children,5.0
2,BEL,Belgium,Single - 2 children,9.0
3,CAN,Canada,Single - 2 children,-2.0
4,CZE,Czech Republic,Single - 2 children,41.0


In [147]:
def rank_by_value(frame: pd.DataFrame, family_type: str, rank_column: str) -> pd.DataFrame:
    """Return the rows of one family type ranked by ascending `Value`.

    Parameters
    ----------
    frame : DataFrame with at least `Family type` and `Value` columns.
    family_type : label to select in the `Family type` column.
    rank_column : name of the rank column added to the result.

    Returns
    -------
    A new DataFrame sorted by `Value` (lowest first) with rows containing any
    missing value removed. `rank_column` holds the 0-based position in that
    order, so the lowest value has rank 0.
    """
    return (
        frame[frame['Family type'] == family_type]
        .sort_values(by='Value', ascending=True)
        .dropna()
        # First reset discards the old index so positions run 0..n-1 ...
        .reset_index(drop=True)
        # ... second reset turns those positions into the rank column.
        .reset_index(names=rank_column)
    )


df_couple = rank_by_value(df_subset, 'Couple - 2 children', 'Rank_couple')
df_single = rank_by_value(df_subset, 'Single - 2 children', 'Rank_single')

In [148]:
# Stack the two ranked frames row-wise under a fresh 0..n-1 index. Columns
# present in only one input (e.g. Rank_couple, Rank_single) are filled with
# NaN for the rows coming from the other input.
ranked_frames = (df_couple, df_single)
df_main = pd.concat(ranked_frames, ignore_index=True)

---

### Chart

In [125]:
# Project-local Altair styling module.
# NOTE(review): this import sits mid-notebook; presumably it relies on the
# `sys.path.append('../')` in the first cell -- confirm, and consider moving it
# to the imports cell so a fresh "Run All" exposes all dependencies up front.
import altair_wrapper.eco_styles as eco_styles
styles = eco_styles.EcoStyles()

In [157]:
# Re-import the styling module so edits to it are picked up without restarting
# the kernel, then rebuild the style object and enable the dark theme.
importlib.reload(eco_styles)
styles = eco_styles.EcoStyles()
styles.register_and_enable_theme(dark_mode=True)

# One point per location and family type. The x axis is ordered by the y value.
# A title and explicit axis titles are set so the figure can be read on its own;
# without them the axes are labelled with the raw column names.
alt.Chart(df_subset).mark_point().encode(
    x=alt.X('LOCATION:N', title='Country', axis=alt.Axis(labelAngle=-40), sort='y'),
    y=alt.Y('Value:Q', title='% of net household income'),
    # NOTE(review): the two range colours are the same RGB and differ only in
    # alpha (the second is ~90% opaque) -- confirm the two series are
    # distinguishable enough in the rendered chart.
    color=alt.Color(
        'Family type:N',
        legend=alt.Legend(orient='top-left'),
        scale=alt.Scale(range=['#e6224b', '#e6224be6']),
    ),
).properties(
    title='Net childcare costs as a percentage of net household income, 2022',
    width=400,
    height=300,
)