# Income inequality

Exploring income data by gender.

To obtain the raw data, visit the table [Nettoinkomst för boende i Sverige hela året](http://www.statistikdatabasen.scb.se/pxweb/sv/ssd/START__HE__HE0110__HE0110A/NetInk02/?rxid=e9488ae4-e81e-410e-b7ce-760a5828ec57) at SCB. Select "Medelinkomst, tkr", "Riket", data for men and women, "Ålder i ettårsklasser", and all years.

Note that generating the GIF requires [ImageMagick](https://www.imagemagick.org/) to be installed, with its `convert` command accessible from the command line.

- Date: 2019-02-06
- Source: SCB

## Setup

In [1]:
import pathlib
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.plot import ds_plot
from IPython.display import display, Markdown

# Render inline figures in the 'retina' (high-resolution) format
%config InlineBackend.figure_format = 'retina'

# Custom chart style
# Applies the project's matplotlib style sheet to the figures created below
plt.style.use('../assets/datastory.mpltstyle')

In [2]:
# Load the raw income table. The first line of the file is skipped and the
# bytes are decoded as Latin-1.
# NOTE(review): presumably the SCB export starts with a title line and uses a
# legacy encoding for the Swedish column names -- confirm against the raw file.
df = pd.read_csv(
    '../data/raw/income.csv',
    skiprows=1,
    encoding='latin1',
)

## Transform

In [3]:
# Reshape the wide table (one row per sex/age, one column per year) into a
# frame indexed by (year, age) with one income column per sex.
df = (df
    .drop('region', axis=1)
    # Turn the textual age label into an integer: drop the 'år' unit and any
    # '+' sign, then strip whitespace. regex=False makes both replacements
    # literal; '+' is a regex quantifier and the default value of `regex`
    # differs between pandas versions, so relying on the default is fragile.
    .assign(age=lambda x: x['ålder'].str.replace('år', '', regex=False))
    .assign(age=lambda x: x.age.str.replace('+', '', regex=False))
    .assign(age=lambda x: x.age.str.strip().astype(int))
    .drop('ålder', axis=1)
    .rename(columns={'kön': 'sex'})
    # Keep ages up to and including 90
    .query('age <= 90')
    # Wide -> long: the year columns are named by strings '2000'..'2017',
    # so the resulting 'year' values are strings as well
    .melt(id_vars=['sex', 'age'],
          value_vars=[str(x) for x in range(2000, 2018)],
          var_name='year')
    # Long -> one column per sex. pivot_table uses its default aggregation;
    # NOTE(review): each (year, age, sex) combination is assumed to occur
    # once, in which case the values pass through unchanged -- confirm.
    .pivot_table(columns='sex',
                 index=['year', 'age'],
                 values='value')
    .rename(columns={'kvinnor': 'women', 'män': 'men'}))

## Overview

In [4]:
# Preview the first rows of the reshaped (year, age)-indexed frame
df.head()

Unnamed: 0_level_0,sex,women,men
year,age,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,16,8.5,8.8
2000,17,18.7,18.3
2000,18,25.1,25.3
2000,19,44.6,43.5
2000,20,69.1,72.2


## Plot

In [5]:
# Colours used throughout the chart
wc = '#57009f' # color women
mc = '#35c983' # color men
nc = '#606062' # neutral color
fc = '#e1e1e1' # fill between color

# Create directory for GIF frames
# exist_ok=True: reuse the directory if it is already present
TEMP_IMG_DIR = '.tmp'
pathlib.Path(TEMP_IMG_DIR).mkdir(exist_ok=True)

# Setup figure
# The height is derived from the width with a fixed factor of 0.597
w = 12
h = w * 0.597
figsize = (w, h)
# ds_plot is a project helper imported from src.plot; it is unpacked here as
# a (figure, axes) pair
fig, ax = ds_plot(figsize=figsize)
# Fit the plot inside the figure while leaving a 5% margin on the left and a
# 2% margin at the bottom
plt.tight_layout(rect=(0.05, 0.02, 1, 1))

for i in range(2000, 2018):
    # Calculate monthly income from yearly and tkr to kr
    data = df.loc[str(i)][['men', 'women']] / 12 * 1000
    
    # Create axis
    ax = data.plot(ax=ax, lw=3, color=[mc, wc], legend=False)
    ax.fill_between(list(range(16, 91)),
                    data.men, data.women,
                    color=fc, alpha=0.5)
    ax.set_xlim(16, 90)
    ax.set_ylim(0, 40_000)
    ax.set_xlabel('Ålder', color=nc, size=14, weight=500)
    
    # Format income values
    ff = plt.FuncFormatter(lambda x, _: "{:,} kr".format(int(x)).replace(',', ' '))
    ax.get_yaxis().set_major_formatter(ff)

    # Fraction of women's income to men's
    frac = int(round(data.women.sum() / data.men.sum() * 100, 0))
    
    # Add text with fraction for a given year
    textargs = {'size': 14, 'weight': 'bold', 'ha': 'left',
                'backgroundcolor': '#ffffff', 'transform': ax.transAxes}
    ax.text(0.31, 0.2, 'Kvinnors', color=wc, **textargs)
    ax.text(0.389, 0.2, f'inkomst {frac}% av ', color=nc, **textargs)
    ax.text(0.53, 0.2, 'männens', color=mc, **textargs)
    
    # Display year
    ax.text(45.5, 4500, str(i), color=nc,
            fontdict={'size': 38, 'weight': 'bold'},
            backgroundcolor='white', ha='left')
    
    # Save image
    fig.savefig(f'{TEMP_IMG_DIR}/{i}.png', dpi=150)
    
    # Resize image
    !convert {TEMP_IMG_DIR}/{i}.png -resize 65% -quality 100 {TEMP_IMG_DIR}/{i}.png
    
    # Paste image onto template image
    !convert ../assets/chart-templates/income-inequality-template.png \
        {TEMP_IMG_DIR}/{i}.png \
        -gravity northwest \
        -geometry +20+120 \
        -composite {TEMP_IMG_DIR}/{i}.png
    
    ax.clear()

# Create GIF and remove intermediate frames
# All PNG frames in the temporary directory are combined into one animation
# (-loop 0 and -delay 100 are ImageMagick animation settings).
# NOTE(review): frame order relies on the shell expanding *.png in sorted
# order, which is chronological for the 2000..2017 file names -- confirm.
!convert -loop 0 -delay 100 {TEMP_IMG_DIR}/*.png ../charts/2019-02-06-income-inequality.gif
shutil.rmtree(TEMP_IMG_DIR)

# Display GIF
# Close the current matplotlib figure, then show the saved GIF through a
# Markdown image link.
# NOTE(review): the close presumably keeps the last static frame from being
# rendered below the cell as well -- confirm.
plt.close();
display(Markdown("![income-gif](../charts/2019-02-06-income-inequality.gif)"))

![income-gif](../charts/2019-02-06-income-inequality.gif)