In [1]:
import pandas as pd
import plotly.express as px

In [10]:
# Path to the survey CSV (the file name suggests the CCHS 2017-18 PUMF).
# NOTE(review): this is an absolute, machine-specific path — adjust before running elsewhere.
cchs_csv_path = "/Users/adsouza/Desktop/Work/data624/Data/CCHS/PUMFCCHS201718.csv"
data = pd.read_csv(cchs_csv_path)

In [11]:
# Show the (rows, columns) dimensions of the loaded frame.
data.shape

(113290, 355)

In [12]:
# List every column name in the frame, one per line.
for column_name in data.columns.tolist():
    print(column_name)

ADM_RNO
VERDATE
GEO_PRV
GEODGHR4
GEODVBHA
DHH_SEX
DHHGMS
DHHDGHSZ
DHHDGLVG
DHHDG611
DHHDGL12
DHHDGLE5
ADM_PRX
DHHGAGE
DOMAC
MACG005
MAC_010
MAC_015
MACG020
EHG2DVR3
EHG2DVH3
DOGEN
GEN_005
GEN_010
GEN_015
GEN_020
GEN_025
GEN_030
GENDVHDI
GENDVMHI
GENDVSWL
DOHWT
HWT_050
HWTDGHTM
HWTDGWTK
HWTDGBMI
HWTDVCOL
HWTDGISW
HWTDVWHO
HWTDGCOR
HWTDGBCC
DOCCC
CCC_005
CCC_010
CCC_015
CCC_020
CCC_025
CCC_030
CCC_035
CCC_040
CCC_045
CCC_050
CCC_055
CCC_060
CCC_065
CCC_070
CCC_075
CCC_080
CCC_085
CCC_090
CCC_095
CCCG100
CCC_105
CCC_110
CCC_115
CCC_120
CCC_125
CCC_130
CCC_135
CCC_140
CCC_170
CCC_175A
CCC_175B
CCC_175C
CCC_175D
CCC_175E
CCC_175F
CCC_175G
CCC_175H
CCC_180
CCC_185
CCC_190
CCC_195
CCC_200
CCCDVDIA
DOADL
ADL_005
ADL_010
ADL_015
ADL_020
ADL_025
ADL_030
ADLDVIHS
ADLDVIPC
DOCIH
CIH_005
CIHG010
CIH_015
CIH_020
CIH_025
CIH_030A
CIH_030B
CIH_030C
CIH_030D
CIH_030E
CIH_030F
CIH_030G
CIH_030H
CIH_030J
CIHG030K
CIH_035
CIH_040A
CIH_040B
CIH_040C
CIH_040D
CIH_040F
CIH_040G
CIHG040I
DOSLP
SLPG005
SLP_010

In [13]:
# Summary statistics for the WTS_M column, which the cells below sum within groups.
# NOTE(review): presumably the survey sampling weight — confirm against the codebook.
data['WTS_M'].describe()

count    113290.000000
mean        276.055892
std         403.624682
min           5.080000
25%          84.960000
50%         158.900000
75%         303.807500
max       13894.040000
Name: WTS_M, dtype: float64

# Food insecurity - plots. 

## Horizontal bar charts. Most basic version with default options.

In [14]:
# Total WTS_M within each FSCDVHFS code, returned as a flat two-column frame.
counts_by_status = data.groupby('FSCDVHFS', as_index=False)['WTS_M'].sum()
counts_by_status

Unnamed: 0,FSCDVHFS,WTS_M
0,0,28022382.52
1,1,1777500.07
2,2,801980.76
3,9,672508.66


In [15]:
# First attempt with default options; FSCDVHFS still holds the numeric codes at this point.
px.bar(counts_by_status, x='FSCDVHFS', y='WTS_M')

Plotly does not realise that 'FSCDVHFS' is categorical rather than numerical. We can fix this by converting the category labels to strings.

In [16]:
# Swap the numeric FSCDVHFS codes for readable labels so Plotly treats the
# x-axis as categorical.
# The result is assigned back to the column rather than calling
# `.replace(..., inplace=True)` on the column selection: that form is chained
# assignment, which pandas warns about and which leaves `counts_by_status`
# unchanged once Copy-on-Write is enabled.
counts_by_status['FSCDVHFS'] = counts_by_status['FSCDVHFS'].replace({
    0: 'Food secure',
    1: 'Moderately food insecure',
    2: 'Severely food insecure',
    9: 'Not stated',
})
px.bar(counts_by_status, x='FSCDVHFS', y='WTS_M')

Alright, this tells us that most people in Canada belong to food-secure households. But is this uniformly true across the country and across demographics, or are there areas of concern? Let's break this down by province and territory. Since they have very different population counts, we should compare proportions rather than absolute numbers.

### Proportion of respondents experiencing food insecurity by province/territory.

In [17]:
# Numerators: WTS_M summed within each (province code, food-security code) pair.
# Both keys are cast to strings so they behave as categories downstream.
numerators = (
    data[['GEO_PRV', 'FSCDVHFS', 'WTS_M']]
    .groupby(['GEO_PRV', 'FSCDVHFS'])
    .sum()
    .reset_index()
    .rename(columns={'WTS_M': 'NUM_PROV_FS'})
    .astype({'GEO_PRV': str, 'FSCDVHFS': str})
)

# Denominators: WTS_M summed over the whole province, keyed the same way.
denominators = (
    data[['GEO_PRV', 'WTS_M']]
    .groupby(['GEO_PRV'])
    .sum()
    .reset_index()
    .astype({'GEO_PRV': str})
    .rename(columns={'WTS_M': 'NUM_PROV'})
)

# Attach each province total to its rows (GEO_PRV is the only shared column)
# and derive the within-province share of every food-security code.
data_fs_prov = numerators.merge(denominators, how='left', on='GEO_PRV')
data_fs_prov['FS_PROP'] = data_fs_prov['NUM_PROV_FS'].div(data_fs_prov['NUM_PROV'])

In [18]:
# Grouped bars: one cluster per province code, one bar per food-security code.
fig = px.bar(
    data_frame=data_fs_prov,
    x='GEO_PRV',
    y='FS_PROP',
    color='FSCDVHFS',
    barmode='group',
)
fig.show()

Looks like the group to the right is very different from the others. But which one is it? Let's use the codebook to provide province/territory labels that make sense. While we're at it, let's also give it a better y-axis title and legend labels. And, maybe percentages are more intuitive to government bureaucrats than proportions, so let's change that too.

In [19]:
# Province/territory name -> GEO_PRV code (stored as strings to match the frame).
provinces = {
    'Newfoundland and Labrador': '10',
    'Prince Edward Island': '11',
    'Nova Scotia': '12',
    'New Brunswick': '13',
    'Quebec': '24',
    'Ontario': '35',
    'Manitoba': '46',
    'Saskatchewan': '47',
    'Alberta': '48',
    'British Columbia': '59',
    'Yukon': '60',
    'Northwest Territories': '61',
    'Nunavut': '62',
}
# Flip the lookup so that codes map to names, then label each row.
provinces_inverse = {code: name for name, code in provinces.items()}
data_fs_prov['PROVINCE'] = data_fs_prov['GEO_PRV'].replace(provinces_inverse)

# Express the proportion as a percentage.
data_fs_prov['FS_PERCENT'] = 100.0 * data_fs_prov['FS_PROP']

# Readable legend entries for the food-security codes.
fs_status_labels = {
    '0': 'Food secure',
    '1': 'Moderately food insecure',
    '2': 'Severely food insecure',
    '9': 'Not stated',
}
data_fs_prov['FS_CAT'] = data_fs_prov['FSCDVHFS'].replace(fs_status_labels)

In [20]:
# Same grouped chart, now using the readable province names, percentages and status labels.
fig = px.bar(
    data_frame=data_fs_prov,
    x='PROVINCE',
    y='FS_PERCENT',
    color='FS_CAT',
    barmode='group',
)
fig.show()

Okay, so the one that's very different is Nunavut. To be a bit more user-friendly, we can sort the provinces and territories in descending order of the proportion of food-secure households. But first, aren't those slanted province/territory labels annoying? Switching the x- and y-axes makes them much easier to read.

In [21]:
# Swap the axes: percentages run along x, province names sit on y as horizontal labels.
fig = px.bar(
    data_frame=data_fs_prov,
    x='FS_PERCENT',
    y='PROVINCE',
    color='FS_CAT',
    barmode='group',
)
fig.show()

In [22]:
# Order the provinces/territories by their share of food-secure households.
# Sorting the whole frame by FS_PERCENT mixes all status categories together,
# so the axis order ends up driven by whichever category holds the smallest
# values rather than by the food-secure share. Instead, rank only the
# 'Food secure' rows and hand that ranking to Plotly explicitly.
# `data_fs_prov` itself is left untouched, so re-running this cell (or any
# earlier one) gives the same result.
province_order = (
    data_fs_prov.loc[data_fs_prov['FS_CAT'] == 'Food secure']
    .sort_values(by='FS_PERCENT', ascending=False)['PROVINCE']
    .tolist()
)

# For a y-axis, Plotly Express lays `category_orders` out top-down, so the
# province with the highest food-secure percentage appears at the top.
fig = px.bar(
    data_fs_prov,
    x='FS_PERCENT',
    y='PROVINCE',
    color='FS_CAT',
    barmode='group',
    category_orders={'PROVINCE': province_order},
)
fig.show()

Okay! Now, we need more useful labels.

In [23]:
# Dump the figure's layout as a plain dict to see which properties can be customised.
# NOTE(review): the output includes the full default template, so it is very long.
fig.to_dict()['layout']

{'template': {'data': {'histogram2dcontour': [{'type': 'histogram2dcontour',
     'colorbar': {'outlinewidth': 0, 'ticks': ''},
     'colorscale': [[0.0, '#0d0887'],
      [0.1111111111111111, '#46039f'],
      [0.2222222222222222, '#7201a8'],
      [0.3333333333333333, '#9c179e'],
      [0.4444444444444444, '#bd3786'],
      [0.5555555555555556, '#d8576b'],
      [0.6666666666666666, '#ed7953'],
      [0.7777777777777778, '#fb9f3a'],
      [0.8888888888888888, '#fdca26'],
      [1.0, '#f0f921']]}],
   'choropleth': [{'type': 'choropleth',
     'colorbar': {'outlinewidth': 0, 'ticks': ''}}],
   'histogram2d': [{'type': 'histogram2d',
     'colorbar': {'outlinewidth': 0, 'ticks': ''},
     'colorscale': [[0.0, '#0d0887'],
      [0.1111111111111111, '#46039f'],
      [0.2222222222222222, '#7201a8'],
      [0.3333333333333333, '#9c179e'],
      [0.4444444444444444, '#bd3786'],
      [0.5555555555555556, '#d8576b'],
      [0.6666666666666666, '#ed7953'],
      [0.7777777777777778, '#fb9f3a

In [24]:
# Human-readable titles for both axes and the legend.
title_overrides = {
    'yaxis_title_text': "Province/Territory",
    'xaxis_title_text': "Percentage of Households",
    'legend_title_text': "Food Security Status",
}
fig.update_layout(**title_overrides)