# Societal Factors

> A collection of visualizations to highlight how societal factors impact our mental well-being.

- toc: true
- badges: false
- comments: false

In [None]:
# hide
# Code Import

## Imports:

### Libraries
import numpy as np
import pandas as pd
import altair as alt
import os, os.path
import pycountry
from vega_datasets import data

alt.data_transformers.enable('default', max_rows=None)

import country_converter as coco
import geopandas as gpd
import json

### Data
# The CSV path is the current working directory with the file name appended.
df_file = '/depression_final_df.csv'
cwd = os.getcwd()
df = pd.read_csv(f'{cwd}{df_file}')

# Country outlines (TopoJSON feature 'countries') used as map geometry.
countries = alt.topo_feature(data.world_110m.url, 'countries')

# Drop rows without an 'id', cast the id to int, and shorten the two long
# prevalence column names.
df = (
    df.dropna(subset=['id'])
      .astype({'id': int})
      .rename(columns={
          "Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)": "Depression in males (%)",
          "Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)": "Depression in females (%)",
      })
)

# NOTE(review): 'Depression (%)' is the male and female percentages added
# together (not averaged) -- confirm this is the intended headline figure.
sum_column = df["Depression in males (%)"] + df["Depression in females (%)"]
df['Depression (%)'] = sum_column

# dfi: copy with every missing value replaced by 0 and 'Year' as a plain int.
dfi = df.fillna(0)
dfi['Year'] = dfi['Year'].map(int)

# df keeps its missing values but gets 'Year' parsed as a datetime.
df['Year'] = pd.to_datetime(dfi['Year'], format='%Y')

In [None]:
# hide
# One snapshot of df per reference year; df['Year'] is a datetime column and
# is compared against the year given as a string.
df_1990, df_2000, df_2010, df_2017 = (
    df[df['Year'] == year_label]
    for year_label in ('1990', '2000', '2010', '2017')
)

In [None]:
# hide
# Gender table: the two per-sex prevalence columns, keyed by country and year
# (taken from dfi, so 'Year' is an int and missing values are 0).
df_gender = dfi[['Country','Year','Depression in males (%)','Depression in females (%)']]

# Statistics table: df without identifiers, geometry, population, the per-sex
# columns and the listed survey indicators.
df_stats = df.drop(columns=['country_code','geometry','Continent',
                            'id','Population (historical estimates)',
                            'Depression in males (%)','Depression in females (%)','Social support',
                            'Time spent in social interactions','Life satisfaction',
                            'Access to green space'])

# Reshape both tables to long format: one row per (Country, Year, Statistic).
# var_name is passed as a scalar string: pandas documents it as a scalar, and
# recent releases reject a list here for frames without MultiIndex columns.
df_gender = pd.melt(df_gender, id_vars=['Country','Year'], var_name='Statistic')
df_stats = pd.melt(df_stats, id_vars=['Country','Year'], var_name='Statistic')

# Rows whose green-space value is non-zero (0 is the fill value used in dfi).
df_europe = dfi.loc[dfi['Access to green space'] != 0]

In [None]:
# hide
# dfi was built with fillna(0), so each subset below keeps only the rows whose
# value for the named indicator is non-zero.
df_e = dfi[dfi['Employment rate'].ne(0)]
df_ls = dfi[dfi['Life satisfaction'].ne(0)]
df_ph = dfi[dfi['Perceived health'].ne(0)]

# Time-use satisfaction is additionally restricted to the year 2013.
df_stu = dfi[dfi['Satisfaction with time use'].ne(0) & dfi['Year'].eq(2013)]

df_ss = dfi[dfi['Social support'].ne(0)]
df_tssi = dfi[dfi['Time spent in social interactions'].ne(0)]
df_dr = dfi[dfi['Divorce Rate'].ne(0)]

In [None]:
#hide
# Multi-selection on 'Country'; nothing counts as selected until a click.
click = alt.selection_multi(empty='none', fields=['Country'])

# Shared size and colour scale for the world map.
main_width = 675
main_height = 350
main_colour = alt.Color(
    'Depression (%):Q',
    scale=alt.Scale(scheme='viridis', reverse=True, domain=[4, 13]),
)

# Choropleth: country shapes joined by 'id' to the 2017 snapshot and coloured
# by 'Depression (%)'. Selected countries are fully opaque, the rest faded.
main_choropleth = (
    alt.Chart(countries)
    .mark_geoshape(stroke='black', strokeWidth=0.2)
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(df_2017, 'id', ['Depression (%)', 'Country']),
    )
    .encode(
        color=main_colour,
        opacity=alt.condition(click, alt.value(1), alt.value(0.3)),
        tooltip=['Country:N', 'Depression (%):Q'],
    )
    .add_selection(click)
    .project(type='equirectangular')
    .properties(width=main_width, height=main_height)
)

# Light-grey outline of every country, drawn underneath the choropleth.
main_background = (
    alt.Chart(countries)
    .mark_geoshape(fill='lightgrey', stroke='black', strokeWidth=0.1)
    .project(type='equirectangular')
    .properties(width=main_width, height=main_height)
)

main_map = alt.layer(main_background, main_choropleth)

In [None]:
# hide
# Year slider (1990-2017) bound to a single-value selection on 'Year',
# starting at 2017.
slider = alt.binding_range(min=1990, max=2017, step=1)
select_year = alt.selection_single(name='Select', fields=['Year'],
                                     bind=slider, init={'Year': 2017})

# Bar chart of the male/female values for the countries picked on the map
# (`click`) in the year chosen on the slider.
gender_bar = alt.Chart(df_gender).mark_bar().encode(
    # Explicit title: the melted column is literally named 'value', which is
    # what the axis would otherwise display.
    x = alt.X('value:Q', title = 'Depression (%)'),
    y = alt.Y('Country:N'),
    color = alt.Color('Statistic:N', scale=alt.Scale(scheme = 'set2'),title = 'Gender')
).add_selection(
    select_year
).transform_filter(
    click
).transform_filter(
    select_year
).properties(
    width=main_width*0.5,
    height=100
)

In [None]:
# hide
# Multi-selection on 'Country' driven by clicks on the chart legend.
click_name = alt.selection_multi(bind='legend', fields=['Country'])

## Gender Split
  
Here you can see the difference in recorded depression cases, by percent, for males and females; the split between them is largely uneven. There are theories that this may be due to social pressures, resulting in men underreporting depression symptoms. However, this discussion is a deep one and is beyond the scope of this website. Papers such as [Sigmon et al.](https://link.springer.com/content/pdf/10.1007/s11199-005-6762-3.pdf)’s study (2005) are a good place to start if you wish to learn more.  

> Shift-click to select multiple countries.

In [20]:
# hide_input
# World map on top, gender bar chart underneath, centred.
alt.vconcat(
    main_map,
    gender_bar,
    center=True,
)

In [None]:
# hide
# Year slider (2004-2017) bound to a single-value selection on 'Year',
# starting at 2017.
slider_e = alt.binding_range(min=2004, max=2017, step=1)
select_year_e = alt.selection_single(
    name='Select',
    fields=['Year'],
    bind=slider_e,
    init={'Year': 2017},
)

# Scatter of employment rate against depression, filtered to the slider year
# and to the countries chosen in the legend (`click_name`).
employ_scatter = (
    alt.Chart(df_e)
    .mark_point()
    .encode(
        x=alt.X('Employment rate:Q', scale=alt.Scale(domain=[40, 100]), title='Employment rate (%)'),
        y=alt.Y('Depression (%):Q', scale=alt.Scale(domain=[2, 11])),
        opacity=alt.condition(click_name, alt.value(1), alt.value(0.3)),
        color=alt.Color('Country:N', scale=alt.Scale(scheme='set2'), title='Country'),
        tooltip=['Country', 'Depression (%)', 'Employment rate'],
    )
    .properties(width=500, height=500)
    .add_selection(select_year_e)
    .transform_filter(select_year_e)
    .add_selection(click_name)
    .transform_filter(click_name)
)

# One line per country over the same axes, with points connected in year
# order; it is not filtered by the slider and only fades unselected countries.
employ_line = (
    alt.Chart(df_e)
    .mark_line()
    .encode(
        x=alt.X('Employment rate:Q', scale=alt.Scale(domain=[40, 100])),
        y=alt.Y('Depression (%):Q', scale=alt.Scale(domain=[2, 11])),
        opacity=alt.condition(click_name, alt.value(1), alt.value(0.2)),
        color=alt.Color('Country:N', scale=alt.Scale(scheme='set2'), title='Country'),
        order=alt.Order('Year:O'),
    )
    .properties(width=500, height=500)
)

---
# Employment vs Depression

Working helps to give us a sense of purpose and incorporates plenty of social interaction and movement. These factors may very well link with the development of depression. This visualization plots the possible relationship between employment and depression over time.  

> Click on any country name to focus on that country.

In [18]:
# hide_input
# Scatter and per-country lines layered together, with pan/zoom enabled.
alt.layer(employ_scatter, employ_line).interactive()

When moving the year slider, it becomes clear that many countries see their employment rates improve over the years and, at the same time, their depression percentages go down — a promising trend.

In [None]:
# hide
# Year slider and selection for this section.
# NOTE(review): select_year_stu is defined but not added to any chart in this
# cell, and df_stu only contains rows for 2013.
slider_stu = alt.binding_range(min=2004, max=2017, step=1)
select_year_stu = alt.selection_single(
    name='Select',
    fields=['Year'],
    bind=slider_stu,
    init={'Year': 2017},
)

# Scatter of time-use satisfaction against depression, one circle per row.
stu_scatter = (
    alt.Chart(df_stu)
    .mark_circle(size=50)
    .encode(
        x=alt.X(
            'Satisfaction with time use:Q',
            scale=alt.Scale(domain=[5, 10]),
            title='Satisfaction with time use (Average score)',
        ),
        y=alt.Y('Depression (%):Q', scale=alt.Scale(domain=[3, 10])),
        color=alt.Color('Country:N', scale=alt.Scale(scheme='set2'), title='Country'),
        tooltip=['Country', 'Depression (%)', 'Satisfaction with time use'],
    )
    .properties(width=500, height=500)
)

# Regression line derived from the scatter; the fold adds a 'Regression'
# field (from the label "reg-line") that the line is coloured by.
stu_line = (
    stu_scatter
    .transform_regression('Satisfaction with time use', 'Depression (%)')
    .mark_line()
    .transform_fold(["reg-line"], as_=["Regression", "y"])
    .encode(alt.Color("Regression:N"))
)

---
# Satisfaction With Time Use vs Depression

Here is a simple scatter plot showing a weak positive correlation between average scores for satisfaction with time use and depression percentage. The nature of the correlation from this data does not immediately suggest a link but as the two subjects are likely related, further investigation may be needed.

In [19]:
# hide_input
# Scatter plus its regression line, with pan/zoom enabled.
alt.layer(stu_scatter, stu_line).interactive()

In [None]:
# hide
# Year slider (1990-2017) bound to a single-value selection on 'Year',
# starting at 2017.
slider_dr = alt.binding_range(min=1990, max=2017, step=1)
select_year_dr = alt.selection_single(
    name='Select',
    fields=['Year'],
    bind=slider_dr,
    init={'Year': 2017},
)

# Scatter of divorce rate against depression, filtered to the slider year and
# to the countries chosen in the legend (`click_name`).
dr_scatter = (
    alt.Chart(df_dr)
    .mark_point()
    .encode(
        x=alt.X('Divorce Rate:Q', scale=alt.Scale(domain=[0, 7])),
        y=alt.Y('Depression (%):Q', scale=alt.Scale(domain=[2, 12])),
        opacity=alt.condition(click_name, alt.value(1), alt.value(0.3)),
        color=alt.Color('Country:N', scale=alt.Scale(scheme='set2'), title='Country'),
        tooltip=['Country', 'Depression (%)', 'Divorce Rate'],
    )
    .properties(width=500, height=500)
    .add_selection(select_year_dr)
    .transform_filter(select_year_dr)
    .add_selection(click_name)
    .transform_filter(click_name)
)

In [None]:
# hide
# Unfiltered base chart over every row of df_dr; it is only used as the
# source for the regression line below.
drs = (
    alt.Chart(df_dr)
    .mark_point()
    .encode(
        x=alt.X('Divorce Rate:Q', title='Divorce Rate (per 1000 people)'),
        y=alt.Y('Depression (%):Q'),
    )
)

# Regression of depression on divorce rate; the fold adds a 'Regression'
# field (from the label "reg-line") that the line is coloured by.
dr_line = (
    drs
    .transform_regression('Divorce Rate', 'Depression (%)')
    .mark_line()
    .transform_fold(["reg-line"], as_=["Regression", "y"])
    .encode(alt.Color("Regression:N"))
)


---
# Divorce Rate vs Depression

This visualization shows divorce rate data against depression over time, a relationship that is backed here by a positive correlation. However, the points for many countries move with no visible trend along the divorce-rate axis. This may suggest that the correlation could be a coincidence.  

> Again, click on the country name you want to focus on.

In [17]:
# hide_input
# Year-filtered scatter layered with the overall regression line.
alt.layer(dr_scatter, dr_line)