## Non-EU citizens are more affected by brain waste in Europe than citizens of other EU countries, particularly women

In [230]:
import pandas as pd
import numpy as np
import altair as alt
# Disable Altair's cap on the number of rows a chart's dataset may contain.
alt.data_transformers.disable_max_rows()

# Display DataFrames without truncating cell text or rows, and show up to
# 100 columns.
for option_name, option_value in (
    ('display.max_colwidth', None),
    ('display.max_columns', 100),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

# Employment and Unemployment by citizenship
- Unemployment: https://ec.europa.eu/eurostat/databrowser/view/lfsa_urgan__custom_8307744/default/table?lang=en
- Employment: https://ec.europa.eu/eurostat/databrowser/view/lfsa_ergan__custom_8308004/default/table?lang=en

    - Downloaded age class 20-64
    - Years 2013 to 2022
    - Citizenship: EU27 countries (from 2020) except reporting country, Non-EU27 countries (from 2020) nor reporting country, Reporting country
    


In [231]:
# Load the cleaned employment / unemployment table used by the next two charts.
market = pd.read_csv(filepath_or_buffer="clean_emp_unemp.csv")

In [236]:
# Keep only the 2022 observations: the chart below is titled as a 2022
# snapshot, and a bar mark over several years would stack (sum) the yearly
# rates. Coercing Year to a number makes the filter work whether read_csv
# parsed the column as integers or as text.
market_2022 = market[pd.to_numeric(market['Year'], errors='coerce') == 2022]

# Fixed colour per citizenship group.
# NOTE(review): 'Nationals ' carries a trailing space — presumably it matches
# the label stored in the CSV; confirm before normalising it.
dom= ['Nationals ','EU27 citizens other than nationals', 'Non-EU27']
rng= ['#636363','#F48518','#8A54A5']

# One panel per indicator (column facet on Type), one bar per citizenship group.
alt.Chart(market_2022).mark_bar().encode(
    x= alt.X("Rate:Q", title=None),
    y=alt.Y('Citizenship',title=None),
    color=alt.Color('Citizenship', scale=alt.Scale(domain=dom, range=rng), legend=None),
    column=alt.Column('Type', title=None)
).properties(
     title={
      "text": ["Employment and unemployment rate (%) by citizenship in 2022"], 
      "subtitle": [""],
    },
    width=300,
    height=100
)

In [386]:
# Restrict the labour-market table to the unemployment series.
is_unemployment = market['Type'] == 'Unemployment'
unemployment = market[is_unemployment]

dom = ['Nationals ', 'EU27 citizens other than nationals', 'Non-EU27']
rng = ['#636363', '#F48518', '#8A54A5']

# Shared base for every layer: same data, colour mapping and canvas size.
citizenship_colour = alt.Color(
    'Citizenship',
    scale=alt.Scale(domain=dom, range=rng),
    legend=None,
)
chart = (
    alt.Chart(unemployment)
    .properties(width=600, height=300)
    .encode(color=citizenship_colour)
)

# Layer 1: one smoothed line (with point markers) per citizenship group.
line = chart.mark_line(point=True, interpolate='monotone').encode(
    alt.X("Year", title="Year", axis=alt.Axis(tickMinStep=1)),
    alt.Y("Rate:Q", title="Unemployment rate (%)"),
)

# Layers 2 and 3: a dot and a direct label placed at each series' last year
# (x = max Year, y = the Rate taken at the row where Year is largest).
label = chart.encode(
    alt.X("max(Year)"),
    alt.Y("Rate:Q", aggregate=alt.ArgmaxDef(argmax="Year")),
    text="Citizenship",
)
circle = label.mark_circle()
text = label.mark_text(dx=4, align='left')

line + circle + text

# Unemployment rate by sex, citizenship and educational attainment level
- https://ec.europa.eu/eurostat/databrowser/view/lfsa_urganedm__custom_8309847/default/table?lang=en

    - Downloaded age class 20-64
    - Only years 2021 to 2022 available
    - Citizenship: EU27 countries (from 2020) except reporting country, Non-EU27 countries (from 2020) nor reporting country, Reporting country
    - Highest educational attainment level: Tertiary education (levels 5-8) only

In [137]:
# Load the unemployment rates of tertiary-educated people.
up_tert = pd.read_csv(filepath_or_buffer="Unemployed_TertiaryEducation.csv")

In [239]:
# Keep the 2022 rows only.
up_tert_2022 = up_tert[up_tert['Year']== 2022]

# Fixed colour per citizenship group.
dom= ['Nationals ','EU27 citizens other than nationals', 'Non-EU27']
rng= ['#636363','#F48518','#8A54A5']

# One facet row per citizenship group; within a row, one cross per Type value
# positioned at its unemployment rate.
alt.Chart(up_tert_2022).mark_point(filled=True, size=100, shape="cross").encode(
    alt.X("Rate", title="Unemployment rate (%)"),
    alt.Y('Type:N', axis=alt.Axis(grid=True), title=None),
    alt.Color('Citizenship', scale=alt.Scale(domain=dom, range=rng), legend=None),
    alt.Row('Citizenship:N', title="Citizenship")
).properties(
    width=600,
    height=100,
    title={
      # Spelling of "particularly" corrected in the published title.
      "text": ["Unemployment rate among people with the highest education attainment is higher", "for non European citizens and particularly, women"], 
      "subtitle": [""],
    },
)

In [242]:
# One colour per survey year.
dom = [2022, 2021]
rng = ['#636363', '#F48518']
year_colour = alt.Color('Year:N', scale=alt.Scale(domain=dom, range=rng))

# One facet row per citizenship group; both years are drawn in each row so
# they can be compared directly.
alt.Chart(up_tert).mark_point(size=100, filled=True).encode(
    x=alt.X("Rate", title="Unemployment rate (%)"),
    y=alt.Y('Type:N', title=None, axis=alt.Axis(grid=True)),
    color=year_colour,
    row=alt.Row('Citizenship:N', title="Citizenship"),
).properties(
    title="The unemployment rate among people with a tertiary educational attainment by citizenship and sex",
    height=100,
    width=600,
)

# Over-qualification rates by citizenship, sex and country
- https://ec.europa.eu/eurostat/databrowser/view/lfsa_urganedm__custom_8309847/default/table?lang=en
Over-qualification rate: the percentage of people with a tertiary level of educational attainment who are employed in a low- or medium-skilled occupation.
    - Downloaded age class 20-64
    - Years 2013 to 2022
    - Citizenship: EU27 countries (from 2020) except reporting country, Non-EU27 countries (from 2020) nor reporting country, Reporting country

In [245]:
# Load the over-qualification rates.
overquali = pd.read_csv(filepath_or_buffer="over-quali.csv")

In [287]:
# Keep the rows whose Type is 'Total'.
is_total = overquali['Type'] == 'Total'
overquali_total = overquali[is_total]

dom = ['Nationals ', 'EU27 citizens other than nationals', 'Non-EU27']
rng = ['#636363', '#F48518', '#8A54A5']
citizenship_colour = alt.Color('Citizenship', scale=alt.Scale(domain=dom, range=rng))

# One smoothed line per citizenship group, years treated as ordered categories.
alt.Chart(overquali_total).mark_line(point=True, interpolate='monotone').encode(
    alt.X("Year:O"),
    alt.Y("Rate:Q"),
    color=citizenship_colour,
).properties(
    title={
      "text": ["Non-EU citizens are the most over-qualified employed individuals in Europe"],
      "subtitle": [""],
    },
    height=300,
    width=600,
)

In [285]:
# 2022 over-qualification rows for the 'Non-EU27' citizenship group.
in_2022 = overquali['Year'].eq(2022)
is_non_eu = overquali['Citizenship'].eq('Non-EU27')
overquali_22_non = overquali[in_2022 & is_non_eu]

Unnamed: 0,Citizenship,Rate,Year,Type
27,Non-EU27,39.4,2022,Total
28,Non-EU27,35.9,2022,Males
29,Non-EU27,43.4,2022,Females


In [303]:
# Keep the 2022 rows for every citizenship group.
overquali_22 = overquali.loc[overquali['Year'] == 2022]

dom = ['Nationals ', 'EU27 citizens other than nationals', 'Non-EU27']
rng = ['#636363', '#F48518', '#8A54A5']
citizenship_colour = alt.Color(
    'Citizenship',
    scale=alt.Scale(domain=dom, range=rng),
    legend=None,
)

# One panel per citizenship group, one rounded-top bar per Type value.
alt.Chart(overquali_22).mark_bar(
    cornerRadiusTopRight=3,
    cornerRadiusTopLeft=3,
).encode(
    alt.X('Type', title=None),
    alt.Y("Rate:Q"),
    color=citizenship_colour,
    column=alt.Column('Citizenship', title=None),
).properties(
    title={
      "text": ["Women have the highest over-qualification rate"],
      "subtitle": [""],
    },
    height=300,
    width=150,
)

In [392]:
# Load the over-qualification rates by country and preview the first five rows.
over_count = pd.read_csv(filepath_or_buffer="overquali_country.csv")
over_count.head(5)

Unnamed: 0,Citizenship,Country,Rate
0,EU27 citizens other than nationals,European Union,31.8
1,EU27 citizens other than nationals,Belgium,20.4
2,EU27 citizens other than nationals,Czechia,11.1
3,EU27 citizens other than nationals,Denmark,31.8
4,EU27 citizens other than nationals,Germany,35.8


In [452]:
# Drop the rows whose Rate is the ":" placeholder.
has_rate = over_count['Rate'].ne(":")
over_count_new = over_count.loc[has_rate]

dom = ['Nationals ', 'EU27 citizens other than nationals', 'Non-EU27']
rng = ['#636363', '#F48518', '#8A54A5']
citizenship_colour = alt.Color(
    'Citizenship',
    scale=alt.Scale(domain=dom, range=rng),
    legend=None,
)

# Bubble matrix: countries across, citizenship groups down, bubble area
# proportional to the rate.
alt.Chart(over_count_new).mark_circle().encode(
    alt.X('Country'),
    alt.Y('Citizenship'),
    alt.Size('Rate:Q'),
    color=citizenship_colour,
).properties(
    title={
      "text": ["Over-qualification rate by country in 2022"],
      "subtitle": [""],
    },
    height=150,
    width=500,
)

In [449]:
# Non-EU27 rows only, with Rate converted to a real number. The source column
# mixes figures with ":" placeholders, so it may have been read as text; as
# text, Altair would infer a nominal colour channel and order the colour ramp
# lexicographically instead of by value.
over_count_non = (
    over_count_new[over_count_new['Citizenship'] == "Non-EU27"]
    .assign(Rate=lambda frame: pd.to_numeric(frame['Rate'], errors='coerce'))
)

# Horizontal bars sorted by descending rate; bar colour follows the same rate
# on a sequential orange ramp (type declared explicitly as quantitative).
alt.Chart(over_count_non).mark_bar().encode(
    x= alt.X("Rate:Q"),
    y= alt.Y("Country", sort='-x'),
    color = alt.Color("Rate:Q", scale=alt.Scale(scheme='oranges'), legend=None),
).properties(
    width=500,
    height=500,
    title={
      "text": ["2022 Non-EU citizens over-qualification rates by country"], 
      "subtitle": [""],
    },
)

# Foreign-born population by main obstacle to get a suitable job, sex, country of birth and educational attainment level
- https://ec.europa.eu/eurostat/databrowser/view/lfso_21obst01/default/table?lang=en
    - Downloaded age class 20-64
    - Year 2021
    - Citizenship: EU27 countries (from 2020) except reporting country and Non-EU27 countries (from 2020) nor reporting country
    - Rates were calculated as a share of the total, after subtracting the "No response" category

In [313]:
# Load the table of obstacles to getting a suitable job.
obsta = pd.read_csv(filepath_or_buffer="obstacles.csv")

In [314]:
# Keep the 'Non-EU27' citizenship rows and preview them.
is_non_eu = obsta['Citizenship'].eq('Non-EU27')
obsta_non = obsta[is_non_eu]
obsta_non.head()

Unnamed: 0,Citizenship,Type,Obstacle,Total,Rate
3,Non-EU27,Total,,13282.7,62.02
4,Non-EU27,Males,,7045.3,68.45
5,Non-EU27,Females,,6237.3,56.07
9,Non-EU27,Total,Never sought work or never worked,2266.2,10.58
10,Non-EU27,Males,Never sought work or never worked,479.7,4.66


In [348]:
# Colour per answer category.
dom= ['Faced obstacles', 'None', 'Never sought work or never worked']
rng= ['#FF7F00','#DADAEB','#9E9AC7']

# Explicit row order for the Type axis. The cell previously relied on a
# `custom_sort_order` variable that no cell in this notebook defines, so it
# failed on a fresh kernel.
# NOTE(review): this order is an assumption (it follows the row order seen in
# the data previews) — confirm it is the intended one.
type_order = ['Total', 'Males', 'Females']

# The preview of obsta_non shows blank Obstacle cells for some rows, while the
# colour domain expects the label 'None' (pandas' read_csv can treat the text
# "None" as a missing value). Restore the label so those rows match the
# colour domain; this is a no-op when nothing is missing.
obsta_non_labelled = obsta_non.fillna({'Obstacle': 'None'})

# Stacked horizontal bars: one bar per Type value, one segment per answer.
alt.Chart(obsta_non_labelled).mark_bar().encode(
    x=alt.X('Rate:Q', title="%"),
    y=alt.Y('Type:O', title = None, sort=type_order),
    color=alt.Color('Obstacle', scale=alt.Scale(domain=dom, range=rng))
).properties(
    width=500,
    height=150,
    title={
      "text": ["The proportion of non-EU women who faced obstacles in", "getting a suitable job was slightly higher than the men"], 
      "subtitle": [""],
    },
)

In [366]:
# Load the breakdown of obstacle types by educational attainment.
type_obstac = pd.read_csv(filepath_or_buffer="type_obstac.csv")

In [367]:
# Preview the first five rows.
type_obstac.head(5)

Unnamed: 0,Educational attainment,Obstacle,Total,Rate
0,Less than primary,Lack of language skills,549.7,31.07
1,Less than primary,Lack of recognition of qualifications,96.7,5.46
2,Less than primary,Citizenship or residence permit,172.9,9.77
3,Less than primary,Discrimination due to foreign origin,103.7,5.86
4,Less than primary,No suitable job available,182.7,10.32


In [383]:
# Stacked vertical bars: one bar per educational attainment level, one
# segment per obstacle type.
# The "%" title now sits on the Rate axis (it was attached to the education
# axis, leaving the rate axis untitled). The custom category sort that was
# applied to the quantitative Rate axis is dropped: it referenced
# `custom_sort_order`, which no cell in this notebook defines, and a category
# order has no meaning on a continuous axis.
alt.Chart(type_obstac).mark_bar().encode(
    x=alt.X('Educational attainment:O', title=None, axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Rate', title="%"),
    color=alt.Color('Obstacle', scale=alt.Scale(scheme='set2'))
).properties(
    width=300,
    height=250,
    title={
      "text": ["For non EU citizens with higher education, the lack of recognition of qualifications" , "presents a more significant obstacle compared to those with lower education"], 
      "subtitle": [""],
    },
)

In [387]:
# Load the obstacle breakdown by sex for non-EU citizens.
type_obstac_sex = pd.read_csv(filepath_or_buffer="Obstac_by_sex_noneu.csv")

In [391]:
# Stacked vertical bars: one bar per educational attainment level, one
# segment per obstacle type.
# The "%" title now sits on the Rate axis (it was attached to the education
# axis). The sort on the quantitative Rate axis is dropped because it
# referenced `custom_sort_order`, which no cell in this notebook defines.
# The missing space in the chart title ("reportedfacing") is also fixed.
alt.Chart(type_obstac_sex).mark_bar().encode(
    x=alt.X('Educational attainment:O', title=None, axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Rate', title="%"),
    color=alt.Color('Obstacle', scale=alt.Scale(scheme='set3'))
).properties(
    width=300,
    height=250,
    title={
      "text": ["Non EU citizens who reported facing obstacles finding a suitable job"],
    },
)