# Creating Visualization Prototypes

> Creating the visualization prototypes for our observations

In [None]:
#| hide
import warnings

In [None]:
#| hide
# Suppress every warning raised from this point on in the session.
warnings.simplefilter("ignore")

In [None]:
import altair as alt
import pandas as pd
import numpy as np

from dataviz_course.explore_data import prepare_dataset

In [None]:
# Build the DataFrame that every chart in this notebook reads from.
data = prepare_dataset()

# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Work Year,Experience Level,Job Title,Salary (usd),Employee Residence,Company Location,On-site/Remote,Number of Employees,Working for a Foreign Company
0,2020,Mid-level / Intermediate,Data Scientist,79833,DE,DE,On-site,>250,False
1,2020,Senior-level / Expert,Machine Learning Scientist,260000,JP,JP,On-site,<50,False
2,2020,Senior-level / Expert,Big Data Engineer,109024,GB,GB,Hybrid,50-250,False
3,2020,Mid-level / Intermediate,Product Data Analyst,20000,HN,HN,On-site,<50,False
4,2020,Senior-level / Expert,Machine Learning Engineer,150000,US,US,Hybrid,>250,False


# Prototypes

In [None]:
#| hide
# Base chart dimensions in pixels; individual charts scale these as needed.
WIDTH, HEIGHT = 500, 300

In [None]:
#| hide

# Sort definition: order categories by the maximum "Salary (usd)" within each one.
sort_max_salary = alt.EncodingSortField(op="max", field="Salary (usd)")

# Scale whose domain spans the lowest to the highest salary in `data`.
salary_scale = alt.Scale(
    domain=[
        data["Salary (usd)"].min(),
        data["Salary (usd)"].max(),
    ]
)

def get_selection_opacity(selection, selected_opacity=1, unselected_opacity=.2):
    """Build a conditional opacity encoding driven by a selection.

    Args:
        selection: Altair selection used as the predicate of the condition.
        selected_opacity: Opacity value used when the predicate holds.
            Defaults to 1, the value that was previously hard-coded.
        unselected_opacity: Opacity value used otherwise.
            Defaults to 0.2, the value that was previously hard-coded.

    Returns:
        The result of ``alt.condition``, intended to be passed as the
        ``opacity`` argument of ``Chart.encode``.
    """
    return alt.condition(
        selection,
        alt.value(selected_opacity),
        alt.value(unselected_opacity),
    )

## Company Location

In [None]:
# One circle per row of `data`; company locations are ordered by `sort_max_salary`.
alt.Chart(
    data,
    title="Company Location vs Salary",
    width=WIDTH,
    height=HEIGHT * 1.2,
).mark_circle().encode(
    x=alt.X("Company Location", type="nominal", sort=sort_max_salary),
    y=alt.Y("Salary (usd)", type="quantitative"),
    tooltip=[
        "Salary (usd)",
        "Job Title",
        "Employee Residence",
        "Number of Employees",
    ],
)

## Working for a Foreign Company

In [None]:
# Row counts per 50,000-wide salary bin, coloured by the
# "Working for a Foreign Company" flag.
alt.Chart(
    data,
    title="Salary for Domestic or Foreign Employment",
    width=WIDTH,
    height=HEIGHT,
).mark_bar().encode(
    x=alt.X(
        "Salary (usd)",
        type="quantitative",
        bin=alt.Bin(step=50_000),
        scale=salary_scale,
    ),
    y=alt.Y("count()"),
    color=alt.Color("Working for a Foreign Company"),
    tooltip=[alt.Tooltip("count()")],
)

In [None]:
# Mean salary for every (employee residence, foreign-company flag) pair,
# flattened back into ordinary columns.
averages = (
    data
    .groupby(["Employee Residence", "Working for a Foreign Company"])["Salary (usd)"]
    .mean()
    .reset_index()
)
averages.head()

Unnamed: 0,Employee Residence,Working for a Foreign Company,Salary (usd)
0,AE,False,100000.0
1,AR,True,60000.0
2,AT,False,76738.666667
3,AU,False,108042.666667
4,BE,False,85699.0


In [None]:
# Line-with-points chart over `averages`: x is the foreign-company flag,
# y is the mean salary computed above.
chart = (
    alt.Chart(
        averages,
        title="Average Salary Comparison (Domestic vs. Foreign)",
        width=WIDTH,
        height=HEIGHT,
    )
    .mark_line(point=True)
    .encode(
        x=alt.X(field="Working for a Foreign Company", type="nominal"),
        y=alt.Y(field="Salary (usd)", type="quantitative"),
        # Colour by residence, so each residence gets its own line.
        color=alt.Color("Employee Residence"),
        # NOTE(review): the order channel uses the same field as colour, so it
        # is constant within each line — confirm whether it is still needed.
        order=alt.Order("Employee Residence"),
        tooltip=["Salary (usd)", "Employee Residence"],
    )
)
chart


## Company Size and Experience

In [None]:
# Multi-selection keyed on "Number of Employees": it drives the scatter's
# opacity and filters the rows fed to the histogram.
selection = alt.selection(type="multi", fields=["Number of Employees"])

# Left panel: salary per company size, coloured by experience level.
scatter = (
    alt.Chart(
        data,
        title="Salary based on Company Size",
        width=WIDTH * 0.4,
        height=HEIGHT,
    )
    .mark_circle()
    .encode(
        # Same max-salary ordering as the shared `sort_max_salary` definition.
        x=alt.X("Number of Employees", type="nominal", sort=sort_max_salary),
        y=alt.Y("Salary (usd)", type="quantitative"),
        color=alt.Color("Experience Level"),
        tooltip=["Job Title", "Employee Residence", "Salary (usd)"],
        opacity=get_selection_opacity(selection),
    )
    .add_selection(selection)
)

# Right panel: salary histogram (50,000-wide bins) restricted to the selection.
histogram = (
    alt.Chart(
        data,
        title="Salary based on Experience Level",
        width=WIDTH * 0.5,
        height=HEIGHT,
    )
    .mark_bar()
    .encode(
        x=alt.X(
            "Salary (usd)",
            type="quantitative",
            bin=alt.Bin(step=50_000),
            scale=salary_scale,
        ),
        y=alt.Y("count()"),
        color=alt.Color("Experience Level"),
        tooltip=[alt.Tooltip("count()")],
    )
    .transform_filter(selection)
    .interactive()
)

scatter | histogram

## Remote Work

In [None]:
# Layer 1: one circle per row of `data`, grouped by work arrangement.
salaries_chart = (
    alt.Chart(data, width=WIDTH, height=HEIGHT)
    .mark_circle()
    .encode(
        x=alt.X("On-site/Remote", type="nominal"),
        y=alt.Y("Salary (usd)"),
        tooltip=[
            "Salary (usd)",
            "Job Title",
            "Employee Residence",
            "Company Location",
        ],
    )
)

# Layer 2: black line through the mean salary of each work arrangement.
average_salaries_chart = (
    alt.Chart(
        data,
        title="Salaries for Different Work Types",
        width=WIDTH,
        height=HEIGHT,
    )
    .mark_line(color="black")
    .encode(
        x=alt.X("On-site/Remote", type="nominal"),
        y=alt.Y(
            field="Salary (usd)",
            type="quantitative",
            aggregate="mean",
            # Keep the plain field name as the axis title for the aggregated channel.
            axis=alt.Axis(title="Salary (usd)"),
        ),
    )
)

salaries_chart + average_salaries_chart

## Yearly Changes

In [None]:
# Layer 1: one circle per row of `data`, grouped by work year.
# "Work Year" is declared nominal explicitly so this layer agrees with the
# mean-line layer below; the bare shorthand would take its type from the
# column dtype and could put the two layers on different kinds of x scale.
salaries_chart = alt.Chart(data).mark_circle().encode(
    x=alt.X(field="Work Year", type="nominal"),
    y=alt.Y(field="Salary (usd)", type="quantitative"),
    # The salary is included so the tooltip matches the other scatter plots.
    tooltip=["Salary (usd)", "Job Title", "Employee Residence", "Company Location"]
).properties(width=WIDTH, height=HEIGHT)

# Layer 2: black line through the mean salary of each work year.
average_salaries_chart = alt.Chart(data).mark_line(color="black").encode(
    x=alt.X(field="Work Year", type="nominal"),
    # Keep the plain field name as the axis title for the aggregated channel.
    y=alt.Y("Salary (usd)", type="quantitative", aggregate="mean", axis=alt.Axis(title="Salary (usd)")),
).properties(width=WIDTH, height=HEIGHT, title="Salaries in each Year")

salaries_chart + average_salaries_chart