# 4 PLOTLY

In [1]:
import pandas as pd

df=pd.read_csv('jobs_in_data.csv')

# convert salary_in_usd to BRL
df['salary_in_brl'] = df['salary_in_usd']*5.5
# criei uma nova coluna com o salário mensal com no maximo 2 casas decimais
df['salary_in_brl_per_month'] = (df['salary_in_brl']/12).round(2)
# drop salary_in_usd, salary
df = df.drop(columns=['salary_in_usd', 'salary', 'salary_currency'])
# adicionar uma voluna numerica para representar experience_level
df['experience_level_num'] = df['experience_level'].map({'Entry-level':1, 'Mid-level': 2, 'Senior':3, 'Executive':4})

#  criar uma coluna com o nome dos continents baseado no country da coluna company_location
df['continent'] = df['company_location'].map({'United States': 'North America', 'Canada': 'North America', 'Brazil': 'South America', 'Germany': 'Europe', 'Netherlands': 'Europe', 'United Kingdom': 'Europe', 'Australia': 'Oceania', 'Singapore': 'Asia', 'India': 'Asia', 'South Africa': 'Africa', 'Kenya': 'Africa', 'Nigeria': 'Africa'})

df['years_of_experience'] = df['experience_level'].map({'Entry-level': 1, 'Mid-level': 3, 'Senior': 6, 'Executive': 10})
df['years_of_experience'] = df['years_of_experience'] + (2*df['years_of_experience'].sample(frac=1).reset_index(drop=True) - 1) 


df.head()

Unnamed: 0,work_year,job_title,job_category,employee_residence,experience_level,employment_type,work_setting,company_location,company_size,salary_in_brl,salary_in_brl_per_month,experience_level_num,continent,years_of_experience
0,2023,Data DevOps Engineer,Data Engineering,Germany,Mid-level,Full-time,Hybrid,Germany,L,522566.0,43547.17,2,Europe,14
1,2023,Data Architect,Data Architecture and Modeling,United States,Senior,Full-time,In-person,United States,M,1023000.0,85250.0,3,North America,11
2,2023,Data Architect,Data Architecture and Modeling,United States,Senior,Full-time,In-person,United States,M,449900.0,37491.67,3,North America,17
3,2023,Data Scientist,Data Science and Research,United States,Senior,Full-time,In-person,United States,M,1166000.0,97166.67,3,North America,17
4,2023,Data Scientist,Data Science and Research,United States,Senior,Full-time,In-person,United States,M,513150.0,42762.5,3,North America,17


In [2]:
import plotly.express as px
fig = px.scatter(df, x='continent', y='salary_in_brl', color='experience_level', size='experience_level_num', hover_name='job_title', title='Salário em BRL por continente e nível de experiência')
fig.show()


In [7]:
# criar um dataframe com a média de salário por continente e nível de experiência
df_mean = df.groupby(['continent', 'experience_level']).mean().reset_index()
df_mean.head()


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0,continent,experience_level,work_year,salary_in_brl,salary_in_brl_per_month,experience_level_num,years_of_experience
0,Africa,Entry-level,2023.0,220000.0,18333.333333,1.0,10.0
1,Africa,Mid-level,2021.75,310750.0,25895.835,2.0,12.5
2,Africa,Senior,2022.4,562083.5,46840.294,3.0,15.8
3,Asia,Entry-level,2022.2,168443.0,14036.916,1.0,10.8
4,Asia,Mid-level,2022.2,316177.4,26348.12,2.0,12.8


In [8]:
# crie um gráfico de barras com a média de salário por continente e nível de experiência
fig = px.bar(df_mean, x='continent', y='salary_in_brl_per_month', color='experience_level', title='Média de salário por continente e nível de experiência')
fig.show()

In [10]:
fig = px.histogram(df, x='experience_level', title='Distribuição de cargos')
fig.show()

# 5 HVPLOT

In [6]:
import hvplot.pandas

df.hvplot.scatter(x='years_of_experience', y='salary_in_brl', by='experience_level', title='Salário por anos de experiência')

# 6 PANEL

In [40]:
import panel as pn
pn.extension()

qtd_rows = pn.widgets.EditableIntSlider(name='Valores', start=0, end=df.shape[0], value=10)

In [42]:
def get_head(qtd_rows):
    return df.head(qtd_rows)

def show_histogram(event):
    fig = px.histogram(get_head(qtd_rows.value), x='experience_level', title='Distribuição de cargos')
    fig.show()

histogram = pn.bind(show_histogram, qtd_rows)

panel_layout = pn.Column(qtd_rows, histogram)
panel_layout.servable()




BokehModel(combine_events=True, render_bundle={'docs_json': {'b5cb6ccb-e500-4146-aacf-3525774b1936': {'version…

In [87]:

import panel as pn
import plotly.express as px
pn.extension('plotly')

options = df['company_location'].unique()
options.sort()
options = list(options)

company_location = pn.widgets.Select(name='Localização', options=options)

@pn.depends(company_location.param.value)
def show_mean_salary(event):
    df_filtered = df[df['company_location'] == company_location]
    # crie um scatter plot com a média de salário por nível de experiência
    fig = px.scatter(df_filtered, x='experience_level', y='salary_in_brl_per_month', title='Média de salário por nível de experiência')
    return fig

# mean_salary = pn.bind(company_location, show_mean_salary)

panel_layout = pn.Column(company_location, show_mean_salary)
panel_layout.servable()

BokehModel(combine_events=True, render_bundle={'docs_json': {'c160c919-7a58-4e47-a3da-5b5656cfe61e': {'version…

# Gráfico interativo

In [77]:
select = pn.widgets.Select(name='Localização', options=options)
select

dfi = df.interactive()
df_pipeline=(dfi[dfi['company_location'] == select])

df_pipeline.hvplot(kind='scatter', x='years_of_experience', y='salary_in_brl', by='experience_level', title='Salário por anos de experiência')


BokehModel(combine_events=True, render_bundle={'docs_json': {'6d7768ac-440c-4900-93cf-be4da4e575ec': {'version…