In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')


In [35]:
# Load the job-postings dataset; the relative path is resolved against the
# kernel's current working directory.
# NOTE(review): the preview output shows 'Unnamed: 0.1' and 'Unnamed: 0' columns —
# presumably index columns written by earlier to_csv calls; confirm before dropping.
df = pd.read_csv('../../data/job_data.csv')

In [13]:
# Preview the first ten rows to check the available columns and sample values.
df.head(10)

Unnamed: 0.1,Unnamed: 0,title,company_name,location,via,description,schedule_type,salary,query,qualifications,responsibilities,benefits,degree,experience,remote
0,0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,Built In,Company Overview:\nEx Populus is a cutting-edg...,Full-time,,blockchain,['2-3 years of Software Development experience...,"['Design, maintain and deploy smart contracts ...",,,2.5,True
1,1,Blockchain Engineer,21.co,"New York, NY",Greenhouse,We are seeking a highly motivated and skilled ...,Full-time,180000.0,blockchain,"[""Bachelor's or Master's degree in Computer Sc...","['As a Blockchain Engineer, you will be respon...",['(NYC only) Pursuant to Section 8-102 of titl...,Master's,,False
2,2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...",Contractor,,blockchain,"['3+ years of experience in blockchain, crypto...",['Our expert technical team will provide the s...,,,3.0,True
3,3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,Upwork,Need someone to join our existing team to spee...,Contractor,41600.0,blockchain,"['Candidates must be willing to sign, non-disc...",['Will discuss details with the selected candi...,,,,True
4,4,Blockchain DevOps Engineer (Remote),Telnyx,United States,Startup Jobs,"About Telnyx\n\nAt Telnyx, we’re architecting ...",Full-time,,blockchain,['You are a highly motivated and experienced B...,['To build a best-in-class Filecoin (FIL) Mini...,,Bachelor's,,True
5,5,Ethereum Developer Remote US (Blockchain-DeFi),Turnblock.io,Anywhere,ZipRecruiter,Our client is on crypto’s cutting edge technol...,Full-time,150000.0,blockchain,"['5+ years of development experience', 'Experi...","['As a member of an agile engineering team, yo...",['Great compensation package'],,5.0,True
6,6,Social Media Marketing (Blockchain/Crypto),Bitquery,Anywhere,AngelList,• Maintain a social media calendar\n• Build Bi...,Full-time,,blockchain,['English Proficiency (Written and Speaking) w...,['Put our Bitquery updates and educational con...,['Opportunity to work & collaborate with a tru...,,,True
7,7,Senior Software Engineer - Blockchain Network ...,Jobot,Anywhere,Dice,"competitive salaries, stock options, company p...",Full-time,175000.0,blockchain,['At least 5 years of experience with designin...,"['Rotating breakfast menu served daily', 'Dinn...","['Salary: $150,000 - $200,000 per year', 'Our ...",,5.0,True
8,8,Blockchain Developer,Atechstar,United States,OPTnation,Requirements: Strong software development back...,Full-time,141000.0,blockchain,['Requirements: Strong software development ba...,,,,,True
9,9,DeFi Blockchain Co-Founder,Cryptops Exchange,Anywhere,LinkedIn,Cryptops is currently looking for a Co-Founder...,Full-time,,blockchain,['Can personally invest or immediately support...,['Other decentralized derivatives exchanges re...,,,,True


In [76]:

# Box plot of salaries split by remote status, with a dropdown that switches
# the degree level whose postings are shown.

# Degree levels offered in the dropdown; the first one is displayed initially
degree_options = ["Bachelor's", "Master's", 'PhD']

# Fixed trace order. Every dropdown button must list its y-arrays in this same
# order, because Plotly assigns the i-th array of an update to the i-th trace.
remote_states = [False, True]
remote_colors = {True: 'indianred', False: 'lightseagreen'}


def _salaries_for(degree, remote):
    """Return the non-missing salaries of postings with the given degree and remote flag.

    Parameters
    ----------
    degree : str
        Value to match in the 'degree' column.
    remote : bool
        Value to match in the 'remote' column.

    Returns
    -------
    list
        Salaries of the matching rows, with missing values removed.
    """
    mask = (df['degree'] == degree) & (df['remote'] == remote)
    return df.loc[mask, 'salary'].dropna().tolist()


# Create the figure object
fig = go.Figure()

# Add one box trace per remote status. The initial data is restricted to the
# first degree option so that it agrees with the initial y-axis title and with
# the dropdown entry that is shown as active.
initial_degree = degree_options[0]
for remote in remote_states:
    fig.add_trace(go.Box(
        y=_salaries_for(initial_degree, remote),
        name=str(remote),
        marker_color=remote_colors[remote]))

# Define the update menu: one button per degree level. Each button replaces the
# y-data of both traces (in trace order) and retitles the y-axis.
updatemenu = go.layout.Updatemenu(
    buttons=[
        dict(label=degree,
             method='update',
             args=[{'y': [_salaries_for(degree, remote) for remote in remote_states]},
                   {'yaxis': {'title': 'Salaries for {} Holders'.format(degree)}}])
        for degree in degree_options
    ],
    direction='down',
    showactive=True,
    active=0,  # matches initial_degree
    x=1,
    y=1.2
)

# Define the layout, including the dropdown
fig.update_layout(
    title='Salaries by Remote Work Status, for Different Degree Holders',
    xaxis=dict(title='Work Remotely?'),
    yaxis=dict(title='Salaries for {} Holders'.format(initial_degree)),
    template="plotly_dark",
    updatemenus=[updatemenu]
)

# Show the figure
fig.show()


In [43]:
# Count the postings returned for each search query to see how evenly the
# queries are represented in the dataset.
df['query'].value_counts()

blockchain                      90
natural language processing     90
big data and cloud computing    90
data analyst                    90
machine learning                90
reinforcement learning          90
neural networks                 90
deep learning                   90
data scientist                  90
time series analysis             7
time series                      6
Name: query, dtype: int64

In [56]:
def capitalize_words_except_and(s):
    """Title-case every whitespace-separated word of ``s`` except the exact word 'and'.

    Words are split on runs of whitespace and re-joined with single spaces, so
    leading, trailing and repeated whitespace is not preserved.
    """
    pieces = []
    for token in s.split():
        # Only the exact lowercase token 'and' is passed through unchanged
        if token != 'and':
            token = token.title()
        pieces.append(token)
    return ' '.join(pieces)

# Replace each query label with its title-cased form (the 'query' column is overwritten)
df['query'] = df['query'].map(capitalize_words_except_and)

In [75]:
# Tally how many postings each query returned
query_counts = df['query'].value_counts()

# Start from an empty figure and attach the single pie trace.
# The empty trace name is presumably there to keep the trace name out of the hover box.
fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=query_counts.index.tolist(),
        values=query_counts.tolist(),
        hovertemplate='Query: %{label}<br>Count: %{value}',
        name="",
    )
)

# Title, fixed width and dark theme for the chart
fig.update_layout(
    title='Percentage of Job Results, by Query',
    width=800,
    template="plotly_dark",
)

# Display the figure
fig.show()


In [77]:
# Count the postings for every (query, degree) pair. Rows whose degree is
# missing are left out, because groupby drops NaN keys by default.
df_grouped = df.groupby(['query', 'degree']).size().reset_index(name='count')

# Bar color for each degree level
colors = {'Bachelor\'s': 'rgb(31, 119, 180)', 'Master\'s': 'rgb(255, 127, 14)', 'PhD': 'rgb(44, 160, 44)'}

# Create one bar trace per degree level. Grouping with sort=False visits the
# degrees in order of first appearance and slices the frame once per degree.
traces = [
    go.Bar(
        x=degree_rows['query'],
        y=degree_rows['count'],
        name=degree,
        # .get() rather than [] so that a degree level without an entry in
        # `colors` falls back to Plotly's default color instead of raising KeyError
        marker=dict(color=colors.get(degree)),
        hovertemplate='Query: %{x} <br>Count: %{y}'
    )
    for degree, degree_rows in df_grouped.groupby('degree', sort=False)
]

# Set the layout for the chart: bars of different degrees sit side by side per query
layout = go.Layout(
    title='Number of Job Results, by Query and Degree',
    barmode='group',
    template="plotly_dark",
    xaxis=dict(title='Query'),
    yaxis=dict(title='Count')
)

# Create the figure object and plot it
fig = go.Figure(data=traces, layout=layout)
fig.show()
