In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from ipywidgets import widgets, Layout
from IPython.display import display
from dash import Dash, dcc, html, Input, Output
import os
from threading import Timer
import webbrowser

In [3]:
import warnings
warnings.simplefilter(action="ignore",category=FutureWarning)

In [4]:
def open_browser():
    """Open the dashboard URL in a new browser window.

    Nothing is opened when the ``WERKZEUG_RUN_MAIN`` environment variable
    holds a non-empty value (presumably set by the Werkzeug reloader's child
    process, so the page is opened only once -- confirm against Werkzeug docs).
    """
    reloader_flag = os.environ.get("WERKZEUG_RUN_MAIN")
    if reloader_flag:
        return
    webbrowser.open_new('http://127.0.0.1:8080/')

In [5]:
# Build the path from its components: the previous literal used backslashes,
# which are invalid string escapes (`\D`, `\l`) and only resolve on Windows.
linkedin_job_postings = pd.read_csv(
    os.path.join("..", "Data", "linkedin_jobs_and_skills", "linkedin_job_postings_cleaned.csv")
)

## Part 1: Top hiring companies by job title

In [6]:
# Function to rank companies by how many postings they have in a jobs frame
def top_hiring_companies(jobs_df, top_n=9):
    """Return the ``top_n`` most frequent values of the ``company`` column.

    Parameters
    ----------
    jobs_df : pandas.DataFrame
        Job postings; must contain a ``company`` column.
    top_n : int, default 9
        Number of companies to keep.

    Returns
    -------
    pandas.Series
        Posting counts indexed by company, in the descending order produced
        by ``value_counts``.
    """
    postings_per_company = jobs_df['company'].value_counts()
    return postings_per_company.head(top_n)

# Create initial plot
default_job_position = ''
if default_job_position:
    # regex=False matches the term literally (so "c++" is not parsed as a
    # pattern); na=False turns missing titles into non-matches instead of
    # leaving NaN in the mask, which boolean indexing rejects.
    initial_title_mask = linkedin_job_postings['job_title'].str.contains(
        default_job_position, case=False, na=False, regex=False
    )
    initial_jobs_df = linkedin_job_postings[initial_title_mask]
else:
    # An empty term selects every posting, so no mask is needed.
    initial_jobs_df = linkedin_job_postings
top_companies = top_hiring_companies(initial_jobs_df)

# Horizontal bar chart of posting counts per company; the y axis is reversed
# so the first (largest) entry is drawn at the top.
fig1 = go.FigureWidget(
    data=[
        go.Bar(
            y=top_companies.index,
            x=top_companies.values,
            orientation='h',
        )
    ]
).update_layout(
    title="Top Companies hiring",
    yaxis_title="Company",
    xaxis_title="Number of Job Postings",
    yaxis=dict(autorange="reversed"),
    height=405,
)

# Function to update plot based on filtered job position
def update_fig1_plot(search_value, top_n):
    """Refresh the module-level ``fig1`` bar chart in place.

    Parameters
    ----------
    search_value : str or None
        Text to look for in ``job_title`` (case-insensitive, literal match).
        A falsy value selects every posting.
    top_n : int or None
        Number of companies to show. ``None`` (e.g. a cleared number input)
        falls back to 9, the default of ``top_hiring_companies``.

    Returns
    -------
    None
        ``fig1`` is mutated; nothing is returned.
    """
    if top_n is None:
        top_n = 9

    if search_value:
        # regex=False: user text is matched literally, so input such as "c++"
        # or "(" no longer raises re.error. na=False: postings with a missing
        # title count as non-matches instead of producing NaN in the mask.
        title_mask = linkedin_job_postings['job_title'].str.contains(
            search_value, case=False, na=False, regex=False
        )
        jobs_df = linkedin_job_postings[title_mask]
        if jobs_df.empty:
            fig1.data[0].y = []
            fig1.data[0].x = []
            fig1.update_layout(title=f"No matching job positions for {search_value}")
            return
        chart_title = f"Top companies hiring for \'{search_value}\'"
    else:
        jobs_df = linkedin_job_postings
        chart_title = "Top companies hiring"

    top_companies = top_hiring_companies(jobs_df, top_n)
    # Shorten long company names so the axis labels stay readable.
    fig1.data[0].y = [
        (name[:30] + '..') if len(name) > 30 else name
        for name in top_companies.index
    ]
    fig1.data[0].x = top_companies.values
    fig1.update_layout(
        title=chart_title,
        yaxis=dict(autorange="reversed"),
        # Grow the figure with the number of bars once there are 9 or more.
        height=400 if (top_n < 9) else (45 * top_n),
    )

## Part 2: Top cities by number of job postings in each country

In [7]:
# Sorted list of countries in the data. Missing values are dropped first so
# the sort never has to compare NaN with strings.
country_list = linkedin_job_postings['search_country'].dropna().unique().tolist()
country_list.sort()

# Postings split by country.
linkedin_job_postings_by_country_dict = {
    country: linkedin_job_postings[linkedin_job_postings['search_country'] == country]
    for country in country_list
}

# Posting counts of the `top` most frequent cities in each country.
top = 10
top_cities_dict = {
    country: linkedin_job_postings_by_country_dict[country]['search_city'].value_counts().head(top)
    for country in country_list
}

# One stand-alone pie figure per country.
fig_dict = {}

for country in country_list:
    city_counts = top_cities_dict[country]
    total = city_counts.values.sum()
    fig_dict[country] = go.Figure(
        data=[
            go.Pie(
                labels=city_counts.index,
                values=city_counts.values,
                textinfo='label+value',
                insidetextorientation='horizontal',
                showlegend=False,
                # Pull each slice out in proportion to its share of the total.
                pull=city_counts.values / total,
            )
        ]
    )
    fig_dict[country].update_layout(
        title_text=f"Top {top} cities with highest jobs in {country}",
        title_x=0.5,
        title_y=0.95,
    )

# Combined figure: one pie per country on a two-column grid whose row count
# follows the number of countries (previously fixed at 2x2, which failed for
# more than four countries).
n_cols = 2
n_rows = max(1, -(-len(country_list) // n_cols))  # ceiling division

fig2 = make_subplots(
    rows=n_rows,
    cols=n_cols,
    subplot_titles=[f"Top {top} cities with highest jobs in {country}" for country in country_list],
    # The traces are pies, so declare pie cells.
    specs=[[{"type": "pie"} for _ in range(n_cols)] for _ in range(n_rows)],
    horizontal_spacing=0.2,
    # Plotly requires vertical_spacing <= 1 / (rows - 1); shrink it for tall grids.
    vertical_spacing=min(0.1, 0.5 / max(n_rows - 1, 1)),
)
for position, country in enumerate(country_list):
    fig2.add_trace(
        fig_dict[country].data[0],
        row=position // n_cols + 1,
        col=position % n_cols + 1,
    )

fig2.update_layout(
    height=600 * n_rows,
    # width=1000,
    title_text=f"Top {top} cities with highest jobs in each country",
)

## Dash Code

In [9]:
# Dash application with a responsive viewport meta tag
# (page rendered at device width, initial zoom 1.0).
app = Dash(
    meta_tags=[
        dict(name='viewport', content='width=device-width, initial-scale=1.0'),
    ],
)

In [10]:
# fig1 Update
@app.callback(
    Output('bar-chart', 'figure'),
    Input('search-button', 'n_clicks'),
    Input('search-input', 'value'),
    Input('top-n-input', 'value'),
)
def update_bar_chart(n_clicks, search_value, top_n):
    """Dash callback: rebuild the bar chart whenever any of its inputs fires.

    Parameters
    ----------
    n_clicks : int or None
        Click count of the search button. Not read here; it is an ``Input``
        only so that clicking the button re-runs the callback.
    search_value : str or None
        Current content of the search box.
    top_n : int or None
        Current content of the "Top N" number box.

    Returns
    -------
    The module-level ``fig1`` after it has been updated in place.
    """
    # The earlier `if n_clicks:` gate ignored Enter/blur changes on the inputs
    # until the button had been clicked once, and never gated again afterwards.
    # Every trigger now refreshes the chart.
    if top_n is None:
        # Presumably what a cleared or out-of-range number box sends (verify
        # against dcc.Input); 9 matches the `value=9` default in the layout.
        top_n = 9
    update_fig1_plot(search_value, top_n)
    return fig1

In [11]:
# Define app layout: search controls, the company bar chart (fig1),
# then the per-country city pies (fig2).
app.layout = html.Div(children=[
    # fig1 layout: search box, top-N selector and search button
    html.Div(children=[
        dcc.Input(
            id='search-input',
            type='text',
            placeholder='Search for job titles...',
            debounce=True,
        ),
        dcc.Input(
            id='top-n-input',
            type='number',
            placeholder='Top N Companies',
            min=1,
            max=20,
            value=9,
            debounce=True,
        ),
        html.Button(children='Search', id='search-button'),
    ]),
    # fig1 layout: the chart itself, updated through the 'bar-chart' id
    html.Div(children=[
        dcc.Graph(id='bar-chart', figure=fig1),
    ]),

    # fig2 layout
    html.Div(children=[
        dcc.Graph(figure=fig2),
    ]),
])

In [12]:
# Schedule open_browser on a background thread one second from now, since the
# server call below blocks this cell.
Timer(1, open_browser).start()
# `run_server` is deprecated in favour of `run` and removed in Dash 3.
# Prefer `run`, falling back to `run_server` on older Dash releases.
run_app = getattr(app, "run", None) or app.run_server
run_app(debug=True, port=8080)