In [1]:

# visit this link for the video https://youtu.be/1Z4yed3vPnE


In [2]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# loading Data
q1 = pd.read_csv("question_1_data.csv")
q2 = pd.read_csv("question_2_data.csv")
q3 = pd.read_csv("question_3_data.csv")
q4 = pd.read_csv("question_4_data.csv")



In [3]:
# normalizing the population data from q1
population = q1[
    (q1["Gender (3)"] == "Total - Gender") &
    (q1["Occupation - Unit group - National Occupational Classification (NOC) 2021 (821A)"] == "All occupations")
].groupby("GEO")["VALUE"].sum().to_dict()


essential_map = {
    "31301 Registered nurses and registered psychiatric nurses": "Nurses",
    "42100 Police officers (except commissioned)": "Police officers",
    "42101 Firefighters": "Firefighters"
}
q1_filtered = q1[q1["Occupation - Unit group - National Occupational Classification (NOC) 2021 (821A)"].isin(essential_map.keys())].copy()
q1_filtered["Occupation"] = q1_filtered["Occupation - Unit group - National Occupational Classification (NOC) 2021 (821A)"].map(essential_map)
q1_filtered["Per100k"] = q1_filtered.apply(lambda row: row["VALUE"] / population.get(row["GEO"], 1) * 100000, axis=1)


q2["Per100k"] = q2.apply(lambda row: row["VALUE"] / population.get(row["GEO"], 1) * 100000, axis=1)


q3["Per100k"] = q3.apply(lambda row: row["VALUE"] / population.get(row["GEO"], 1) * 100000, axis=1)


q4["Per100k"] = q4.apply(lambda row: row["VALUE"] / population.get(row["GEO"], 1) * 100000, axis=1)

In [4]:
# App
app = Dash(__name__)
app.title = "Canadian Employment Dashboard"

# App Layout
app.layout = html.Div([
    html.H1("Employment Statistics Dashboard - Canada", style={'textAlign': 'center'}),

    html.H2("1. Essential Services Distribution (Per 100k People)"),
    dcc.Dropdown(id='q1_occupation_selector',
                 options=[{'label': o, 'value': o} for o in ['Nurses', 'Police officers', 'Firefighters']],
                 value='Nurses'),
    dcc.Graph(id='q1_graph'),

    html.H2("2. Gender-Based Employment by Major NOC Group (Per 100k)"),
    dcc.Dropdown(id='q2_province',
                 options=[{'label': g, 'value': g} for g in sorted(q2['GEO'].unique())],
                 value='Canada'),
    dcc.Graph(id='q2_graph'),

    html.H2("3. EV Factory Engineer Availability (Per 100k People)"),
    dcc.Graph(id='q3_graph'),

    html.H2("4. Insight: Education Level Breakdown (Bachelor's vs Master's in Health Occupations)"),
    dcc.Graph(id='q4_graph')
], style={
    'backgroundColor': 'white',
    'color': 'black',
    'padding': '20px',
    'fontFamily': 'Calibri' 
})

# Q1 Callback
@app.callback(
    Output('q1_graph', 'figure'),
    Input('q1_occupation_selector', 'value')
)
def update_q1_graph(occupation):
    df = q1_filtered[q1_filtered['Occupation'] == occupation]
    df_grouped = df[df['Gender (3)'] == 'Total - Gender']
    fig = px.bar(
        df_grouped, 
        x='GEO', 
        y='Per100k', 
        color='GEO',
        color_discrete_sequence=px.colors.qualitative.Set1,
        title=f'{occupation} per 100k Residents by Province',
        labels={"Per100k": "Per 100,000 Residents"}
    )
    fig.update_layout(font=dict(family='Calibri')) 


    return fig

# Q2 Callback
@app.callback(
    Output('q2_graph', 'figure'),
    Input('q2_province', 'value')
)
def update_q2_graph(province):
    df = q2[(q2['GEO'] == province) & (q2["Gender (3)"].isin(["Men+", "Women+"]))]
    df['NOC_Major'] = df['Occupation - Unit group - National Occupational Classification (NOC) 2021 (821A)'].str.extract(r'^\d?\s?([^\(]+)')

    fig = px.bar(
        df,
        x='NOC_Major',
        y='Per100k',
        color='Gender (3)',
        color_discrete_sequence=px.colors.qualitative.Set2,
        barmode='group',
        title=f"Gender Distribution by NOC Major Group in {province}",
        labels={"Per100k": "Per 100,000 Residents", "NOC_Major": "Occupation Group"}
    )

    fig.update_layout(
        xaxis_tickangle=-20,
        xaxis_tickfont=dict(size=10),
        margin=dict(b=150),
        width=1100,
        font=dict(family='Calibri')  
    )

    return fig

# Q3 Callback
@app.callback(
    Output('q3_graph', 'figure'),
    Input('q2_province', 'value') )

def update_q3_graph(_):
    fig = px.bar(
        q3, 
        x='GEO', 
        y='Per100k',
        color='Occupation - Unit group - National Occupational Classification (NOC) 2021 (821A)',
        color_discrete_sequence=px.colors.qualitative.Set3,
        title='Engineering Workforce Availability by Province (Per 100k)',
        labels={"Per100k": "Per 100,000 Residents"}
    )
    fig.update_layout(font=dict(family='Calibri'))  
    return fig

# Q4 callback
@app.callback(
    Output('q4_graph', 'figure'),
    Input('q2_province', 'value') 
)
def update_q4_graph(_):
    df = q4[q4['Highest certificate, diploma or degree (16)'].isin([
        "Bachelor's degree", "Master's degree"
    ])].copy()

    fig = px.bar(
        df,
        x='GEO',
        y='Per100k',
        color='Highest certificate, diploma or degree (16)',
        color_discrete_sequence=px.colors.qualitative.Bold,
        barmode='stack',
        title="Education Level Comparison in Health Occupations by Province",
        labels={"Per100k": "Per 100,000 Residents", "GEO": "Province"}
    )

    fig.update_traces(marker_pattern_shape=None)  
    fig.update_layout(
        legend_title_text="Education Level",
        xaxis_tickangle=-30,
        font=dict(family='Calibri') 
    )

    return fig


In [5]:
# Run App
if __name__ == '__main__':
    app.run(debug=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

