# Install dependencies if required

In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


# Import Packages

In [2]:
# Import packages
import os
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from dotenv import load_dotenv
from matplotlib.lines import Line2D
from openai import OpenAI
from tqdm import tqdm

load_dotenv()

if not os.getenv("OPENAI_API_KEY"):
    raise Exception("OPENAI_API_KEY was not found. Add it to your .env file")

# **Remember to add you API key to `.env`**


In [3]:


client = OpenAI(
)

# **GENERATE AI EXPOSURE SCORES AND ANALYSIS**

# **Load in Data for Exposure Score Analysis**

---



In [10]:
# Column names are role, task and grade

df = pd.read_csv("example_data/task_data.csv")

# Check the column names are present
required_columns=['role', 'task', 'grade']
if not df.columns.isin(required_columns).all():
    raise Exception({f"Data should have columns {required_columns}, got {df.columns}"})

Exception: {"Data should have columns ['role', 'task', 'gade'], got Index(['role', 'task', 'grade'], dtype='object')"}

# **Exposure Score Generator**

In [None]:
# Define a function to assign exposure score to each task
def task_label(task, role):
  response = client.chat.completions.create(
  model="gpt-4-turbo",
  messages=[
    {
      "role": "system",
      "content": "You are a skills and AI specialist. " + "You will provide a score of potential automation with GPT technology for a given task. Follow instructions closely."
    },
    {
      "role": "user",
      "content": "Look at this job task: " + str(task) + "It is related to the job role: " + str(role) + "Provide a score of potential automation of this task with GPT technology, given that the job is located in a high[low] income country: " + "The score should range 0-1. Provide a score in one line (Score:), and a justification in next line (Justification:). Do not provide any other commentary, only the score and justification. " + "Do not give any ranges just one score for each task."
    }
  ],
  temperature=0.01,
  max_tokens=500,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)
  return response.choices[0].message.content

In [None]:
# Progress tracker
tqdm.pandas()

# Apply the function with a progress bar
df['output'] = df.progress_apply(lambda row: task_label(row['task'], row['role']), axis=1)

In [None]:
# Split the results and create new columns for exposure score and justification
split_columns = df['output'].str.split('Justification:', n=1, expand=True)
df['exposure_score'] = split_columns[0].str.replace('Score: ', '').str.strip()
df['justification'] = split_columns[1].str.strip()

# Converting exposure score to numeric
df['exposure_score'] = pd.to_numeric(df['exposure_score'])

# Dropping the output column
df.drop(columns=['output'], inplace=True)

# **Calculate averages for each role**

In [None]:
# Create a dataframe which stores the mean and standard deviation of exposure score for each role
role_df = df.groupby('role')['exposure_score'].agg(['mean', 'std']).reset_index()
role_df.columns = ['role', 'mean_exposure_score', 'std_exposure_score']

# Round the values to two decimal places
role_df['mean_exposure_score'] = role_df['mean_exposure_score'].round(2)
role_df['std_exposure_score'] = role_df['std_exposure_score'].round(2)

role_df

# **Distribution of Exposure Scores across roles**

In [None]:
# Create density plot to show distribution of mean exposure score for all roles
plt.figure(figsize=(8, 6))
sns.kdeplot(role_df['mean_exposure_score'], color='black', linewidth=3)
plt.xlabel('Mean Automation Exposure Score', fontweight='bold')
plt.ylabel('Density', fontweight='bold')
plt.grid(False)  # Disable grid lines
plt.xlim(0, 1)  # Setting x-axis limits from 0 to 1
plt.xticks(np.arange(0, 1.1, 0.25))  # Setting x-axis ticks with intervals of 0.25
plt.title("Level of Exposure to AI for CDDO Roles", fontsize=14, fontweight='bold')

# Shading the areas for each exposure category
plt.axvspan(0, 0.25, color='white', alpha=1)
plt.text(0.125, 0.8, 'Very Low Exposure', horizontalalignment='center', verticalalignment='bottom', fontsize=11, color='black', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.25'))

plt.axvspan(0.25, 0.5, color='#B8D4FF', alpha=1)
plt.text(0.375, 0.8, 'Low Exposure', horizontalalignment='center', verticalalignment='bottom', fontsize=11, color='black', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.25'))

plt.axvspan(0.5, 0.75, color='#8DA9DF', alpha=1)
plt.text(0.625, 0.8, 'Medium Exposure', horizontalalignment='center', verticalalignment='bottom', fontsize=11, color='black', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.25'))

plt.axvspan(0.75, 1, color='#6381B4', alpha=1)
plt.text(0.875, 0.8, 'High Exposure', horizontalalignment='center', verticalalignment='bottom', fontsize=11, color='black', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.25'))

# Adding dotted vertical lines at the category boundaries
for x in [0.25, 0.5, 0.75]:
    plt.axvline(x, color='black', linestyle='dotted', linewidth=1.5)

plt.show()

# **Potential for Automation and Augumentation**

In [None]:
# Define a function to determine automation or augmentation category
def determine_category(row):
    if row['mean_exposure_score'] < 0.4 and (row['std_exposure_score'] + row['mean_exposure_score']) > 0.5:
        return 'Augmentation Potential'
    elif row['mean_exposure_score'] > 0.6 and (row['mean_exposure_score'] - row['std_exposure_score']) > 0.5:
        return 'Automation Potential'
    elif row['mean_exposure_score'] > 0.35 and row['std_exposure_score'] > 0.1:
        return 'Augmentation/Automation Potential'
    else:
        return 'Low Potential'

# Apply the function to create a new column
role_df['category'] = role_df.apply(determine_category, axis=1)

# Define markers and colors based on category
markers = role_df['category'].apply(lambda x: 's' if x == 'Augmentation Potential' else ('x' if x == 'Automation Potential' else ('^' if x == 'Augmentation/Automation Potential' else 'o')))
colors = role_df['category'].apply(lambda x: 'blue' if x == 'Augmentation Potential' else ('red' if x == 'Automation Potential' else ('green' if x == 'Augmentation/Automation Potential' else 'black')))
sizes = role_df['category'].apply(lambda x: 30 if x == 'Augmentation Potential' else (50 if x == 'Automation Potential' else (40 if x == 'Augmentation/Automation Potential' else 20)))

# Create the scatter plot
plt.figure(figsize=(8, 6))

# Plot each category separately to apply different markers
categories = role_df['category'].unique()
for category in categories:
    subset = role_df[role_df['category'] == category]
    marker = 's' if category == 'Augmentation Potential' else ('x' if category == 'Automation Potential' else ('^' if category == 'Augmentation/Automation Potential' else 'o'))
    color = 'blue' if category == 'Augmentation Potential' else ('red' if category == 'Automation Potential' else ('green' if category == 'Augmentation/Automation Potential' else 'black'))
    size = 30 if category == 'Augmentation Potential' else (50 if category == 'Automation Potential' else (40 if category == 'Augmentation/Automation Potential' else 20))
    plt.scatter(subset['mean_exposure_score'], subset['std_exposure_score'], c=color, marker=marker, s=size, label=category)

# Customize axis labels
plt.xlabel('Mean Exposure Score', fontweight='bold')
plt.ylabel('Standard Deviation', fontweight='bold')

# Add legend with a customized title
plt.legend(title='Category Type')

# Display the plot
plt.show()

# **Exposure score by grade**

In [None]:
# If grade includes Fast Stream, replace these grades with HEO equivalent
df['grade'] = df['grade'].replace('Fast Stream', 'HEO')

# Create a dataframe of the mean of exposure score for each grade
grade_df = df.groupby('grade')['exposure_score'].mean().reset_index().round({'score': 2})

In [None]:
# Define the specific order for the grades
grade_order = ['EO', 'HEO', 'SEO', 'G7', 'G6', 'SCS1']

# Convert the grade column to a categorical type with the specified order
grade_df['grade'] = pd.Categorical(grade_df['grade'], categories=grade_order, ordered=True)

# Sort the DataFrame based on the categorical order
grade_df = grade_df.sort_values('grade').reset_index(drop=True)

# Create bar chart with bold axis labels
plt.figure(figsize=(6, 6))
bar_width = 0.5
bars = plt.bar(grade_df['grade'], grade_df['exposure_score'], color='#073763', width=bar_width)

# Remove grid lines
plt.grid(False)

# Labels with bold font
plt.xlabel('Category', fontweight='bold')
plt.ylabel('Average Automation Score', fontweight='bold')

# Add data labels with specified styles and shifted up slightly
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 0.01, round(yval, 2),
             ha='center', va='bottom', fontsize=11, color='black',
             bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.25'))

# Adjust y-axis limits to provide space for the labels
plt.ylim(0, max(grade_df['exposure_score']) + 0.1)

# Display the chart
plt.show()

# **Combine task data for each role**

In [None]:
# Combine rows with the same role
grouped_df = df.groupby('role').agg({
    'task': ' '.join,
    'grade': 'first',
    'exposure_score': 'mean'
}).reset_index()

# Round the exposure_score to 2 decimal places
grouped_df['exposure_score'] = grouped_df['exposure_score'].round(2)

# **SKILLS IMPORTANCE ANALYSIS**

# **Load in skills data**

In [None]:
# Column names are role and skills

skills_df = pd.read_csv("example_data/skills_data.csv")
required_columns=['role', "skills"]
if not skills_df.columns.isin(required_columns).all():
    raise Exception({f"Data should have columns {required_columns}, got {skills_df.columns}"})

In [None]:
# Merge exposure score analysis with skills data
grouped_df = pd.merge(grouped_df, skills_df, on='role', how='left')

# **Skills Importance Score Generator**

In [None]:
# Define function to assess the importance of each skill using LLM
def get_importance_score(task, role, skills):
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a skills and AI specialist. You will provide a score of importance of different skills. Follow instructions closely."
                ),
            },
            {
                "role": "user",
                "content": (
                    "Look at this set of job tasks: " + str(task) +
                    " Also look at this at list of tasks required to complete the job: " + str(skills) +
                    " These are related to the job role: " + str(role) +
                    " Use this information to assess the importance of each of the thirty-five skills listed below. "
                    "Assign a score between 0 and 1 to represent the importance of each skill, using the following scale:\n"
                    "0.0: Completely Unimportant The skill is entirely irrelevant to the job and not needed at all.\n"
                    "0.1: Negligible Importance The skill is very rarely used or needed in the job. It is almost never relevant.\n"
                    "0.2: Low Importance The skill is occasionally required but is not a significant part of the job. It is useful but not critical.\n"
                    "0.3: Somewhat Important The skill is needed from time to time. It is somewhat relevant to the job but not a major component.\n"
                    "0.4: Moderately Important The skill is regularly used and is a moderate part of the job. It contributes to the job's effectiveness.\n"
                    "0.5: Important The skill is important and frequently used. It plays a significant role in performing the job effectively.\n"
                    "0.6: Quite Important The skill is very important and used often. It is integral to many aspects of the job.\n"
                    "0.7: Very Important The skill is crucial and used regularly. It is essential for successful job performance.\n"
                    "0.8: Highly Important The skill is highly important and almost indispensable. It is used extensively and critical to job success.\n"
                    "0.9: Extremely Important The skill is extremely important and nearly always required. It is vital for almost all job functions.\n"
                    "1.0: Essential The skill is absolutely essential to the job. It is required for all core functions and without it, the job cannot be performed effectively.\n\n"
                    "Each skill has a brief description provided. Return only the name of each skill and the assigned score for each skill. Ensure you score all thirty-five skills. Return nothing else.\n\n"
                    "Instructing - Teaching others how to do something.\n"
                    "Service orientation - Actively looking for ways to help people.\n"
                    "Monitoring - Assessing performance of yourself, others, or organizations to improve or take corrective action.\n"
                    "Social perceptiveness - Being aware of others' reactions and understanding why they react as they do.\n"
                    "Coordination - Organizing and synchronizing activities and efforts to achieve a common goal efficiently.\n"
                    "Negotiation - Bringing others together and trying to reconcile differences.\n"
                    "Persuasion - Convincing others to change their minds or behavior.\n"
                    "Critical thinking - Using logic and reasoning to identify strengths and weaknesses of alternative solutions or approaches.\n"
                    "Writing - Communicating effectively in writing for the audience's needs.\n"
                    "Speaking - Conveying information effectively through speech.\n"
                    "Reading comprehension - Understanding written sentences and paragraphs in work-related documents.\n"
                    "Active listening - Fully focusing on what others are saying, understanding points being made, asking questions appropriately, and not interrupting.\n"
                    "Active learning - Grasping the implications of new information for current and future problem-solving and decision-making.\n"
                    "Learning strategies - Selecting and using training/instructional methods appropriate for the situation when learning or teaching.\n"
                    "Judgment and Decision making - Evaluating the relative costs and benefits of potential actions to choose the most appropriate one.\n"
                    "Science - Using scientific rules and methods to solve problems.\n"
                    "Mathematics - Using mathematics to solve problems.\n"
                    "Programming - Writing computer programs for various purposes.\n"
                    "Complex problem solving - Identifying complex problems and reviewing related information to develop and evaluate options and implement solutions.\n"
                    "Systems analysis - Determining how a system should work and how changes in conditions, operations, and the environment will affect outcomes.\n"
                    "Systems evaluation - Identifying measures or indicators of system performance and actions needed to improve or correct performance.\n"
                    "Operations analysis - Analyzing needs and product requirements to create a design.\n"
                    "Technology design - Generating or adapting equipment and technology to meet user needs.\n"
                    "Management of personnel resources - Motivating, developing, and directing people as they work, identifying the best people for the job.\n"
                    "Management of financial resources - Determining how money will be spent to get the work done, and accounting for these expenditures.\n"
                    "Management of material resources - Obtaining and seeing to the appropriate use of equipment, facilities, and materials needed to do certain work.\n"
                    "Time management - Managing one's own time and the time of others.\n"
                    "Troubleshooting - Determining causes of operating errors and deciding what to do about them.\n"
                    "Equipment selection - Determining the kind of tools and equipment needed to do a job.\n"
                    "Equipment maintenance - Performing routine maintenance on equipment and determining when and what kind of maintenance is needed.\n"
                    "Repairing - Fixing machines or systems using the needed tools.\n"
                    "Installation - Installing equipment, machines, wiring, or programs to meet specifications.\n"
                    "Operation monitoring - Watching gauges, dials, or other indicators to ensure a machine is working properly.\n"
                    "Quality control analysis - Conducting tests and inspections of products, services, or processes to evaluate quality or performance.\n"
                    "Operation and control - Controlling operations of equipment or systems."
                ),
            },
        ],
        temperature=0.01,
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    return response.choices[0].message.content

In [None]:
# Define function to apply the LLM analysis to each skill
def importance_score(row):
    raw_response = get_importance_score(row['task'], row['role'], row['skills'])
    scores = {}
    for line in raw_response.split('\n'):
        if line:
            skill, score = line.split(' - ')
            skill = skill.strip().lower().replace(' ', '_').replace('-', '_')
            scores[skill] = float(score.strip())
    return pd.Series(scores)

In [None]:
# Define the skills list
skills_list = [
    "Instructing", "Service orientation", "Monitoring", "Social perceptiveness", "Coordination",
    "Negotiation", "Persuasion", "Critical thinking", "Writing", "Speaking", "Reading comprehension",
    "Active listening", "Active learning", "Learning strategies", "Judgment and Decision making",
    "Science", "Mathematics", "Programming", "Complex problem solving", "Systems analysis",
    "Systems evaluation", "Operations analysis", "Technology design", "Management of personnel resources",
    "Management of financial resources", "Management of material resources", "Time management",
    "Troubleshooting", "Equipment selection", "Equipment maintenance", "Repairing", "Installation",
    "Operation monitoring", "Quality control analysis", "Operation and control"
]

# Enable progress_apply
tqdm.pandas()

# Apply the function and create new columns for each skill
importance_scores_df = grouped_df.progress_apply(importance_score, axis=1)

# Join the scores with the original DataFrame
results_df = grouped_df.join(importance_scores_df)

100%|██████████| 2/2 [00:39<00:00, 19.54s/it]


# **Calculate Average Importance Score per Job Role**

In [None]:
skills_columns = [
    'instructing', 'service_orientation', 'monitoring', 'social_perceptiveness', 'coordination',
    'negotiation', 'persuasion', 'critical_thinking', 'writing', 'speaking', 'reading_comprehension',
    'active_listening', 'active_learning', 'learning_strategies', 'judgment_and_decision_making',
    'science', 'mathematics', 'programming', 'complex_problem_solving', 'systems_analysis',
    'systems_evaluation', 'operations_analysis', 'technology_design', 'management_of_personnel_resources',
    'management_of_financial_resources', 'management_of_material_resources', 'time_management',
    'troubleshooting', 'equipment_selection', 'equipment_maintenance', 'repairing', 'installation',
    'operation_monitoring', 'quality_control_analysis', 'operation_and_control'
]

# Categorise skills into different skill groups
skill_categories = {
    'social_skills': [
        'instructing', 'service_orientation', 'monitoring', 'social_perceptiveness', 'coordination',
        'negotiation', 'persuasion'
    ],
    'fundamental_skills': [
        'critical_thinking', 'writing', 'speaking', 'reading_comprehension',
        'active_listening', 'active_learning', 'learning_strategies', 'judgment_and_decision_making'
    ],
    'analytical_skills': [
        'science', 'mathematics', 'programming', 'complex_problem_solving', 'systems_analysis',
        'systems_evaluation', 'operations_analysis', 'technology_design'
    ],
    'managerial_skills': [
        'management_of_personnel_resources', 'management_of_financial_resources',
        'management_of_material_resources', 'time_management'
    ],
    'mechanical_skills': [
        'troubleshooting', 'equipment_selection', 'equipment_maintenance', 'repairing',
        'installation', 'operation_monitoring', 'quality_control_analysis', 'operation_and_control'
    ]
}

# Convert the skill columns to numeric values
for col in skills_columns:
    results_df[col] = pd.to_numeric(results_df[col], errors='coerce')

In [None]:
# Calculate the average importance score across all 35 skills for each role
results_df['average_importance_score'] = results_df[skills_columns].mean(axis=1)

# Function to calculate the average score for each skill category
def calculate_average_skills(results_df, skill_list):
    return results_df[skill_list].mean(axis=1)

# Iterate over the skill categories and calculate the average scores
for category, skills in skill_categories.items():
    results_df[category + '_average'] = calculate_average_skills(results_df, skills)


# **Regressions for all 5 skill categories and grade**


In [None]:
# Create a numerical scale to order dataframes
results_df['grade_scale'] = results_df['grade'].map({'EO':1, 'HEO': 2, 'SEO': 3, 'G7': 4, 'G6': 5, 'SCS1': 6})

# Select the columns for the independent variables and ensure they are numeric
X = results_df[['analytical_skills_average', 'managerial_skills_average', 'mechanical_skills_average', 'social_skills_average', 'fundamental_skills_average', 'grade_scale']].astype(float)
y = results_df['exposure_score'].astype(float)

# Ensure X and y are numeric arrays
X = sm.add_constant(X.astype(float))

# Fit the OLS regression model
model = sm.OLS(y, X).fit()

# Store the regression results
regression_results = model

# Print the regression summary
print("\nRegression Summary for Combined Skill Categories:")
print(model.summary())


# **Graph showing statistically significant coefficients**


In [None]:
# Extracting the coefficients and p-values
coefficients = regression_results.params
p_values = regression_results.pvalues

# Set a significance level
significance_level = 0.05

# Filter the significant variables including the constant if significant
significant_vars = coefficients[p_values < significance_level]

# Create a dictionary to map original column names to desired display names
name_mapping = {
    'const': 'Constant',
    'analytical_skills_average': 'Analytical Skills',
    'managerial_skills_average': 'Managerial Skills',
    'mechanical_skills_average': 'Mechanical Skills',
    'social_skills_average': 'Social Skills',
    'fundamental_skills_average': 'Fundamental Skills',
    'grade_scale': 'Grade',
    'ddat_scale': 'DDaT'
}

# Rename the indices of the significant variables using the dictionary
significant_vars.index = [name_mapping.get(name, name) for name in significant_vars.index]

# Rename the indices of the significant variables using the dictionary
significant_vars.index = [name_mapping.get(name, name) for name in significant_vars.index]

# Create a bar chart for significant coefficients including the constant if significant
plt.figure(figsize=(12, 8))
bars = plt.bar(significant_vars.index, significant_vars.values, color='#073763')
plt.ylabel('Coefficient Value', fontsize=16)
plt.xticks(rotation=0, fontsize=16)  # Ensure x-axis labels are horizontal
plt.yticks(fontsize=16)  # Increase y-axis tick labels font size

# Add horizontal dotted line at y=0
plt.axhline(0, color='gray', linestyle='--')

# Extend y-axis limits
y_min = significant_vars.min() - 0.1
y_max = significant_vars.max() + 0.1
plt.ylim(y_min, y_max)

# Add data labels
for bar in bars:
    yval = bar.get_height()
    # Adjust label position based on the sign of yval
    if yval > 0:
        label_position = yval + 0.02
        va = 'bottom'
    else:
        label_position = yval - 0.02
        va = 'top'
    plt.annotate(f'{yval:.2f}',
                 xy=(bar.get_x() + bar.get_width() / 2, label_position),
                 xytext=(0, 0),  # No offset needed
                 textcoords="offset points",
                 ha='center', va=va, color='black', fontweight='bold', fontsize=16)

plt.tight_layout()
plt.show()

# **Average Importance Score across Job Grade**

In [None]:
# Calculate mean of skill importance for each grade, and order grades by seniority
avg_grade_df = results_df.groupby('grade').apply(
    lambda x: pd.Series({
        'fundamental_skills': x[skill_categories['fundamental_skills']].mean().mean(),
        'analytical_skills': x[skill_categories['analytical_skills']].mean().mean(),
        'social_skills': x[skill_categories['social_skills']].mean().mean(),
        'managerial_skills': x[skill_categories['managerial_skills']].mean().mean(),
        'mechanical_skills': x[skill_categories['mechanical_skills']].mean().mean()
    })
).reset_index()

avg_grade_df['grade_scale'] = avg_grade_df['grade'].map({'EO':1, 'HEO': 2, 'SEO': 3, 'G7': 4, 'G6': 5, 'SCS1': 6})

avg_grade_df = avg_grade_df.sort_values('grade_scale')

In [None]:
# Plotting the data
plt.figure(figsize=(12, 8))
line1, = plt.plot(avg_grade_df['grade'], avg_grade_df['fundamental_skills'], marker='o', linestyle='-', label='Fundamental Skills Average', color='#073763ff')
line2, = plt.plot(avg_grade_df['grade'], avg_grade_df['analytical_skills'], marker='o', linestyle='--', label='Analytical Skills Average', color='#073763ff')
line3, = plt.plot(avg_grade_df['grade'], avg_grade_df['social_skills'], marker='o', linestyle=':', label='Social Skills Average', color='#073763ff')
line4, = plt.plot(avg_grade_df['grade'], avg_grade_df['managerial_skills'], marker='o', linestyle='-.', label='Managerial Skills Average', color='#073763ff')
line5, = plt.plot(avg_grade_df['grade'], avg_grade_df['mechanical_skills'], marker='o', linestyle=(0, (3, 5, 1, 5)), label='Mechanical Skills Average', color='#073763ff')

# Customizing the plot
plt.xlabel('Grade', fontsize=16)
plt.ylabel('Average Importance Score', fontsize=16)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)  # Set y-axis limits from 0 to 1
plt.grid(False)  # Disable grid

# Create custom legend handles without markers
custom_lines = [Line2D([0], [0], color='#073763ff', linestyle='-'),
                Line2D([0], [0], color='#073763ff', linestyle='--'),
                Line2D([0], [0], color='#073763ff', linestyle=':'),
                Line2D([0], [0], color='#073763ff', linestyle='-.'),
                Line2D([0], [0], color='#073763ff', linestyle=(0, (3, 5, 1, 5)))]

plt.legend(custom_lines, ['Fundamental Skills Average', 'Analytical Skills Average', 'Social Skills Averagae', 'Managerial Skills Average', 'Mechanical Skills Average'], title='Skills')

plt.tight_layout()
plt.show()