In [53]:
import sys
import os
from pprint import pprint
import pandas as pd
import json
from tqdm import tqdm
import hashlib
import yaml
sys.path.append(os.path.abspath('../'))
from RAG.rag_utils import load_config, load_documents, split_documents
from RAG.rag_utils import get_embedding_model, make_custom_prompt, get_llm
from RAG.rag_utils import get_memory, get_vectorstore, get_multi_vectorstore
from RAG.rag_utils import get_multi_query_retriever, get_compression_retriever
from RAG.rag_utils import get_BM25_retriever, get_ensemble_retriever, get_reorder_retriever 
from RAG.rag_utils import get_conv_chain

In [54]:
def load_exam():
    """
    Load the exam topics into a DataFrame.

    Reads every ``*.json`` file in the directory named by the ``EXAM_TOPICS``
    environment variable. Each file is parsed as a single JSON document and
    becomes one row of the result, so each topic can live in its own file.

    Returns:
        pandas.DataFrame: one row per topic file, ordered by file name.

    Raises:
        EnvironmentError: if ``EXAM_TOPICS`` is not set (or is empty).
    """
    # directory where the exam topic JSON files are stored
    exam_topics_path = os.getenv("EXAM_TOPICS")
    if not exam_topics_path:
        raise EnvironmentError("EXAM_TOPICS environment variable is not set")

    # os.listdir returns entries in arbitrary order; sort them so the exam rows
    # (and therefore the order in which questions are asked) are reproducible
    topic_files = sorted(
        fname for fname in os.listdir(exam_topics_path) if fname.endswith(".json")
    )

    exam = []
    for fname in topic_files:
        # separate names for the file name and the handle (no shadowing)
        with open(os.path.join(exam_topics_path, fname), "r", encoding="utf-8") as fh:
            exam.append(json.load(fh))
    return pd.DataFrame(exam)
    

In [55]:
def take_exam(exam, qa_chain, domain="HR and workforce transformation", n_questions=4):
    """
    Ask the QA chain every exam question and collect its answers.

    Args:
        exam: DataFrame with one row per topic. Each row must provide
            "Question i" and "Teacher Answer i" for i in 1..n_questions,
            plus "topic" and "topic_hash".
        qa_chain: callable that takes a dict with the keys "domain",
            "question" and "chat_history" and returns a mapping containing
            an "answer" entry.
        domain: value passed to the chain under the "domain" key.
        n_questions: number of numbered question/answer pairs per topic.

    Returns:
        pandas.DataFrame: one row per topic with, for each i, the columns
        "Question i", "Teacher Answer i" and "Student Answer i", followed by
        "topic_hash" and "topic".
    """
    student_answers = []
    # total lets tqdm display progress against the number of topics
    for _, topic in tqdm(exam.iterrows(), total=len(exam)):
        record = {}
        for i in range(1, n_questions + 1):
            question = topic[f"Question {i}"]
            record[f"Question {i}"] = question
            record[f"Teacher Answer {i}"] = topic[f"Teacher Answer {i}"]
            # every question is asked on its own, with an empty chat history
            record[f"Student Answer {i}"] = qa_chain(
                {"domain": domain, "question": question, "chat_history": ""}
            )["answer"]
        record["topic_hash"] = topic["topic_hash"]
        record["topic"] = topic["topic"]
        student_answers.append(record)
    return pd.DataFrame(student_answers)

In [56]:
def write_exam_results(student_answers, config):
    """
    Persist the student's answers together with the config that produced them.

    Files are written to ``$EXAM_ANSWERS/<student_id>``, where ``student_id``
    is the MD5 hex digest of the YAML-serialized config, so each distinct
    configuration gets its own directory:

    * ``student_answers.csv`` - the answers DataFrame
    * ``config.yaml``         - the configuration used for the run

    Args:
        student_answers: DataFrame of answers (as returned by ``take_exam``).
        config: configuration object; it is serialized with ``yaml.dump``.

    Raises:
        EnvironmentError: if ``EXAM_ANSWERS`` is not set (or is empty).
    """
    answers_root = os.getenv("EXAM_ANSWERS")
    if not answers_root:
        raise EnvironmentError("EXAM_ANSWERS environment variable is not set")

    # MD5 is used only as a short identifier for the config, not for security
    student_id = hashlib.md5(yaml.dump(config).encode()).hexdigest()
    path = os.path.join(answers_root, student_id)

    # the directory has to exist before anything is written into it
    os.makedirs(path, exist_ok=True)

    # the file name must be a string; the DataFrame itself is not a path component
    student_answers.to_csv(os.path.join(path, "student_answers.csv"))

    yaml_path = os.path.join(path, "config.yaml")
    with open(yaml_path, "w") as f:
        yaml.dump(config, f)
    

In [57]:
# Load the run configuration via the project helper; the bare `config`
# expression on the last line displays it as the cell output.
config = load_config()
config

{'random_seed': 1,
 'temperature': 0,
 'device': 'mps',
 'recreate_vector_store': False,
 'gradio_server_port': 7860,
 'chatbot_prompt': 'skills_edge_prompt_2.txt',
 'student_prompt': 'student_answer_exam_prompt.txt',
 'max_tokens': 1024,
 'chunk_size': 500,
 'chunk_overlap': 40,
 'embedding_model_name': 'thenlper/gte-large',
 'top_k': 15,
 'search_type': 'mmr',
 'score_threshold': 0.8,
 'chain_type': 'stuff',
 'multi_query': False,
 'ensemble': False,
 'bm25_retriever_k': 2,
 'compression': False,
 'multi_vector': False,
 'rephrase_question': False,
 'reorder': False}

## Load documents

In [58]:
# load_documents() takes no arguments here, so the document location is
# presumably resolved inside RAG.rag_utils - TODO confirm.
documents = load_documents()

## Split documents

In [59]:
# Chunking parameters come straight from the loaded config.
chunk_size, chunk_overlap = config['chunk_size'], config['chunk_overlap']

chunks = split_documents(documents, chunk_size, chunk_overlap)
print(f"split documents with chunk size {chunk_size}, chunk_overlap {chunk_overlap}")

split documents with chunk size 500, chunk_overlap 40


## Load embedding model

In [60]:
# Model name and target device are both read from the config.
embedding_model_name, device = config["embedding_model_name"], config["device"]

embeddings = get_embedding_model(embedding_model_name, device)
print(f"Loading model {embedding_model_name}, on device {device}")

Loading model thenlper/gte-large, on device mps


## Create a custom prompt

In [61]:
# The exam uses the prompt file named by the 'student_prompt' config entry.
# The three input variables correspond to the {domain}, {context} and
# {question} placeholders visible in the template printed below this cell.
prompt_fname = config["student_prompt"]
prompt = make_custom_prompt(prompt_fname, input_variables=["domain","context","question"])

input_variables=['context', 'domain', 'question'] template='You are an expert in the domain of {domain}. To prove your expertise and add value to our project \nI would like you to do your best to answer a question from the domain. \nHere are you instructions\n1. The topic appears enclosed in triple backticks in the section Topic: after these instructions.\n2. The question appears enclosed in triple backticks in the section Teacher Question: after these instructions.\n3. Place your answer in the section Student Answer:\n\nTopic: ```{context}```\nTeacher Question: ```{question}```\nStudent:\n\n\n\n\n'


## Get memory (not used by the exam: `memory` is reset to `None` before the QA chain is built)

In [62]:
# NOTE: `memory` is reassigned to None in the "Create QA Chain" cell before
# the chain is built, so this object does not reach get_conv_chain.
memory = get_memory()

## Get LLM

In [63]:
# Generation settings for the LLM, all taken from the config.
temperature, max_tokens, random_seed = (
    config['temperature'],
    config['max_tokens'],
    config['random_seed'],
)

llm = get_llm(temperature, max_tokens, random_seed)
print(f'Connect to LLM running on  {os.getenv("LLM_URL")} with temperature {temperature} and max_tokens {max_tokens}')

Connect to LLM running on  http://localhost:1234/v1 with temperature 0 and max_tokens 1024


# Retrievers

## Base vector store retriever

In [64]:
search_type, k, multi_vector = config['search_type'], config['top_k'], config['multi_vector']

# The multi-vector store is built from the unsplit `documents`,
# the plain vector store from the pre-split `chunks`.
retriever = (
    get_multi_vectorstore(documents, embeddings, k, search_type)
    if multi_vector
    else get_vectorstore(chunks, embeddings, k, search_type)
)
print(f"Created base retriever with search type {search_type} returning top {k} results")

Created base retriever with search type mmr returning top 15 results


## Apply multiquery to retriever

In [65]:
multi_query = config['multi_query']

# Wrap the current retriever only when the option is enabled; otherwise keep it as is.
retriever = get_multi_query_retriever(llm, retriever) if multi_query else retriever
print(f"Applied multi_query to retriever: {multi_query}")

Applied multi_query to retriever: False


## Apply ensembling (BM25 + vector store) to retriever

In [66]:
ensemble = config['ensemble']
if ensemble:
    # Take the BM25 result count from the config rather than a hard-coded 2,
    # so that changing 'bm25_retriever_k' actually takes effect.
    bm25_retriever = get_BM25_retriever(chunks, k=config['bm25_retriever_k'])
    retrievers = [bm25_retriever, retriever]

    # Both retrievers are given the same weight in the ensemble.
    retriever = get_ensemble_retriever(retrievers, weights=[0.5, 0.5])
print(f"""Applied ensembling to retriever: {ensemble}""")

Applied ensembling to retriever: False


## Apply re-order to retriever

In [67]:
reorder = config['reorder']

# Wrap the current retriever with re-ordering only when the option is enabled.
retriever = get_reorder_retriever(retriever) if reorder else retriever
print(f"Applied re-order to retriever: {reorder}")

Applied re-order to retriever: False


## Create QA Chain

In [68]:
chain_type = config["chain_type"]
rephrase_question = config["rephrase_question"]
# Manual override left for experimentation; uncomment to ignore the config value.
#chain_type="refine"
# NOTE(review): this discards the object returned by get_memory() earlier in
# the notebook. Presumably intentional, since take_exam sends every question
# with an empty chat_history - confirm.
memory=None

# Assemble the chain from the LLM, the (possibly wrapped) retriever and the exam prompt.
qa_chain = get_conv_chain(llm, retriever, memory, prompt, chain_type, rephrase_question)
print(f"""Created chain with chain_type: {chain_type}""")

input_variables=['context', 'domain', 'question'] template='You are an expert in the domain of {domain}. To prove your expertise and add value to our project \nI would like you to do your best to answer a question from the domain. \nHere are you instructions\n1. The topic appears enclosed in triple backticks in the section Topic: after these instructions.\n2. The question appears enclosed in triple backticks in the section Teacher Question: after these instructions.\n3. Place your answer in the section Student Answer:\n\nTopic: ```{context}```\nTeacher Question: ```{question}```\nStudent:\n\n\n\n\n'
Created chain with chain_type: stuff


In [69]:
# Echo the full configuration so the recorded run is self-describing.
print("Running QA Chain with config \n")
pprint(config)

Running QA Chain with config 

{'bm25_retriever_k': 2,
 'chain_type': 'stuff',
 'chatbot_prompt': 'skills_edge_prompt_2.txt',
 'chunk_overlap': 40,
 'chunk_size': 500,
 'compression': False,
 'device': 'mps',
 'embedding_model_name': 'thenlper/gte-large',
 'ensemble': False,
 'gradio_server_port': 7860,
 'max_tokens': 1024,
 'multi_query': False,
 'multi_vector': False,
 'random_seed': 1,
 'recreate_vector_store': False,
 'reorder': False,
 'rephrase_question': False,
 'score_threshold': 0.8,
 'search_type': 'mmr',
 'student_prompt': 'student_answer_exam_prompt.txt',
 'temperature': 0,
 'top_k': 15}


# Take the exam

## Load the exam questions

In [70]:
# Reads the *.json topic files from the directory named by $EXAM_TOPICS.
exam = load_exam()

In [72]:
# Long-running: the recorded run below took about 17 minutes for 14 topics.
student_answers = take_exam(exam,qa_chain)

  warn_deprecated(
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


A well-rounded Skills Library is essential for defining roles effectively and ensuring that organizations have the necessary workforce capabilities to meet their business needs. Including a variety of traditional categories such as competencies, qualifications, certifications, technical and behavioral skills, and region-specific expectations in a Skills Library offers several benefits:

1. Comprehensive understanding of roles: By incorporating various categories, organizations can gain a more complete understanding of the skills required for each role. This holistic approach allows them to account for both hard (technical) and soft (behavioral) skills, as well as any region-specific knowledge or qualifications that may be necessary.
2. Flexibility in workforce planning: A Skills Library with a diverse range of categories enables organizations to adapt to changing business needs and market conditions. For instance, they can identify the skills needed for new roles or projects and ensure

1it [01:21, 81.17s/it]

The 'Skills over Degrees' initiative refers to a talent strategy where organizations prioritize hiring individuals based on their skills rather than their educational background. IBM is one of the major companies that has been well-documented for implementing this approach. This shift towards skills-based hiring is driven by the increasing demand for in-demand skills and the need for companies to stay competitive in the market. Additionally, technology can help light the way for employees to assess their skills portfolio against market needs and choose the right upskilling opportunities. However, many companies face challenges in tracking the availability of internal and external skills, making it essential to invest in tools and technologies that enable easy updates and collaboration on skills taxonomies.Based on the information provided in the text, 45% of companies report that their need for in-demand skills is higher than for other skills. This makes talent search and retention eve

2it [02:49, 85.44s/it]

In a talent marketplace, skills serve as the primary currency for matching demand (for work) and supply (of talent). This approach is based on the understanding that jobs and their pre-defined career paths are no longer sufficient to meet the evolving needs of businesses and employees. Instead, organizations focus on the skills required to perform specific tasks or projects.

The use of skills as a currency in talent marketplaces disrupts traditional talent management practices by enabling greater agility, flexibility, and resilience. Here's how it works:

1. Strategic workforce planning: Organizations can apply internal and external supply and demand skills research data for enhanced insights and redefined talent/skills pools. This allows them to respond quickly to evolving business needs and make informed decisions about the skills they need to acquire or develop.
2. Reskilling and upskilling: Talent marketplaces enable organizations to conduct rapid reskilling without laborious curr

3it [04:08, 82.26s/it]

The shift from a job-based to a skills-based architecture in the workforce is driven by several strategic necessities and trends that require organizations to enlarge their technical skill sets, tap into universal skills taxonomies, and adapt to new markets or services. This change is not only about removing obsolete skills but also about encouraging employees to develop new skills, earn credentials, and focus on career advancement paths.

In the past, job titles and structures primarily determined how work was defined, organized, and performed within an organization. However, as businesses evolve and become more agile, there is a growing need for flexibility in talent management. This shift to skills-based talent practices means that employees are encouraged to develop new skills, earn credentials, and focus on their career paths.

The importance of skills in the workforce can be seen in the following statistics:
1. Strategic Necessities: Companies are leading the transformation to a 

4it [05:30, 82.34s/it]

The first step in adopting an agile approach to developing skills-based talent practices, as suggested by the topic, is to identify a specific unmet skills-related need or a current success that could be built upon or optimized. This starting point will help organizations focus their efforts and resources effectively while also demonstrating near-term return on investment (ROI), which is essential for gaining support from stakeholders.Developing use cases around successes or needs is a crucial step in adopting an agile approach to talent practices. This methodology allows organizations to identify specific areas where skills-based practices can be implemented, making it easier to gain business sponsorship and adoption. Use cases also provide a clear focus for the initial deployment of talent marketplaces, which can help minimize disruption and increase the chances of success.

By starting with a use case, organizations can address a specific business problem or unmet skills-related nee

5it [06:32, 75.10s/it]

A large global multinational bank started using talent marketplaces with an initial focus on providing visibility of opportunities to its colleagues and enabling agile ways of working. This approach allowed the bank to evolve its talent marketplace into an opportunity hub, including full-time jobs across its footprint. The bank's talent marketplace has been instrumental in driving greater agility, flexibility, and resilience in its business model by seamlessly deploying talent and matching skills to work in response to shifting business demand.

The bank began this transformation by generating awareness of the talent marketplace and focusing on user experience and advocacy. As employee adoption and usage increased, organizations and their people started looking to change the ways and rules of work. The bank's future plans for its talent marketplace include building manager capability to enable the deconstruction of work into tasks and turning them into attractive projects that motivate

6it [07:49, 75.73s/it]

Based on the data provided in the text, approximately 48% of companies view skills-based pay as important or very important for optimal compensation plans.The primary objective for the majority of companies (80%) when linking rewards to skills is attraction and retention. This means that these companies are using skills-based pay systems as a tool to attract and retain top talent, particularly in specialized and in-demand roles. By offering higher compensation for employees who possess valuable skills, organizations can differentiate their pay packages and make themselves more competitive in the job market. Additionally, by incentivizing skill development and career progression, companies can foster a culture of continuous learning and growth among their workforce.Based on the data provided in the text, companies differentiate rewards for specific skills in several ways. The most common way is by differentiating rewards for hard skills, which are the most frequently identified skills u

7it [08:54, 72.12s/it]

The current economic crisis underscores the significance of recognizing and focusing on an organization's critical work for future success. In a rapidly evolving business landscape, staying competitive necessitates rethinking and redesigning tasks that align with long-term strategic objectives.

Identifying 'must-have' work involves assessing market conditions and demand for specific skills, as well as evaluating the scarcity of those abilities within the organization. This process can be challenging due to various factors, such as the dynamic nature of markets and the need for a future orientation.

By investing in the development or acquisition of essential skills, organizations can better adapt to changing market conditions and remain competitive. Moreover, focusing on critical work can lead to increased innovation and diversity of thinking within the organization.

In summary, today's economic crisis emphasizes the importance of identifying an organization's 'must-have' work for fu

8it [10:18, 76.00s/it]

In the context of skills-based pay, a proficiency scale plays a crucial role in ensuring that the assessment and compensation of employees' skills are objective and fair. A proficiency scale defines the expected level of mastery for each skill, enabling organizations to differentiate between various levels of competency or expertise. This is essential as it allows for the creation of a transparent and data-driven approach to pay, where employees can see what skills are valued and how their development contributes to their compensation.

Moreover, proficiency scales help in aligning pay with market insights and industry standards, ensuring that organizations remain competitive in attracting and retaining talent. They also facilitate the comparison of skills across departments and employees, making it easier for organizations to identify skill gaps and opportunities for growth.

However, developing a proficiency scale can be complex as each organization's framework is unique, and there i

9it [11:14, 69.67s/it]

Based on Mercer's Future Skills Survey report, the three skills that are expected to be most important to businesses over the next three years are:

1. Growth mindset and adaptability
2. Skills related to people development
3. Resilience, stress tolerance and flexibility

These findings are consistent with the trend towards upskilling and reskilling as critical strategies for businesses going forward. The report also highlights that skills related to technological fluency and innovation, as well as desirable personality traits, will be important areas of focus for businesses. Additionally, the report indicates that skills related to people development are set to be a particular focus, despite being difficult to recruit for.

It's worth noting that Mercer's survey results may not directly apply to every organization, and individual business needs and priorities may vary. However, the findings do provide valuable insights into trends and priorities in the world of work, and can help info

10it [11:57, 61.40s/it]

The alignment of business, manager, and employee needs is crucial for the successful adoption of a talent marketplace. When these needs overlap, it leads to early adopters who are motivated to release trapped capacity, become more future-fit through upskilling, and provide transparent career opportunities for employee retention (Figure 4: Use cases).

Business needs may include unlocking trapped productivity or reskilling the workforce. Managers need to change their mindset and behaviors to enable new ways of working and create better opportunities for learning and development based on employees' skills and aspirations. Employees, in turn, seek career growth and development opportunities.

When these needs are aligned, it leads to a thriving talent marketplace where internal and external talent can be matched efficiently, creating better opportunities for career growth, learning, and development for the workforce. This alignment also helps organizations realize business benefits more e

11it [13:18, 67.29s/it]

Progressive firms are taking various steps to encourage their employees to stay relevant in the current job market. One of the primary ways is by helping them assess their skills portfolio against the market's needs and providing opportunities for upskilling, reskilling, or redeployment. This can be facilitated through career experience software programs that show employees the skills they need and the courses they can take to acquire them.

Another way firms are supporting their employees is by offering financial wellbeing advice and outplacement services. These initiatives can positively impact departing employees' lives, making it a responsible approach for employers.

Moreover, progressive organizations are viewing their workforce through a skills lens, recognizing their people as valuable assets with transferable skills that can be leveraged throughout the organization. They also see the potential within each employee to learn new skills and adapt to remain relevant in the rapidly

12it [14:35, 70.27s/it]

The decrease in interest in tracking skills within Production, Supply Chain, and Creative & Design job families can be attributed to the increasing automation and robotization of roles in these areas. As more tasks become automated, the need for human skills in traditional manufacturing and supply chain roles may become less priority. However, it's important to note that this trend does not necessarily mean that skills are no longer valuable in these job families. Instead, the types of skills required may shift towards areas such as problem-solving, adaptability, and technical expertise related to automation and robotics. Additionally, creative roles may require a greater focus on soft skills like creativity, communication, and collaboration as technology continues to advance. Ultimately, organizations need to keep up with the changing demands of these job families and invest in reskilling and upskilling their workforce to remain competitive.The rise of automation and robotics is leadi

13it [15:37, 67.96s/it]

In today's business environment, organizations are striving to future-proof themselves by preparing their workforces for the upheavals that may come. This involves aligning skills with future strategy and identifying critical work that will contribute to the organization's long-term success. Future-proofing an organization can be achieved through various use cases such as reskilling/upskilling the workforce, unlocking trapped capacity, matching work demand with availability, responding to employee needs, and empowering employees to choose opportunities in line with their aspirations.

One of the key aspects of future-proofing is predicting the skills of the future and investing in them before they become essential. This requires organizations to have a clear understanding of how work will change and the impact on skills demand. By doing so, they can reskill or upskill their people to be ready for the future and locate the right external or internal talent to fit future needs.

However,

14it [16:49, 72.14s/it]


# NOTE(review): this repeats the take_exam call made in the cell above; if it is
# a live cell, running it re-asks every question (about 17 minutes in the
# recorded run). Consider removing it.
student_answers = take_exam(exam, qa_chain)

In [73]:
# Saves the answers and the config under $EXAM_ANSWERS/<md5 of the config>.
write_exam_results(student_answers, config)

In [74]:
# Sanity check: take_exam returns a pandas DataFrame.
type(student_answers)

pandas.core.frame.DataFrame