## Set up
### Import Packages and set up environment with API keys

In [1]:
!pip install transformers datasets torch torchvision torchaudio langchain-community faiss-cpu sentence-transformers langchain gradio evaluate langchain_experimental


Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-community
  Downloading langchain_community-0.0.29-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers
  Downloading sentence_transformers-2.6.1-py3-none-any.whl (163 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3/163.3 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.1.13-py3-none-any.whl (810 kB)
[2K     [90m━━━━━━━━━━

In [2]:
# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token
from getpass import getpass
import os

HUGGINGFACEHUB_API_TOKEN = getpass()

os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

··········


### Load Model

In [3]:
from langchain_community.llms import HuggingFaceEndpoint
model_name = "tiiuae/falcon-7b-instruct"
llm = HuggingFaceEndpoint(
    repo_id=model_name,
    model=model_name,
    task="text-generation",
    temperature=0.5,
    # max_length:1024,
    max_new_tokens=200
)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## RAG from synthetic data set (job analysis)

### Set up prompts, embeddings and retriever

In [4]:
from requests import get
import csv
import re
import pandas as pd
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain, RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

In [5]:
#PROMPT1: Prompt with context
template = """Use the following context to answer the question at the end.
If you don't know the answer, please think rationally and answer from your own knowledge base.
Context: {context}

Question: {question}
Answer:
"""
QA_CHAIN_PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

#PROMPT2: Normal prompting
template= """
        Please answer the question.
        Answer professionally, and where appropriate, in a Computer Science educational context.
        Question: {question}
        Response:
        """
prompt = PromptTemplate(template=template, input_variables=["question"])

In [6]:
modelPath = "sentence-transformers/gtr-t5-base" # Using t5 sentence transformer model to generate embeddings
model_kwargs = {'device':'cpu'}
encode_kwargs = {'normalize_embeddings': True} # Normalizing embeddings may help improve similarity metrics by ensuring that embeddings magnitude does not affect the similarity scores

# Initialise an instance of HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.86k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/219M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

In [7]:
# Initialize text splitter
text_split = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

### Functions to connect to the API

In [8]:
def clean_html(raw_html):
    """Helper function to clean HTML tags from text."""
    CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
    cleantext = re.sub(CLEANR, '', raw_html)
    return cleantext

In [9]:
reed_key = getpass("Enter the Reed API token: ")

Enter the Reed API token: ··········


In [10]:
def query_job_listings(job_name, location, reed_key):
    """Function to query job listings from the API."""
    BASE_URL = 'https://www.reed.co.uk/api/1.0/search'
    # Construct the request URL
    search_url = f'{BASE_URL}?keywords={job_name}&locationName={location}'
    search_response = get(search_url, auth=(reed_key, ''))  # authentication header as the username, with the password left empty

    # Check if the request was successful
    if search_response.status_code == 200:
        job_listings = search_response.json()["results"]
        return job_listings
    else:
        print(f'Error: {search_response.status_code}')
        return []

In [11]:
def create_jobs_csv(job_listings, reed_key):
    """Function to create a CSV file with details of job listings."""
    with open('job_listings.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Job Title', 'Job Description', 'Location', 'Part-time', 'Full-time']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # Iterate through job listings to collect required details of each job into csv file
        for job in job_listings:
            job_id = job["jobId"]
            details_url = f'https://www.reed.co.uk/api/1.0/jobs/{job_id}'
            detail_response = get(details_url, auth=(reed_key, ''))
            detail = detail_response.json()
            job_title = detail.get("jobTitle", "")
            job_description = clean_html(detail.get("jobDescription", ""))
            location = detail.get("locationName", "")
            keywords = detail.get("keywords", "")
            part_time = detail.get("partTime", "")
            full_time = detail.get("fullTime", "")
            # Write job details to CSV
            writer.writerow({'Job Title': job_title, 'Job Description': job_description, 'Location': location, "Part-time": part_time, "Full-time": full_time})


In [12]:
def get_job(query):
    """Helper function that returns the Computer Science subject of the sentence to feed into job search."""
    helper_template = """
    [INST]Output only the Computer Science job title of the sentence, give one or two words.
    For example, the output of "What programming skills would IT managers require to possess?" is "IT manager".
    The output of "What are some software tools that an data scientist need to know?" is "data scientist". [\INST]
    Sentence: {query}
    The output is:
    """
    prompt = PromptTemplate(template=helper_template, input_variables=["query"])
    model_name = "mistralai/Mistral-7B-Instruct-v0.1"
    llm = HuggingFaceEndpoint(
        repo_id=model_name,
        model=model_name,
        task="text-generation",
        temperature=0.5,
        max_new_tokens=200
    )
    helper_llm = LLMChain(llm=llm, prompt=prompt)
    response = helper_llm.invoke(input=query)
    text = response["text"]
    print(text)
    return text

In [13]:
# Same implementation as final pipeline, but on top of returning output response, also return retrieved contexts for evaluation purposes
def pipeline(query):
        subject = get_job(query) # Find keywords to search jobs in API
        location = ""
        job_listings = query_job_listings(clean_html(subject), location, reed_key)
        create_jobs_csv(job_listings, reed_key)

        with open('job_listings.csv', 'r', encoding='utf-8') as csvfile:
            reader = csv.reader(csvfile)
            next(reader)  # Skip header
            first_row = next(reader, None)
            if first_row:
                loader = CSVLoader(file_path="job_listings.csv")
                documents = loader.load() # Load data for retrieval

                d = text_split.split_documents(documents)
                db = FAISS.from_documents(d, embeddings)

                chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT}
                qa = RetrievalQA.from_chain_type(llm=llm,
                    retriever=db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5, "k": 3}),
                    return_source_documents=True,
                    chain_type_kwargs=chain_type_kwargs, verbose=True)

                input_dict = {'query': query}
                result = qa.invoke(input_dict)
                documents = result.get("source_documents", [])
                for i in documents:
                    print (i)
                text = result['result']
                return documents, text
            else: # If no jobs are found, normal prompting and response is done
                llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
                input_dict = {'question': query}
                response_dict = llm_chain.invoke(input_dict)
                response = response_dict['text']
                return [], response

# Evaluation

In [None]:
# RUN GOOGLE COLLAB WITH GPU!
!pip install torch tiktoken textstat

Collecting tiktoken
  Downloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting textstat
  Downloading textstat-0.7.3-py3-none-any.whl (105 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyphen (from textstat)
  Downloading pyphen-0.14.0-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyphen, tiktoken, textstat, bert_score
Successfully installed bert_score-0.3.13 pyphen-0.14.0 textstat-0.7.3 tiktoken-0.6.0

Prepare evaluation dataset

In [14]:
data_cs_industry = pd.DataFrame({
    "question": [
        "What are the latest trends in artificial intelligence?",
        "What are some emerging programming languages that are gaining popularity in the Computer Science industry?",
        "I am a beginner that wants to get into Data Science, where should I start?",
        "I am a final-year Computer Science student wanting to find a graduate role in Cybersecurity. What are the practical skills required for a career in Cybersecurity that are currently in-demand?",
        "What are the essential skills required for a career in cybersecurity?",
        "What are some in-demand technical skills for aspiring data analysts?",
        "What are the career prospects for individuals with expertise in cybersecurity risk management?",
        "What are new roles created in the Artificial Intelligence industry in recent years?",
        "What are the main responsibilities of a web developer in computer science careers?",
        "What is the job of a software quality assurance (QA) engineer",
    ],
    "ground_truth": [
        "In the realm of artificial intelligence (AI), several notable trends have emerged recently. Firstly, there's a growing focus on explainable AI (XAI), which aims to make AI models more transparent and understandable to humans, crucial for applications in fields like healthcare and finance where interpretability is paramount. Secondly, federated learning has gained traction, enabling training of ML models across decentralized devices while preserving data privacy, pivotal for IoT and edge computing scenarios. Additionally, reinforcement learning (RL) advancements, particularly in deep RL, have seen remarkable progress, empowering AI systems to make sequential decisions in dynamic environments, with applications spanning robotics, autonomous vehicles, and gaming. Lastly, the integration of AI with other technologies like blockchain for enhanced security and trustworthiness and with quantum computing for tackling complex optimization problems signifies promising directions for future research and innovation in the AI landscape.",
        "Several emerging programming languages are gaining traction in the industry due to their unique features and capabilities. One such language is Rust, known for its emphasis on safety, concurrency, and performance, making it suitable for systems programming where reliability and efficiency are critical. Another language on the rise is Julia, which specializes in numerical and scientific computing, offering high performance comparable to traditional languages like C and Fortran while maintaining a user-friendly syntax and extensive library support. Additionally, Kotlin, a statically typed language interoperable with Java, has become increasingly popular for Android app development, offering modern features and improved developer productivity. Lastly, Swift, developed by Apple, has gained momentum for iOS and macOS development, providing a concise and expressive syntax along with powerful features like optionals and automatic memory management. These emerging languages cater to specific niches and address evolving industry needs, showcasing their growing relevance and adoption in the programming landscape.",
        "Here is a few things to learn about Data Science to get you started: \nLearn Python or R: Choose one as your primary programming language. \nBasic Statistics: Understand mean, median, mode, standard deviation, and probability.\nData Manipulation: Learn Pandas (Python) or dplyr (R) for data cleaning and manipulation.\nData Visualization: Use Matplotlib, Seaborn (Python), or ggplot2 (R) for visualization.\nMachine Learning Basics: Start with linear regression, logistic regression, decision trees, and evaluation metrics.\nPractice: Work on projects using real-world datasets from sources like Kaggle.\nStay Updated: Follow online resources and communities for the latest trends and techniques.",
        "As a final-year Computer Science student aiming for a graduate role in cybersecurity, it's essential to focus on developing practical skills that are currently in high demand in the industry.\nSome of these key skills include:\n\n1. Knowledge of Networking: Understanding networking fundamentals, protocols (such as TCP/IP), and network architecture is crucial for identifying and mitigating security threats. Familiarize yourself with concepts like firewalls, routers, VPNs, and intrusion detection systems (IDS).\n\n2. Proficiency in Operating Systems: Gain proficiency in operating systems such as Linux and Windows, including command-line operations, system administration tasks, and security configurations. Being able to secure and harden operating systems is essential for protecting against common cybersecurity threats.\n\n3. Understanding of Cryptography: Cryptography is at the heart of cybersecurity, so having a solid understanding of encryption algorithms, cryptographic protocols, and cryptographic techniques is vital. Learn about symmetric and asymmetric encryption, digital signatures, hashing algorithms, and their applications in securing data and communications.\n\n4. Penetration Testing and Ethical Hacking: Develop skills in penetration testing and ethical hacking to identify vulnerabilities and assess the security posture of systems and networks. Familiarize yourself with tools and techniques used by ethical hackers, such as Kali Linux, Metasploit, Nmap, and Wireshark.\n\n5. Security Assessment and Risk Management: Learn how to conduct security assessments, risk assessments, and threat modeling to identify, prioritize, and mitigate security risks effectively. Understand risk management frameworks like NIST, ISO 27001, and COBIT, and how to apply them in real-world scenarios.\n\n6. Incident Response and Forensics: Acquire knowledge of incident response procedures, including detection, analysis, containment, eradication, and recovery from security incidents. Understand digital forensics principles and techniques for investigating and analyzing security breaches and cybercrimes.\n\n7. Security Awareness and Communication: Develop strong communication skills to effectively convey cybersecurity concepts, risks, and recommendations to technical and non-technical stakeholders. Being able to raise awareness about cybersecurity best practices and policies is essential for promoting a security-conscious culture within organizations.\n\n8. Continuous Learning and Adaptability: Cybersecurity is a rapidly evolving field, so it's essential to cultivate a mindset of continuous learning and adaptability. Stay updated with the latest threats, trends, technologies, and best practices through professional development, certifications, and participation in cybersecurity communities and events.\n\nBy focusing on developing these practical skills and staying abreast of industry trends and advancements, you'll be well-prepared to pursue a successful career in cybersecurity upon graduation. Additionally, consider obtaining relevant certifications such as CompTIA Security+, CEH (Certified Ethical Hacker), CISSP (Certified Information Systems Security Professional), or others to further enhance your credentials and marketability in the field.",
        "A career in cybersecurity require a broad spectrum of practical skills, including proficiency in network security protocols and tools like firewalls and intrusion detection/prevention systems (IDS/IPS) for safeguarding network infrastructure. Secure coding practices and knowledge of common vulnerabilities are essential for developing secure software applications, with expertise in frameworks like OWASP Top 10 aiding in vulnerability mitigation. Encryption techniques and cryptographic protocols are vital for securing sensitive data, while incident response and digital forensics skills, alongside tools like SIEM systems, enable effective threat detection and response. Proficiency in penetration testing frameworks like Metasploit and security assessment tools is crucial for identifying and remediating security weaknesses, while knowledge of compliance frameworks such as GDPR ensures organizational adherence to cybersecurity regulations. Effective communication and collaboration skills are imperative for conveying cybersecurity risks and recommendations to stakeholders and collaborating with cross-functional teams to implement security measures. Continued learning and staying updated with the latest cybersecurity trends and technologies are key for navigating this ever-evolving field successfully.",
        "In-demand technical skills for data analysts include proficiency in programming languages like Python, R, or SQL for data manipulation, analysis, and visualization. Familiarity with statistical analysis techniques, such as regression analysis, hypothesis testing, and predictive modeling, is essential for deriving insights from data. Knowledge of data querying and database management systems like MySQL, PostgreSQL, or MongoDB is valuable for accessing and organizing large datasets. Expertise in data wrangling techniques, using tools like pandas, dplyr, or data.table, enables cleaning and transforming raw data into actionable insights. Proficiency in data visualization libraries like Matplotlib, ggplot2, or seaborn is crucial for creating informative and visually appealing charts, graphs, and dashboards to communicate findings effectively. Additionally, experience with machine learning frameworks like scikit-learn or TensorFlow, along with knowledge of data mining techniques, enhances the ability to build predictive models and extract patterns from data.",
        "Career opportunities for individuals in cybersecurity risk management include roles such as cybersecurity risk analysts, security consultants, risk managers, compliance officers, and cybersecurity architects. These professionals play a critical role in identifying, evaluating, and prioritizing cybersecurity risks, developing risk mitigation strategies, and ensuring compliance with regulatory requirements and industry standards. With the ever-evolving threat landscape and the increasing complexity of cybersecurity challenges, individuals with expertise in cybersecurity risk management can expect to have a wide range of career opportunities and advancement prospects in both the public and private sectors, including government agencies, financial institutions, healthcare organizations, and consulting firms. Additionally, obtaining relevant certifications such as Certified Information Systems Security Professional (CISSP), Certified Information Security Manager (CISM), or Certified Risk and Information Systems Control (CRISC) can further enhance career prospects and credibility in the field.",
        "In recent years, the rapid advancement of artificial intelligence (AI) technologies has led to the emergence of several new roles within the AI industry. One such role is that of the AI Ethics Officer, responsible for ensuring that AI systems are developed and deployed in an ethical and responsible manner, addressing concerns related to bias, fairness, transparency, and accountability. Additionally, AI Product Managers play a crucial role in overseeing the development and implementation of AI-driven products and services, from ideation to launch, ensuring alignment with business objectives and user needs. Another emerging role is that of the AI Solutions Architect, tasked with designing and implementing scalable and efficient AI solutions tailored to specific business challenges and requirements. Furthermore, AI Security Specialists focus on safeguarding AI systems and data from cyber threats and vulnerabilities, addressing the unique security challenges posed by AI technologies. These new roles reflect the growing importance of ethics, governance, and interdisciplinary collaboration in the AI industry, as organizations seek to harness the transformative potential of AI while mitigating risks and ensuring responsible AI deployment.",
        "Web developers design, build, and maintain websites and web applications, including front-end and back-end development, user interface design, and database integration, to deliver a seamless user experience.",
        "Software QA engineers develop and execute test plans, scripts, and procedures to ensure the quality, reliability, and performance of software applications and systems before release to production. They identify and report defects and work closely with development teams to resolve issues and improve software quality."
    ]
})

pipeline_eval = pd.DataFrame({
    "question": [
        "Can you give me a typical job description of graduate role for Software Engineering?",
        "Can you give an example job description for a Software Engineer intern?",
        "What particular skills do recruiters look for in a Web developer?",
        "What are cybersecurity analyst qualifications that recruiters look for?",
        "What are some software tools that an IT consultant need to know?",
        "What are skill descriptions most recruiters look for in a software engineer?",
        "What are some programming languages demanded in web developer jobs?",
        "Can you give common job requirements in Computer Vision?",
        "Please explain the responsibilities of a software architect.",
        "What qualifications does a full-stack engineer need?",
        "What responsibilities would I have as an AI solutions architect?",
        "What are recent topics a Cybersecurity consultant should learn about?",
        "Give me some fairly recent topics in the realm of data science.",
        "Can you explain the skills or experiences that recruiters look for in a Software Architect?",
        "What programming skills would IT managers require to possess?",
    ],

    "ground_truth": [
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",

    ]
})

pipeline_eval2 = pd.DataFrame({
    "question": [
        "What programming languages should a software engineer know these days?",
        "Please explain to me skills I need to learn to become a cloud engineer.",
        "What are the responsibilities for a typical IT manager?",
        "What are examples of frameworks I would be working on as a Database Administrator?",
        "Can you explain some skills or qualifications to become a game developer?",
        "What are the special skills a professional working in cybersecurity should have?",
        "What does a typical UX designer do?",
        "What do recruiters look for in an AI engineer?",
        "If I want to get a graduate role in game developing, what skills should I expand on?",
        "Based on job descriptions, what does a software architect do?",
        "If I want to become a software anlayst, please advise me where to start.",
        "Explain several frameworks an AI engineer need to know how to use.",
        "Please describe the responsibilities of a UX designer",
        "Advise on what to learn to become a proficient cloud engineer.",
        "What are characteristics that recruiters typically look for in a database administrator"
    ],

    "ground_truth": [
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
    ]
})

In [16]:
for question in pipeline_eval['question']:
  get_job(question)

for question in pipeline_eval2['question']:
  get_job(question)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful

    Software Engineer
Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful

    Software Engineer intern
Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful

    Web developer
Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved

Run the model over the evaluation data set once and save results to their respective csv file (for static evaluation)

In [17]:
# Helper function: To prevent having to run predictions again, save output to the dataset as a new column 'predictions'
import datasets

def chain_predictions_from_data(eval_data):
  data = eval_data.copy()
  predictions = []  # List to store predictions for all questions
  source_documents = []
  for question in data['question']:
    # Invoke the llm_chain model with the current question
    documents, response = pipeline(question)
    predictions.append(response)  # Append the generated text to the predictions list
    print(question)
    print(documents)
    print(response)
    concatenated_page_content = ""
    for document in documents:
    # Append the page_content of each Document to the page_contents array
      concatenated_page_content += document.page_content + "\n"
    source_documents.append(concatenated_page_content)
  data['source_documents'] = source_documents
  data['predictions'] = predictions  # Assign the predictions list to a new column
  return data

In [None]:
rag_data_cs_industry = chain_predictions_from_data(data_cs_industry)
os.makedirs("industry", exist_ok=True)
rag_data_cs_industry.to_csv("industry/rag.csv", index=False)

In [None]:
rag_pipeline_eval = chain_predictions_from_data(pipeline_eval)
os.makedirs("industry", exist_ok=True)
rag_pipeline_eval.to_csv("industry/pipeline_evaluation.csv", index=False)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful

    Software Engineer


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
page_content='Job Title: Software Engineer' metadata={'source': 'job_listings.csv', 'row': 0}
page_content='Job Title: Software Engineer' metadata={'source': 'job_listings.csv', 'row': 1}
page_content='Job Title: Software Engineer' metadata={'source': 'job_listings.csv', 'row': 2}
Can you give me a typical job description of graduate role for Software Engineering?
[Document(page_content='Job Title: Software Engineer', metadata={'source': 'job_listings.csv', 'row': 0}), Document(page_content='Job Title: Software Engineer', metadata={'source': 'job_listings.csv', 'row': 1}), Document(page_content='Job Title: Software Engineer', metadata={'source': 'job_

In [18]:
rag_pipeline_eval2 = chain_predictions_from_data(pipeline_eval2)
os.makedirs("industry", exist_ok=True)
rag_pipeline_eval2.to_csv("industry/pipeline_evaluation2.csv", index=False)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful

    software engineer


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
page_content="Job Description: Software Engineer / Developer (Graduate Python JavaScript) WFH / Cambridge to 60k Are you a bright, ambitious Software Engineer with a strong record of academic achievement looking for an opportunity to progress your career? You could be joining a tech start-up that are producing a cutting edge Digital Twins and Information Management platform for the construction industry and highways agencies. As a Software Engineer / Developer you'll join a small team, collaborating with the CEO, CTO, Chief Scientist and other world leading researchers to design and develop new features and enhancements on the core platform. You'll be

UniEval: multi-dimensional metric and factual consistency

In [19]:
!git clone https://github.com/maszhongming/UniEval.git
%cd UniEval
!pip install -r requirements.txt

Cloning into 'UniEval'...
remote: Enumerating objects: 91, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (21/21), done.[K
remote: Total 91 (delta 13), reused 5 (delta 5), pack-reused 65[K
Receiving objects: 100% (91/91), 1.97 MiB | 10.41 MiB/s, done.
Resolving deltas: 100% (22/22), done.
/content/UniEval
Collecting accelerate (from -r requirements.txt (line 2))
  Downloading accelerate-0.28.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting rouge-score (from -r requirements.txt (line 6))
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py7zr (from -r requirements.txt (line 8))
  Downloading py7zr-0.21.0-py3-none-any.whl (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.6/67.6 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting texttable (f

In [20]:
import torch
import nltk
from utils import convert_to_json
from metric.evaluator import get_evaluator

nltk.download('punkt')
torch.cuda.is_available()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Factual consistency score
task = 'fact'

src_list = rag_pipeline_eval["source_documents"]
output_list = rag_pipeline_eval["predictions"]

# Prepare data for pre-trained evaluators
data = convert_to_json(output_list=output_list, src_list=src_list)
# Initialize evaluator for a specific task
evaluator = get_evaluator(task)
# Get factual consistency scores
eval_scores = evaluator.evaluate(data, print_result=True)

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.89k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Evaluating consistency of 15 samples !!!


100%|██████████| 22/22 [00:08<00:00,  2.50it/s]


Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
| consistency | 0.553663 |
+-------------+----------+





In [21]:
# Factual consistency score
task = 'fact'

src_list = rag_pipeline_eval2["source_documents"]
output_list = rag_pipeline_eval2["predictions"]

# Prepare data for pre-trained evaluators
data = convert_to_json(output_list=output_list, src_list=src_list)
# Initialize evaluator for a specific task
evaluator = get_evaluator(task)
# Get factual consistency scores
eval_scores = evaluator.evaluate(data, print_result=True)

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.89k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Evaluating consistency of 15 samples !!!


100%|██████████| 22/22 [00:10<00:00,  2.09it/s]


Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
| consistency | 0.667881 |
+-------------+----------+





In [None]:
task = 'dialogue'

# a list of dialogue histories
src_list = rag_pipeline_eval["question"]
# a list of additional context that should be included into the generated response
context_list = rag_pipeline_eval["source_documents"]
# a list of model outputs to be evaluated
output_list = rag_pipeline_eval["predictions"]

data = convert_to_json(output_list=output_list,
                       src_list=src_list, context_list=context_list)
evaluator = get_evaluator(task)
eval_scores = evaluator.evaluate(data, print_result=True)

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.89k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Evaluating naturalness of 15 samples !!!


100%|██████████| 2/2 [00:00<00:00,  7.32it/s]


Evaluating coherence of 15 samples !!!


100%|██████████| 2/2 [00:00<00:00,  7.59it/s]


Evaluating engagingness of 15 samples !!!


100%|██████████| 22/22 [00:07<00:00,  2.76it/s]


Evaluating groundedness of 15 samples !!!


100%|██████████| 2/2 [00:01<00:00,  1.49it/s]


Evaluating understandability of 15 samples !!!


100%|██████████| 2/2 [00:00<00:00,  8.45it/s]


Evaluation scores are shown below:
+-------------------+----------+
|     Dimensions    |  Score   |
+-------------------+----------+
|    naturalness    | 0.792962 |
|     coherence     | 0.982974 |
|    engagingness   | 5.593069 |
|    groundedness   | 0.939267 |
| understandability |  0.7979  |
|      overall      | 1.821234 |
+-------------------+----------+





In [22]:
task = 'dialogue'

# a list of dialogue histories
src_list = rag_pipeline_eval2["question"]
# a list of additional context that should be included into the generated response
context_list = rag_pipeline_eval2["source_documents"]
# a list of model outputs to be evaluated
output_list = rag_pipeline_eval2["predictions"]

data = convert_to_json(output_list=output_list,
                       src_list=src_list, context_list=context_list)
evaluator = get_evaluator(task)
eval_scores = evaluator.evaluate(data, print_result=True)

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.89k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Evaluating naturalness of 15 samples !!!


100%|██████████| 2/2 [00:00<00:00,  6.71it/s]


Evaluating coherence of 15 samples !!!


100%|██████████| 2/2 [00:00<00:00,  7.17it/s]


Evaluating engagingness of 15 samples !!!


100%|██████████| 22/22 [00:09<00:00,  2.20it/s]


Evaluating groundedness of 15 samples !!!


100%|██████████| 2/2 [00:01<00:00,  1.51it/s]


Evaluating understandability of 15 samples !!!


100%|██████████| 2/2 [00:00<00:00,  7.57it/s]


Evaluation scores are shown below:
+-------------------+----------+
|     Dimensions    |  Score   |
+-------------------+----------+
|    naturalness    | 0.608406 |
|     coherence     | 0.995535 |
|    engagingness   | 6.014042 |
|    groundedness   | 0.961856 |
| understandability | 0.637322 |
|      overall      | 1.843432 |
+-------------------+----------+



