In [1]:
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index import Document
from llama_index import VectorStoreIndex
from llama_index import LLMPredictor, PromptHelper, ServiceContext

import pandas as pd
import re

In [2]:
# Load the quantised Llama-2 7B chat model through LlamaCPP; `llm` is later passed to
# create_service_context() and so backs the index and both chat engines.
# NOTE(review): the saved output of this cell reports loading "ggml-model-q4_0.bin",
# not the ".gguf" path given here, so the output looks stale. Re-run to confirm which
# file is actually used.
llm = LlamaCPP(
    model_path="../../../llama/llama-2-7b-chat/gguf-model-q4_0.gguf",
    temperature=0.1,
    # limit on newly generated tokens per call (the saved timings show one answer
    # reaching exactly 256 sample runs)
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens; set slightly lower to leave some headroom
    context_window=3900,
    # kwargs to pass to __call__() (none needed here)
    generate_kwargs={},
    # kwargs to pass to __init__();
    # n_gpu_layers must be at least 1 to use the GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform chat messages / plain completions into the Llama2 prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # NOTE(review): presumably the source of the llama.cpp load/timing logs in the
    # saved outputs; confirm, and consider False before sharing the notebook
    verbose=True,
)

llama.cpp: loading model from ../../../llama/llama-2-7b-chat/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 3900
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 4160.96 MB (+ 1950.00 MB per state)
llama_new_context_with_model: kv self size  = 1950.

In [3]:
def load_documents(csv_paths=None):
    """Load scraped pages from CSV files and wrap each page body in a Document.

    Every CSV is expected to have a ``body`` column. Rows whose ``body`` is
    missing are skipped, and every remaining value is converted with ``str()``
    before being used as the Document text.

    Args:
        csv_paths: Optional iterable of CSV file paths, read in the given
            order. When omitted, the three scraped sources used by this
            notebook are read: the internal wiki, the public handbook and
            the turing.ac.uk pages.

    Returns:
        list: One ``Document`` per non-missing ``body`` value, in file order
        and then row order.

    Raises:
        FileNotFoundError: If one of the CSV files does not exist.
        KeyError: If a CSV file has no ``body`` column.
    """
    if csv_paths is None:
        csv_paths = (
            "../../data/turing_internal/wiki-scraped.csv",
            "../../data/public/handbook-scraped.csv",
            "../../data/public/turingacuk-no-boilerplate.csv",
        )

    documents = []
    for csv_path in csv_paths:
        scraped = pd.read_csv(csv_path)
        # A list-valued `subset` is accepted by every pandas version, and
        # skipping `inplace=True` leaves the frame that was read unmodified.
        bodies = scraped.dropna(subset=["body"])["body"].to_numpy()
        documents.extend(Document(text=str(body)) for body in bodies)

    return documents

In [4]:
# Read the scraped CSV sources once; `documents` is the input to the vector index built below.
documents = load_documents()

In [5]:
def create_service_context(
    model,
    max_input_size=2048,
    num_output=256,
    chunk_size_lim=512,
    overlap_ratio=0.1,
):
    """Build a ServiceContext that uses ``model`` as its LLM and a local embedding model.

    Args:
        model: LLM object wrapped in an ``LLMPredictor``.
        max_input_size: First positional argument given to ``PromptHelper``.
        num_output: Second positional argument given to ``PromptHelper``.
        chunk_size_lim: Fourth positional argument given to ``PromptHelper``.
        overlap_ratio: Third positional argument given to ``PromptHelper``.

    Returns:
        The object returned by ``ServiceContext.from_defaults``.
    """
    # PromptHelper is called positionally, so the order here matters.
    # NOTE(review): presumably (context window, output tokens, chunk overlap
    # ratio, chunk size limit); confirm against the installed llama_index.
    helper_args = (max_input_size, num_output, overlap_ratio, chunk_size_lim)

    return ServiceContext.from_defaults(
        llm_predictor=LLMPredictor(llm=model),
        prompt_helper=PromptHelper(*helper_args),
        embed_model="local",
    )

In [6]:
# Wrap the local Llama model in a service context (default prompt-helper settings),
# then build the vector index over every loaded document. Both chat engines below
# are created from this one index.
service_context = create_service_context(llm)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Chat engine in "condense_question" mode.
# NOTE(review): presumably each turn is first rewritten into a standalone question
# using the chat history before the index is queried; confirm in the llama_index docs.
chat_engine_condense = index.as_chat_engine(chat_mode="condense_question")

In [12]:
# Opening turn for the condense-question engine: a self-contained question about appraisals.
response = chat_engine_condense.chat("What happens at annual appraisal if you have been performing as expected?")
print(response.response)

Llama.generate: prefix-match hit

llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    37.42 ms /    53 runs   (    0.71 ms per token,  1416.51 tokens per second)
llama_print_timings: prompt eval time =  3119.54 ms /   132 tokens (   23.63 ms per token,    42.31 tokens per second)
llama_print_timings:        eval time =  2742.90 ms /    52 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =  5969.60 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    37.94 ms /    54 runs   (    0.70 ms per token,  1423.34 tokens per second)
llama_print_timings: prompt eval time = 16283.93 ms /   662 tokens (   24.60 ms per token,    40.65 tokens per second)
llama_print_timings:        eval time =  3209.56 ms /    53 runs   (   60.56 ms per token,    16.51 tokens per second)
llama_print_timings:       total time = 19602.53 ms
Llama.gene

  Thank you for providing more context! Based on the updated information, it seems that the outcome or consequence of performing well during an annual appraisal in this organization is a "small" progression pay rise, which is consistent with the HR guidance provided. It's important to note that this increase will be based on the individual's performance and development within their experience level, and will not be influenced by any external factors such as promotions or bonuses. Additionally, it seems that there is a standardization process in place to ensure consistency and fairness across the team.
In terms of the specific query you provided, if an employee has been performing as expected during their annual appraisal, they can expect a "small" progression pay rise. However, it's important to note that this increase will be based on the individual's performance and development within their experience level, and will not be influenced by any external factors such as promotions or bon


llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =   154.33 ms /   219 runs   (    0.70 ms per token,  1419.06 tokens per second)
llama_print_timings: prompt eval time = 18300.94 ms /   732 tokens (   25.00 ms per token,    40.00 tokens per second)
llama_print_timings:        eval time = 13977.17 ms /   218 runs   (   64.12 ms per token,    15.60 tokens per second)
llama_print_timings:       total time = 32722.79 ms


In [14]:
# Follow-up with no subject of its own: it can only be answered sensibly if the
# engine carries over the previous turn.
response = chat_engine_condense.chat("What if you don't?")
print(response.response)

Llama.generate: prefix-match hit

llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    20.51 ms /    29 runs   (    0.71 ms per token,  1413.94 tokens per second)
llama_print_timings: prompt eval time =  6033.85 ms /   313 tokens (   19.28 ms per token,    51.87 tokens per second)
llama_print_timings:        eval time =  1537.05 ms /    28 runs   (   54.89 ms per token,    18.22 tokens per second)
llama_print_timings:       total time =  7629.13 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =   175.71 ms /   249 runs   (    0.71 ms per token,  1417.14 tokens per second)
llama_print_timings: prompt eval time = 15847.82 ms /   638 tokens (   24.84 ms per token,    40.26 tokens per second)
llama_print_timings:        eval time = 15543.00 ms /   248 runs   (   62.67 ms per token,    15.96 tokens per second)
llama_print_timings:       total time = 31898.00 ms
Llama.gene

  Thank you for providing additional context! Based on the updated information, if an employee does not receive a "small" progression pay rise during their annual appraisal despite performing as expected, it means that their performance and development have been consistent with their peers at an equivalent level. This is in line with the company's Principle 1, which states that pay rises should be based on individual performance and alignment with peers.
However, it's important to note that this does not necessarily mean that the employee will not receive any form of recognition or reward for their contributions. The company may consider awarding a one-off bonus or recognition award for outstanding work or specific contributions made during the past performance year.
In light of the new context, the original answer can be refined as follows: If an employee does not receive a "small" progression pay rise during their annual appraisal despite performing as expected, it means that their p


llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =   181.80 ms /   256 runs   (    0.71 ms per token,  1408.13 tokens per second)
llama_print_timings: prompt eval time = 16941.67 ms /   682 tokens (   24.84 ms per token,    40.26 tokens per second)
llama_print_timings:        eval time = 16638.35 ms /   255 runs   (   65.25 ms per token,    15.33 tokens per second)
llama_print_timings:       total time = 34103.88 ms


In [7]:
# Second chat engine, in "context" mode, built from the same index so its answers
# can be compared with the condense-question engine.
# NOTE(review): the execution counts show this cell (In [7]) ran before the
# condense-question cells displayed above it (In [11]-[14]); the notebook order does
# not match the run order, so verify with Restart & Run All.
chat_engine_context = index.as_chat_engine(chat_mode="context")

In [9]:
# Same opening question as was put to the condense-question engine, for a side-by-side comparison.
response = chat_engine_context.chat("What happens at annual appraisal if you have been performing as expected?")
print(response.response)

Llama.generate: prefix-match hit


  If an employee has been performing as expected throughout the year, they can expect to receive a "small" progression pay rise at their annual appraisal. This means that their salary will increase by a percentage amount set by the university each year, above the cost of living adjustment. The exact percentage increase will depend on the employee's performance and the overall budget for salary increases.
In addition to the pay rise, employees who have been performing as expected may also be eligible for other forms of recognition or bonuses, such as:
1. Recognition awards: These are one-off awards given to employees who have made a significant contribution to the team or organisation during the year.
2. Bonus: A one-off payment given to employees who have consistently performed at a high level throughout the year.
It's worth noting that employees who are at the top of their pay band are not eligible for a progression pay rise, but they are eligible for a bonus if they consistently perf


llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =   157.45 ms /   223 runs   (    0.71 ms per token,  1416.30 tokens per second)
llama_print_timings: prompt eval time = 65680.02 ms /  1914 tokens (   34.32 ms per token,    29.14 tokens per second)
llama_print_timings:        eval time = 16867.46 ms /   222 runs   (   75.98 ms per token,    13.16 tokens per second)
llama_print_timings:       total time = 83002.03 ms


In [10]:
# Same subject-less follow-up, now relying on this engine's own chat history.
response = chat_engine_context.chat("What if you don't?")
print(response.response)

Llama.generate: prefix-match hit


  If an employee does not receive a pay rise at their annual appraisal due to underperforming, they may be eligible for additional support and development opportunities to help them improve their performance. This could include:
1. Performance improvement plan: The employee will work with their manager to develop a plan to improve their performance, which may involve specific goals or objectives to be achieved within a set timeframe.
2. Coaching and mentoring: The employee may receive additional coaching and mentoring from their manager or other colleagues to help them improve their skills and knowledge.
3. Training and development: The employee may be eligible for additional training and development opportunities to help them improve their performance, such as attendance at workshops or courses.
It's important to note that underperforming employees are not necessarily a lost cause, and with the right support and development opportunities, they can improve their performance and contrib


llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =   144.07 ms /   205 runs   (    0.70 ms per token,  1422.89 tokens per second)
llama_print_timings: prompt eval time = 127727.51 ms /  2770 tokens (   46.11 ms per token,    21.69 tokens per second)
llama_print_timings:        eval time = 16244.24 ms /   204 runs   (   79.63 ms per token,    12.56 tokens per second)
llama_print_timings:       total time = 144388.15 ms


In [15]:
# Factual lookup: the answer should be a specific code taken from the indexed documents.
response = chat_engine_context.chat("What is the standard REG finance code?")
print(response.response)

Llama.generate: prefix-match hit


  assistant:  The standard REG (Regional Finance) finance code is R-RSE-001. This code is used to specify the source of funding for a cost in Turing's Finance system.
R-RSE-001 is the standard REG code that is initially charged to all employees before being recharged to a project or other activity. It represents the standard amount that is allocated to each employee as part of their compensation package, and it is used as the default funding source for most costs incurred by employees.
For example, if an employee incurs a cost related to travel for work, they would use R-RSE-001 as the funding source for that cost. This helps to ensure that the cost is properly accounted for and tracked within the finance system.



llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =   127.49 ms /   181 runs   (    0.70 ms per token,  1419.75 tokens per second)
llama_print_timings: prompt eval time = 34780.27 ms /  1288 tokens (   27.00 ms per token,    37.03 tokens per second)
llama_print_timings:        eval time = 12544.19 ms /   180 runs   (   69.69 ms per token,    14.35 tokens per second)
llama_print_timings:       total time = 47691.08 ms


In [16]:
# Factual lookup tied to a date: the April 2020 cost-of-living increase.
response = chat_engine_context.chat("What was the Turing cost of living pay increase in April 2020?")
print(response.response)

Llama.generate: prefix-match hit


 assistant:  According to the information provided in the context, the Turing cost of living pay increase in April 2020 was 3.0%.



llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    23.95 ms /    34 runs   (    0.70 ms per token,  1419.92 tokens per second)
llama_print_timings: prompt eval time = 104209.96 ms /  2473 tokens (   42.14 ms per token,    23.73 tokens per second)
llama_print_timings:        eval time =  2549.42 ms /    33 runs   (   77.26 ms per token,    12.94 tokens per second)
llama_print_timings:       total time = 106833.21 ms


In [17]:
# Follow-up whose "this" refers to the previous answer; asks for a comparison with CPIH.
response = chat_engine_context.chat("How does this compare to the CPIH?")
print(response.response)

Llama.generate: prefix-match hit


 assistant:  The Turing cost of living pay increase in April 2020 (3.0%) is lower than the Consumer Price Index for Household Items (CPIH) which was 8.44% in January 2020, the most recent month available at the time of the Turing's annual cost of living award.
In other words, the increase in the cost of living exceeded the increase in Turing's pay awards. This means that while employees received a pay rise, it was lower than the rate of inflation, which could impact their purchasing power and standard of living.



llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    93.53 ms /   133 runs   (    0.70 ms per token,  1421.94 tokens per second)
llama_print_timings: prompt eval time = 62826.15 ms /  1211 tokens (   51.88 ms per token,    19.28 tokens per second)
llama_print_timings:        eval time = 10068.20 ms /   132 runs   (   76.27 ms per token,    13.11 tokens per second)
llama_print_timings:       total time = 73164.41 ms


In [18]:
# Challenge the 8.44% CPIH figure quoted in the previous saved answer to see whether
# the model stands by it.
response = chat_engine_context.chat("Are you sure the CPIH in January 2020 was 8.44%?")
print(response.response)

Llama.generate: prefix-match hit


  I apologize, but I made an error in my previous response. The Consumer Price Index for Household Items (CPIH) for January 2020 was actually 3.53%, not 8.44%.
Here is the correct information:
* Turing cost of living pay increase in April 2020: 3.0%
* CPIH in January 2020: 3.53%
So, the increase in the cost of living was higher than the increase in Turing's pay awards.



llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    86.62 ms /   123 runs   (    0.70 ms per token,  1420.00 tokens per second)
llama_print_timings: prompt eval time = 97300.89 ms /  1666 tokens (   58.40 ms per token,    17.12 tokens per second)
llama_print_timings:        eval time =  9720.93 ms /   122 runs   (   79.68 ms per token,    12.55 tokens per second)
llama_print_timings:       total time = 107270.91 ms


In [33]:
# Challenge again, this time suggesting a specific alternative figure (1.8%).
# In the saved outputs the model changes its CPIH value on every challenge
# (8.44% -> 3.53% -> 1.8%), so none of these figures should be trusted without
# checking the source documents.
# NOTE(review): the execution count jumps from In [18] to In [33]; the cells run in
# between are not in the notebook.
response = chat_engine_context.chat("Are you sure the CPIH in January 2020 wasn't 1.8%?")
print(response.response)

Llama.generate: prefix-match hit


 assistant:   I apologize for my mistake earlier. According to the Office for National Statistics (ONS), the Consumer Price Index for Household Items (CPIH) for January 2020 was actually 1.8%.
Here is the correct information:
* Turing cost of living pay increase in April 2020: 3.0%
* CPIH in January 2020: 1.8%
So, the increase in the cost of living was lower than the increase in Turing's pay awards.



llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    85.87 ms /   121 runs   (    0.71 ms per token,  1409.14 tokens per second)
llama_print_timings: prompt eval time = 16063.67 ms /   187 tokens (   85.90 ms per token,    11.64 tokens per second)
llama_print_timings:        eval time =  9980.08 ms /   120 runs   (   83.17 ms per token,    12.02 tokens per second)
llama_print_timings:       total time = 26290.46 ms


In [34]:
# Probe the engine's memory of the conversation.
# NOTE(review): the saved answer names a question ("What is the project tracker?")
# that appears nowhere in this notebook. It is either leftover history from cells
# that were run and later removed, or a hallucination; re-run from a fresh kernel
# to tell which.
response = chat_engine_context.chat("What was the first question I asked?")
print(response.response)

Llama.generate: prefix-match hit


  The first question you asked is: What is the project tracker?



llama_print_timings:        load time =   863.11 ms
llama_print_timings:      sample time =    11.39 ms /    16 runs   (    0.71 ms per token,  1404.99 tokens per second)
llama_print_timings: prompt eval time = 86022.32 ms /  2223 tokens (   38.70 ms per token,    25.84 tokens per second)
llama_print_timings:        eval time =  1135.97 ms /    15 runs   (   75.73 ms per token,    13.20 tokens per second)
llama_print_timings:       total time = 87197.54 ms
