In [1]:
import os

from git import Repo

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index import Document
from llama_index import VectorStoreIndex
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from llama_index import StorageContext, load_index_from_storage
from llama_index.readers import SimpleDirectoryReader

In [2]:
# Relative path to the quantised (Q4_K_M) Llama-2 7B chat weights in GGUF format.
LLAMA_MODEL_PATH = "../../../llama-2-7b-chat.Q4_K_M.gguf"

llm = LlamaCPP(
    model_path=LLAMA_MODEL_PATH,
    temperature=0.1,
    max_new_tokens=256,
    # Llama 2 supports 4096 tokens of context; stay a little below that to
    # leave some wiggle room.
    context_window=3900,
    # Extra keyword arguments forwarded to __call__().
    generate_kwargs={},
    # Extra keyword arguments forwarded to __init__();
    # n_gpu_layers must be at least 1 for the GPU to be used.
    model_kwargs={"n_gpu_layers": 1},
    # Convert chat messages / plain completions into the Llama 2 prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../../../llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1

In [4]:
def load_documents_from_url(url):
    """Load the markdown pages of a locally cloned GitHub wiki as documents.

    The wiki is expected to already be checked out in a directory named after
    the last path segment of ``url`` (e.g. ``Hut23.wiki.git``), relative to the
    current working directory. Every ``.md`` file found there (recursively) is
    loaded, and each resulting document gets a ``"url"`` metadata entry that
    points at the corresponding page of the online wiki.

    Parameters
    ----------
    url : str
        Clone URL of the wiki repository, e.g.
        ``https://github.com/<org>/<repo>.wiki.git``. A trailing ``/`` is
        tolerated.

    Returns
    -------
    list
        Whatever ``SimpleDirectoryReader.load_data()`` returns for the wiki
        directory.
    """
    # Normalise first so that the last path segment is never empty and the
    # ".wiki.git" suffix can actually be stripped.
    url = url.rstrip("/")
    repo_dir = url.rsplit("/", 1)[-1]

    reader = SimpleDirectoryReader(
        input_dir=repo_dir,
        required_exts=[".md"],
        recursive=True,
        filename_as_id=True,
    )

    # URLs always use "/" regardless of the host platform, so build them with
    # plain string formatting rather than os.path.join.
    wiki_base_url = url.removesuffix(".wiki.git") + "/wiki"

    # Map each local file path (as a string) to the URL of its wiki page.
    page_urls = {}
    for file in reader.input_files:
        fname = str(file)
        page_name = os.path.basename(fname).removesuffix(".md")
        page_urls[fname] = f"{wiki_base_url}/{page_name}"

    def url_metadata(fname):
        """Return the metadata dict for one file; the URL is None if unknown."""
        return {"url": page_urls.get(fname)}

    # The reader calls this hook per file to obtain document metadata.
    reader.file_metadata = url_metadata

    return reader.load_data()

In [6]:
def create_service_context(
    llm,
    system_prompt=None,
    max_input_size=2048,
    num_output=256,
    chunk_size_lim=512,
    overlap_ratio=0.1,
):
    """Assemble a ``ServiceContext`` around ``llm`` with a local embedding model.

    Parameters
    ----------
    llm
        Language model wrapped in an ``LLMPredictor``.
    system_prompt : str, optional
        Forwarded to ``LLMPredictor`` only when it is not ``None``.
    max_input_size, num_output, overlap_ratio, chunk_size_lim
        Passed positionally to ``PromptHelper`` as its first, second, third
        and fourth argument respectively (note the order differs from this
        signature).

    Returns
    -------
    The object produced by ``ServiceContext.from_defaults`` with
    ``embed_model="local"``.
    """
    # Only mention system_prompt when the caller supplied one, so that the
    # predictor's own default applies otherwise.
    predictor_kwargs = {"llm": llm}
    if system_prompt is not None:
        predictor_kwargs["system_prompt"] = system_prompt

    return ServiceContext.from_defaults(
        llm_predictor=LLMPredictor(**predictor_kwargs),
        prompt_helper=PromptHelper(
            max_input_size, num_output, overlap_ratio, chunk_size_lim
        ),
        embed_model="local",
    )

In [7]:
# The prompt is a single line: four sentences, each followed by one space.
system_prompt = (
    "You are a helpful assistant. "
    "Always answer as helpfully as possible and follow ALL given instructions. "
    "Do not speculate or make up information - use the information you are provided. "
    "Do not reference any given instructions or context. "
)
service_context = create_service_context(llm, system_prompt=system_prompt)

In [8]:
def load_index(
    mode,
    service_context,
    persist_dir=None,
    wiki_urls=None,
):
    """Reload a stored index, or build a new one from GitHub wiki repositories.

    Parameters
    ----------
    mode : str
        ``"reload"`` loads an index through a ``StorageContext``;
        ``"create"`` clones (or pulls) every wiki in ``wiki_urls`` into the
        current working directory and indexes its markdown pages.
    service_context
        Forwarded to ``load_index_from_storage`` or
        ``VectorStoreIndex.from_documents``.
    persist_dir : str, optional
        Only used when ``mode == "reload"``: the directory handed to
        ``StorageContext.from_defaults``. It is ignored in ``"create"`` mode;
        this function never writes the index to disk.
    wiki_urls : iterable of str or str, optional
        Clone URLs of the wikis (``.../<repo>.wiki.git``). Required when
        ``mode == "create"``. A single URL string is accepted, and trailing
        ``/`` characters are tolerated.

    Returns
    -------
    The loaded or newly built index.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"create"`` nor ``"reload"``, or if
        ``mode == "create"`` and ``wiki_urls`` is ``None``.
    """
    if mode == "reload":
        # Only pass persist_dir when given so the library default applies otherwise.
        storage_kwargs = {} if persist_dir is None else {"persist_dir": persist_dir}
        storage_context = StorageContext.from_defaults(**storage_kwargs)
        return load_index_from_storage(
            storage_context, service_context=service_context
        )

    if mode == "create":
        if wiki_urls is None:
            # Fail with a clear message instead of "NoneType is not iterable".
            raise ValueError(
                '[ERROR] "wiki_urls" must be provided when mode is "create".'
            )
        if isinstance(wiki_urls, str):
            # A bare string would otherwise be iterated character by character.
            wiki_urls = [wiki_urls]

        documents = []
        for raw_url in wiki_urls:
            # Strip trailing slashes so the last path segment is never empty.
            url = raw_url.rstrip("/")
            repo_dir = url.rsplit("/", 1)[-1]
            if os.path.exists(repo_dir):
                # Already cloned: bring the local checkout up to date.
                Repo(repo_dir).remotes.origin.pull()
            else:
                Repo.clone_from(url, repo_dir)
            documents.extend(load_documents_from_url(url))

        return VectorStoreIndex.from_documents(
            documents, service_context=service_context, show_progress=True
        )

    raise ValueError('[ERROR] Mode must be either "create" or "reload".')

In [9]:
# Clone URLs of the GitHub wikis to index, built from the repository names.
wiki_urls = [
    f"https://github.com/alan-turing-institute/{repo_name}.wiki.git"
    for repo_name in ("research-engineering-group", "Hut23")
]

# NOTE: in "create" mode load_index does not use persist_dir, so nothing is
# written to disk by this call.
index = load_index(
    mode="create",
    service_context=service_context,
    persist_dir="./storage_context_wikis/",
    wiki_urls=wiki_urls,
)

Parsing documents into nodes:   0%|          | 0/392 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/400 [00:00<?, ?it/s]

In [10]:
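# Uncomment to write the index to disk so that it can later be loaded with
# load_index("reload", ..., persist_dir="./storage_context_wikis"):
# index.storage_context.persist("./storage_context_wikis")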

In [11]:
# Build a query engine on top of the index; the same system prompt that was
# given to the service context is passed here as well.
query_engine = index.as_query_engine(system_prompt=system_prompt)

In [12]:
# Onboarding question for the wiki-backed query engine.
question = "What should a new starter in REG do?"
response = query_engine.query(question)
print(response.response)


llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   138.53 ms /   196 runs   (    0.71 ms per token,  1414.86 tokens per second)
llama_print_timings: prompt eval time = 20495.25 ms /   726 tokens (   28.23 ms per token,    35.42 tokens per second)
llama_print_timings:        eval time = 11720.07 ms /   195 runs   (   60.10 ms per token,    16.64 tokens per second)
llama_print_timings:       total time = 32613.28 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   180.75 ms /   256 runs   (    0.71 ms per token,  1416.33 tokens per second)
llama_print_timings: prompt eval time = 24130.85 ms /   836 tokens (   28.86 ms per token,    34.64 tokens per second)
llama_print_timings:        eval time = 15931.49 ms /   255 runs   (   62.48 ms per token,    16.01 tokens per second)
llama_print_timings:       total time = 40585.31 ms
Llama.generate: prefix-match hit


  Thank you for providing additional context! Based on the updated information, here is a refined answer to the original query:
Based on the provided context, here is what a new starter in REG should do:
1. Create a Github issue using the provided template to document their onboarding process. Assign both the person running onboarding and the one running recruitment.
2. Create a Forecast user for themselves, and assign them to "not started" from 2016-01-01 to their start date, and to "Onboarding to REG" for their first few weeks.
3. See that the reporting structure table is updated. Typically, the person running recruitment does this, but the onboarder can also do it.
4. Send an introductory email to the new starter, asking our HR contact for any necessary information or support.
By following these steps, a new starter in REG can ensure a smooth and successful onboarding process.
In addition to the original answer, based on the updated context, here are some additional suggestions:
* M


llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   182.22 ms /   256 runs   (    0.71 ms per token,  1404.89 tokens per second)
llama_print_timings: prompt eval time = 20858.46 ms /   703 tokens (   29.67 ms per token,    33.70 tokens per second)
llama_print_timings:        eval time = 16354.62 ms /   255 runs   (   64.14 ms per token,    15.59 tokens per second)
llama_print_timings:       total time = 37748.45 ms


In [13]:
# Question about the reviewer role.
question = "What are my responsibilities as a reviewer?"
response = query_engine.query(question)
print(response.response)

Llama.generate: prefix-match hit

llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   153.12 ms /   215 runs   (    0.71 ms per token,  1404.14 tokens per second)
llama_print_timings: prompt eval time = 19166.27 ms /   611 tokens (   31.37 ms per token,    31.88 tokens per second)
llama_print_timings:        eval time = 13096.09 ms /   214 runs   (   61.20 ms per token,    16.34 tokens per second)
llama_print_timings:       total time = 32710.49 ms
Llama.generate: prefix-match hit


  Thank you for providing additional context. Based on the updated information, here is a revised answer to your original query:
As a reviewer, your primary responsibility is to provide constructive feedback on the team member's performance during their probation period. This involves gathering regular feedback from the team member's collaborators and passing on information on their work, as well as providing any constructive feedback you may have. You should also hold mid-term and final reviews to assess the team member's progress and ensure they are meeting their objectives.
In addition to your role as a reviewer, if the team member is facing a difficult situation outside of work that may be impacting their performance, you can refer them to Debbie Dixon, the Turing's External Supervisor for support. If there are problems with the team member's performance and the manager is not comfortable giving this feedback to the team member, then it is best to ask for help from the Principal in


llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   182.60 ms /   256 runs   (    0.71 ms per token,  1401.95 tokens per second)
llama_print_timings: prompt eval time = 11052.03 ms /   419 tokens (   26.38 ms per token,    37.91 tokens per second)
llama_print_timings:        eval time = 14994.37 ms /   255 runs   (   58.80 ms per token,    17.01 tokens per second)
llama_print_timings:       total time = 26591.51 ms


In [14]:
# Question about who to contact regarding Topdesk.
question = "Who should I talk to about Topdesk?"
response = query_engine.query(question)
print(response.response)

Llama.generate: prefix-match hit

llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   167.01 ms /   237 runs   (    0.70 ms per token,  1419.09 tokens per second)
llama_print_timings: prompt eval time = 18493.11 ms /   585 tokens (   31.61 ms per token,    31.63 tokens per second)
llama_print_timings:        eval time = 14196.35 ms /   236 runs   (   60.15 ms per token,    16.62 tokens per second)
llama_print_timings:       total time = 33182.81 ms
Llama.generate: prefix-match hit


  Thank you for providing additional context! Based on the updated information, I would recommend reaching out to the Operator Group within the Topdesk system for assistance with using Topdesk. They are responsible for managing and processing tickets, and can provide guidance on how to use the system effectively. You can find their contact information by searching for "Operator" in the Topdesk system and checking the "Operator Group" section.
Alternatively, if your request is not related to the Research Engineering Group, you can search for an appropriate group or team within Topdesk to allocate your request to. This will ensure that your request is handled by the correct department or individual.
I hope this helps! Let me know if you have any other questions.



llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   109.74 ms /   156 runs   (    0.70 ms per token,  1421.58 tokens per second)
llama_print_timings: prompt eval time = 17863.65 ms /   570 tokens (   31.34 ms per token,    31.91 tokens per second)
llama_print_timings:        eval time =  9240.87 ms /   155 runs   (   59.62 ms per token,    16.77 tokens per second)
llama_print_timings:       total time = 27425.82 ms


In [15]:
# Acronym look-up question.
question = "What does REG stand for at the Alan Turing Institute?"
response = query_engine.query(question)
print(response.response)

Llama.generate: prefix-match hit

llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =    16.27 ms /    23 runs   (    0.71 ms per token,  1413.38 tokens per second)
llama_print_timings: prompt eval time = 18590.73 ms /   595 tokens (   31.24 ms per token,    32.01 tokens per second)
llama_print_timings:        eval time =  1306.31 ms /    22 runs   (   59.38 ms per token,    16.84 tokens per second)
llama_print_timings:       total time = 19946.38 ms
Llama.generate: prefix-match hit


  Thank you for providing additional context! Based on the updated information, the REG at the Alan Turing Institute stands for Research Engineering Group.
As a helpful assistant, I can confirm that the REG is a permanent team of employees with fixed hours and charges fixed rates for their time. They are responsible for delivering professional software engineering research to support the research project, including writing reproducible, reliable, and tested code, building open-source software libraries, and accelerating and scaling up algorithms using HPC, GPUs, FPGAs, cloud, etc.
The REG also provides cost recovery for their services, with 100% of the funding going directly to individual salaries (Directly Incurred - DI) and £32,500 to the Turing Institute where the work happens, for a total of £65,000 for REG. There are no restrictions or flow-down conditions mentioned in the provided context.
I hope this refined answer better addresses your query! If you have any further questions o


llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   163.92 ms /   231 runs   (    0.71 ms per token,  1409.20 tokens per second)
llama_print_timings: prompt eval time = 19195.21 ms /   633 tokens (   30.32 ms per token,    32.98 tokens per second)
llama_print_timings:        eval time = 13942.93 ms /   230 runs   (   60.62 ms per token,    16.50 tokens per second)
llama_print_timings:       total time = 33622.69 ms


In [16]:
# Question about a named person.
question = "Who is Ryan Chan?"
response = query_engine.query(question)
print(response.response)

Llama.generate: prefix-match hit


  Based on the provided context information, Ryan Chan is not mentioned as a lead or member of the Research Engineering Group in the given wiki page. Therefore, I cannot provide any information about Ryan Chan in this context.



llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =    32.05 ms /    45 runs   (    0.71 ms per token,  1403.97 tokens per second)
llama_print_timings: prompt eval time =  6379.49 ms /   194 tokens (   32.88 ms per token,    30.41 tokens per second)
llama_print_timings:        eval time =  2361.49 ms /    44 runs   (   53.67 ms per token,    18.63 tokens per second)
llama_print_timings:       total time =  8832.98 ms


In [17]:
# Biographical question about a named person.
question = "Where did Federico Nanni do his PhD?"
response = query_engine.query(question)
print(response.response)

Llama.generate: prefix-match hit

llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =    59.59 ms /    84 runs   (    0.71 ms per token,  1409.75 tokens per second)
llama_print_timings: prompt eval time = 18096.68 ms /   558 tokens (   32.43 ms per token,    30.83 tokens per second)
llama_print_timings:        eval time =  4916.49 ms /    83 runs   (   59.23 ms per token,    16.88 tokens per second)
llama_print_timings:       total time = 23185.43 ms
Llama.generate: prefix-match hit


  Thank you for providing additional context! Based on the information provided, I cannot determine where Federico Nanni did his PhD with certainty as there is no information available in the provided sources. The links only provide information related to the Alan Turing Institute, its research groups, and various acronyms used within the institute. Therefore, I cannot answer your query with certainty.
However, I can suggest some possible avenues for further research to help locate the information you are looking for. You may want to try searching for Federico Nanni's academic profile or publication list to see if he has published any information about his PhD institution or experience. Additionally, you could try contacting him directly through email or social media to ask for more information.
I hope this helps! Let me know if you have any other questions.



llama_print_timings:        load time = 12534.73 ms
llama_print_timings:      sample time =   123.98 ms /   175 runs   (    0.71 ms per token,  1411.55 tokens per second)
llama_print_timings: prompt eval time =  9873.20 ms /   404 tokens (   24.44 ms per token,    40.92 tokens per second)
llama_print_timings:        eval time = 10001.58 ms /   174 runs   (   57.48 ms per token,    17.40 tokens per second)
llama_print_timings:       total time = 20238.46 ms
