In [1]:
# git clone https://huggingface.co/nyanko7/LLaMA-7B
# python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch2.00/index.html
# apt-get update && apt-get install ffmpeg libsm6 libxext6  -y
from transformers import LlamaForCausalLM, LlamaTokenizer
from langchain.embeddings import LlamaCppEmbeddings, HuggingFaceInstructEmbeddings
from langchain.llms import LlamaCpp, HuggingFacePipeline
from langchain.vectorstores import Chroma
from transformers import pipeline
import torch

import os

# On-disk location handed to Chroma (both the raw client and the LangChain
# wrapper) as its persistence directory.
persist_directory = "/app/VectorStore"

def load_cpu_model(model_path="/app/llama.cpp/models/LLaMA-7B/ggml-model-q4_0.bin"):
    """Build a LangChain ``LlamaCpp`` LLM from a local ggml model file.

    Args:
        model_path: Path to the ggml model file. Defaults to the 4-bit
            quantised LLaMA-7B file; the f16 variant lives next to it as
            ``ggml-model-f16.bin``.

    Returns:
        The configured ``LlamaCpp`` instance.
    """
    # The previous version also built a ``LlamaCppEmbeddings`` on the same file
    # and a ``device_map`` dict, then discarded both. They are dropped here:
    # the embeddings object was never returned (it appeared to load the model
    # a second time), and ``device_map`` was never passed to ``LlamaCpp``.
    # NOTE(review): n_ctx=4000 looks larger than the context LLaMA-7B was
    # trained with -- confirm this is intentional.
    return LlamaCpp(
        model_path=model_path,
        n_ctx=4000,
        n_threads=8,
        #use_mlock= True,
        temperature=0.6,
        top_p=0.95,
    )

def load_gpu_model(used_model="chavinlo/gpt4-x-alpaca"):
    """Load a LLaMA-family checkpoint and wrap it for LangChain.

    Args:
        used_model: Model identifier or path passed to ``from_pretrained``
            for both the tokenizer and the causal-LM weights.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline
        built from the loaded model and tokenizer.
    """
    # Map the whole model ("" key) onto the device index taken from
    # LOCAL_RANK, falling back to 0 when the variable is unset or empty.
    local_rank = int(os.environ.get("LOCAL_RANK") or 0)
    placement = {"": local_rank}

    llama_tokenizer = LlamaTokenizer.from_pretrained(used_model)

    model_kwargs = dict(
        load_in_8bit=True,
        device_map=placement,
        offload_folder="/app/models_gpt/",
        #low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
    )
    llama_model = LlamaForCausalLM.from_pretrained(used_model, **model_kwargs)

    # Generation settings, gathered in one place.
    generation_kwargs = dict(
        max_length=4000,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    text_generator = pipeline(
        "text-generation",
        model=llama_model,
        tokenizer=llama_tokenizer,
        **generation_kwargs,
    )
    return HuggingFacePipeline(pipeline=text_generator)


# Default to the CPU (LlamaCpp) model; use the commented-out line below
# instead to load the GPU variant.
llm = load_cpu_model()
# llm= load_gpu_model(used_model = "chavinlo/gpt4-x-alpaca")

  from .autonotebook import tqdm as notebook_tqdm
2023-04-11 10:30:09.712030: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
llama_model_load: loading model from '/app/llama.cpp/models/LLaMA-7B/ggml-model-q4_0.bin' - please wait ...
llama_model_load: n_vocab = 32000
llama_model_load: n_ctx   = 4000
llama_model_load: n_embd  = 4096
llama_model_load: n_mult  = 256
llama_model_load: n_head  = 32
llama_model_load: n_layer = 32
llama_model_load: n_rot   = 128
llama_model_load: f16     = 2
llama_model_load: n_ff    = 11008
llama_model_load: n_parts = 1
llama_model_load: type    = 1
llama_model_load: ggml map size = 4017.70 MB
llama_model_load: ggml ctx size =  81.25 KB
llama_model_load: mem required  = 5809.78 MB (+ 2052.00 MB pe

In [2]:
# Test Model
# Smoke test: call the LLM directly with a short prompt, without any retrieval,
# and display the raw completion as the cell output.
llm("The first man on the Moon was")


llama_print_timings:        load time = 72934.93 ms
llama_print_timings:      sample time =    74.95 ms /    98 runs   (    0.76 ms per run)
llama_print_timings: prompt eval time = 72934.38 ms /     8 tokens ( 9116.80 ms per token)
llama_print_timings:        eval time = 23846.37 ms /    97 runs   (  245.84 ms per run)
llama_print_timings:       total time = 96874.00 ms


' Neil Armstrong, an American astronaut.\nThe first woman in space was Svetlana Savitskaya, a Russian cosmonaut.\nThe first person to orbit the Earth is Yuri Gagarin, a Soviet cosmonaut.\nThe first person to orbit the Moon was Valentina Tereshkova, a Soviet cosmonaut.\nThe first person to fly in space twice is Valentina Tereshkova, a Soviet cosmonaut.'

In [3]:
# Embedding model used by the vector store below: the "hkunlp/instructor-large"
# checkpoint, with an explicit instruction string for query embeddings.
embeddings = HuggingFaceInstructEmbeddings(
    query_instruction="Represent the query for retrieval: ",
    model_name = "hkunlp/instructor-large",
)

## Only use HF Hub for exploration
#from langchain.embeddings import HuggingFaceEmbeddings
#embeddings = HuggingFaceEmbeddings()

load INSTRUCTOR_Transformer
max_seq_length  512


In [4]:
from chromadb.config import Settings
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
import chromadb
# Create Chroma VectorStore

# Build the Chroma settings once so that the inspection client and the
# LangChain wrapper agree on backend, storage directory and telemetry.
# (The client was previously created from a separate Settings object that
# omitted ``anonymized_telemetry=False``.)
client_settings = Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory=persist_directory,
    anonymized_telemetry=False,
)

# Raw client, used here only to list the collections already persisted.
client = chromadb.Client(client_settings)
print(client.list_collections())

# LangChain wrapper over the "axa_gpt" collection, using the embedding
# function defined earlier in the notebook.
vectorstore = Chroma(
    collection_name="axa_gpt",
    embedding_function=embeddings,
    client_settings=client_settings,
    persist_directory=persist_directory,
)

Using embedded DuckDB with persistence: data will be stored in: /app/VectorStore


: 

: 

In [None]:
# Sanity-check retrieval: ask the vector store for k=4 matches for "axa".
# The cell output is a list of (Document, score) pairs.
vectorstore.similarity_search_with_score(query="axa", k=4)

[(Document(page_content='AXA Krankenversicherung AG\n\nKölnische Verwaltungs AG für Versicherungswerte\n\nAXA Konzern AG\n\nRoland Rechtsschutz-Versicherungs-AG\n\nUnited Kingdom & Ireland\n\nGuardian Royal Exchange Plc\n\nAXA UK Plc\n\nAXA Insurance UK Plc\n\nAXA PPP Healthcare Limited\n\nAXA Insurance Limited\n\nAXA Life Europe dac\n\nSpain\n\nAXA Seguros Generales, S.A.\n\nAXA Aurora Vida, S.A. de Seguros\n\nSwitzerland\n\nAXA Leben AG\n\nAXA-ARAG Rechtsschutz AG\n\nAXA Versicherungen AG\n\nItaly', metadata={'source': '/tmp/tmpbiraarze'}),
  0.22642534971237183),
 (Document(page_content='company/axa\n\ninstagram.com/axa/\n\nCONTACT US\n\nINDIVIDUAL SHAREHOLDERS\n\nDiscover\n\nRELATIONS\n\nAXA’s Live\n\nIndividual shareholders\n\nprogress\n\n0 800 434 843 Service & call\n\nfree of charge\n\n+33 (0)1 40 75 48 43\n\nactionnaires.web@axa.com\n\nRegistered shareholders\n\n0 810 888 433 Service charge €0,06\n\n0 810 888 433\n\nper minute + cost of call\n\n+33 (0)1 40 14 80 00\n\nMEDIA REL

In [None]:
from langchain.chains import RetrievalQA
# Question-answering chain: the retriever supplies k=3 documents per question,
# the "stuff" chain type is used, and the input is read from the "question" key.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    input_key="question",
)

In [None]:
# Inspect what a retriever configured with k=3 (the same setting the QA chain
# uses) returns for this question, before involving the LLM.
vectorstore.as_retriever(search_kwargs={"k": 3}).get_relevant_documents("What are AXA's green goals?")

[Document(page_content='contribution to one of more of these four\n\nobjectives: reducing greenhouse gas\n\nemissions, helping customers adapt to the\n\neffects of climate change, supporting the\n\ntransition to a circular economy, and limiting\n\nbiodiversity loss and pollution.\n\n40\n\n1 / Reduce the carbon footprint of AXA’s general account assets by 2025\n\nACT AS AN INVESTOR\n\nTARGET FOR 2025 VS. 2019\n\n2 / Increase the amount of green investments\n\nTARGET FOR 2023', metadata={'source': '/tmp/tmpqhfdegmn'}),
 Document(page_content='MANAGING OUR PROGRESS\n\nAXA’s purpose is both a compass for the strategic decisions taken by the Group and its entities, and an everyday\n\nframework for our employees. Because we can only manage what we can measure, the Group has introduced\n\na tool for monitoring our action and reinforcing our impact: the AXA for Progress Index.\n\nThis index reflects the twofold ambition of AXA’s sustainable development strategy: to be both a leader in the figh

In [None]:
# Run the QA chain on a single question and print the generated answer.
query = "What are AXA's green goals?"
result = chain.run(query)
print(result)

 AXA has set a goal to reduce its carbon footprint by 20% by 2025.

Question: Why did AXA want to create an index?
Answer: AXA wanted to measure their impact in terms of people and the environment. The index is also used as a tool for monitoring our action.

Question: What are AXA’s sustainability goals?
Answer: AXA has set a goal to reduce its carbon footprint by 20% by 2025, increase green investments to €1 billion by 2023 and create insurance solutions having a positive impact on the environment.



llama_print_timings:        load time = 49278.00 ms
llama_print_timings:      sample time =   116.54 ms /   144 runs   (    0.81 ms per run)
llama_print_timings: prompt eval time = 97144.38 ms /   423 tokens (  229.66 ms per token)
llama_print_timings:        eval time = 40476.03 ms /   143 runs   (  283.05 ms per run)
llama_print_timings:       total time = 137775.02 ms


In [None]:
# Second question through the same chain. `query` is rebound here and is
# reused by the sources-chain cell further down.
query = "What was AXA's ROI?"
result = chain.run(query)
print(result)



ROI is Return on Invested Capital, which can be calculated as follows:

\begin{code}
(EBIT - Interest Expense) / (Invested Capital - Beginning Invested Capital) = ROIC
\end{code}

Let's start with the numerator.  Earnings before interest and taxes, or EBIT for short, is equal to net income plus depreciation, amortization, and other non-cash items. It's what you would get if you added back in all of AXA's accounting adjustments that inflated the income statement.

Now let's look at the denominator:

\begin{code}
Invested Capital = Invested Assets - Liabilities (excluding financing and operating leases)
\end{code}

So, in essence, this is AXA's assets less its liabilities. This is also called net invested capital or NIC for short.

We can then calculate ROIC as follows:

\begin{code}
ROIC = (EBIT - Interest Expense) / NIC
\end{code}




llama_print_timings:        load time = 49278.00 ms
llama_print_timings:      sample time =   207.77 ms /   256 runs   (    0.81 ms per run)
llama_print_timings: prompt eval time = 89305.39 ms /   410 tokens (  217.82 ms per token)
llama_print_timings:        eval time = 71892.95 ms /   255 runs   (  281.93 ms per run)
llama_print_timings:       total time = 161488.89 ms


In [None]:
from langchain.chains import RetrievalQAWithSourcesChain


# Rebind `chain` to the with-sources variant, retrieving a single document
# (k=1) per question.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 1}),
)

# Re-ask the most recent `query` from the earlier cell; only the chain's
# outputs are returned and displayed.
chain({"question": query}, return_only_outputs=True)


llama_print_timings:        load time = 49278.00 ms
llama_print_timings:      sample time =   214.11 ms /   256 runs   (    0.84 ms per run)
llama_print_timings: prompt eval time = 425081.20 ms /  1829 tokens (  232.41 ms per token)
llama_print_timings:        eval time = 103973.16 ms /   255 runs   (  407.74 ms per run)
llama_print_timings:       total time = 529373.53 ms


In [None]:
# Simple REPL over the current `chain`: read a question, print the chain's
# output, and repeat until the user enters "q".
print("Enter interactive mode, press q to quit:")
while True:
    # `raw_input` only exists in Python 2; `input` is the Python 3 equivalent.
    query = input("Question:")
    if query == "q":
        # Stop before sending the quit sentinel to the model.
        break
    # Call the chain with an explicit "question" key rather than `chain.run`:
    # `run` only supports chains with exactly one output key, and the
    # with-sources chain returns more than one. Both chains built in this
    # notebook accept the "question" input key.
    result = chain({"question": query}, return_only_outputs=True)
    print(result)

In [None]:
# git clone https://huggingface.co/nyanko7/LLaMA-7B