In [2]:
from transformers import BloomTokenizerFast, BloomForQuestionAnswering, BloomForCausalLM, TrainingArguments, Trainer
import os
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
import pinecone
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub
from langchain.prompts import PromptTemplate


  from .autonotebook import tqdm as notebook_tqdm


In [13]:
## LOCAL
from transformers import BloomTokenizerFast, BloomForCausalLM,TrainingArguments, Trainer

In [14]:
load_dotenv()

True

In [12]:

# Save it into pinecone
API_KEY = os.environ.get("PINECONE_API_KEY")
YOUR_ENV = os.environ.get("PINECONE_ENVIRONMENT", "us-west4-gcp-free")
index_name = "test-langchain"


In [15]:
pinecone.init(
    api_key=API_KEY,
    environment=YOUR_ENV
)

if len(pinecone.list_indexes()) == 0:
    pinecone.create_index(name=index_name, metric="cosine", shards=1, dimension=len(res[0]))

pinecone.describe_index(pinecone.list_indexes()[0])

IndexDescription(name='test-langchain', metric='cosine', replicas=1, dimension=384.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [20]:
embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")

In [26]:

# Save it into pinecone
API_KEY = os.environ.get("PINECONE_API_KEY")
YOUR_ENV = os.environ.get("PINECONE_ENVIRONMENT", "us-west4-gcp-free")
index_name = "test-langchain"

In [27]:
index = pinecone.Index(index_name)
index

<pinecone.index.Index at 0x2b27ed690>

In [18]:
text_field = "text"

index = pinecone.Index(index_name)

In [21]:
vectorstore = Pinecone(index, embeddings.embed_query, text_field)

In [30]:

LLM = HuggingFaceHub(repo_id='bigscience/bloomz-560m')

In [31]:
# prompt_template = """
# Using the following pieces of context answer the question 
# {context}
# 
# Question: {question}
# """
# 
# PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [32]:
# chain_type_kwargs = {"prompt": PROMPT}

In [42]:

qa = RetrievalQA.from_chain_type(llm=LLM, chain_type="refine", 
                                 retriever=vectorstore.as_retriever(), 
                                 return_source_documents=True,
                                 #chain_type_kwargs=chain_type_kwargs
                                 )

In [43]:
query = "who was Benito Mussolini?"
result = qa({"query": query})

Input length of input_ids is 353, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 451, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 457, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 331, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


In [44]:
result

{'query': 'who was Benito Mussolini?',
 'result': ' Mussolini',
 'source_documents': [Document(page_content='Benito Amilcare Andrea Mussolini KSMOM GCTE (29 July 1883 – 28 April 1945) was an Italian politician and journalist. He was also the Prime Minister of Italy from 1922 until 1943. He was the leader of the National Fascist Party.\n\nBiography\n\nEarly life\nBenito Mussolini was named after Benito Juarez, a Mexican opponent of the political power of the Roman Catholic Church, by his anticlerical (a person who opposes the political interference of the Roman Catholic Church in secular affairs) father. Mussolini\'s father was a blacksmith. Before being involved in politics, Mussolini was a newspaper editor (where he learned all his propaganda skills) and elementary school teacher.\n\nAt first, Mussolini was a socialist, but when he wanted Italy to join the First World War, he was thrown out of the socialist party. He \'invented\' a new ideology, Fascism, much out of Nationalist\xa0and

## LOCAL

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [46]:
checkpoint = "bigscience/bloomz-1b1"
okenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to("mps")

Downloading (…)okenizer_config.json: 100%|██████████| 222/222 [00:00<00:00, 1.35MB/s]
Downloading tokenizer.json: 100%|██████████| 14.5M/14.5M [00:00<00:00, 18.1MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 85.0/85.0 [00:00<00:00, 348kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 715/715 [00:00<00:00, 6.94MB/s]
Downloading model.safetensors: 100%|██████████| 2.13G/2.13G [01:33<00:00, 22.8MB/s]


In [7]:
from langchain import HuggingFacePipeline, LLMChain
from transformers import pipeline

In [5]:
training_args = TrainingArguments(output_dir="test_trainer", use_mps_device=True)

In [25]:
query = "Who was Adolf hitler?"

vectorstore.similarity_search(query, k=3)

[Document(page_content='Adolf Hitler (20 April 1889 – 30 April 1945) was an Austrian-born German politician and the leader of Nazi Germany. He became Chancellor of Germany in 1933, after a democratic election in 1932. He became Führer (leader) of Nazi Germany in 1934.\n\nHitler led the NSDAP, or the Nazi Party from 1921. When in power, the Nazis created a dictatorship called the Third Reich. In 1933, they blocked out all other political parties. This gave Hitler absolute power.\n\nHitler ordered the invasion of Poland in 1939, and this started World War II. Because of Hitler, at least 50 million people died. During World War II, Hitler was the Commander-in-Chief of the German Armed Forces and made all the important decisions. This was part of the so-called Führerprinzip. He shot himself on 30 April 1945, as the Soviet Army got to Berlin, because he did not want to be captured alive by the Soviet Union.\n\nHitler and the Nazi regime were responsible for the killing of an estimated 19.3 

In [26]:
docs = vectorstore.similarity_search(query, k=3)[0]
body = docs.page_content
source = docs.metadata['source']

In [8]:
LLM  = HuggingFacePipeline.from_model_id("bigscience/bloomz-560m", task="text-generation")

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [47]:
prompt = f"""Using only the following context: {body} answer the following 
Question: {query}

Answer:
"""
prompt

NameError: name 'body' is not defined

In [48]:
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device="mps", do_sample=False)


In [49]:
generator.model

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(250880, 1536)
    (word_embeddings_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=1536, out_features=4608, bias=True)
          (dense): Linear(in_features=1536, out_features=1536, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=1536, out_features=6144, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=6144, out_features=1536, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
  )
  (

In [50]:
prompt = """
Generate a complicted sentence using the following context:
'Using only the following context: Adolf Hitler (20 April 1889 – 30 April 1945) was an Austrian-born German politician and the leader of Nazi Germany. He became Chancellor of Germany in 1933, after a democratic election in 1932. He became Führer (leader) of Nazi Germany in 1934.\n\nHitler led the NSDAP, or the Nazi Party from 1921. When in power, the Nazis created a dictatorship called the Third Reich. In 1933, they blocked out all other political parties. This gave Hitler absolute power.\n\nHitler ordered the invasion of Poland in 1939, and this started World War II. Because of Hitler, at least 50 million people died. During World War II, Hitler was the Commander-in-Chief of the German Armed Forces and made all the important decisions. This was part of the so-called Führerprinzip. He shot himself on 30 April 1945, as the Soviet Army got to Berlin, because he did not want to be captured alive by the Soviet Union.\n\nHitler and the Nazi regime were responsible for the killing of an estimated 19.3 million civilians and prisoners of war. In addition, 28.7 million soldiers and people died as a result of military action in Europe.\n\nNazi forces committed many war crimes during the war. They were doing what Hitler told them to do. They killed their enemies or put them in concentration camps and death camps. Hitler and his men persecuted and killed Jews and other ethnic, religious, and political minorities. In what is called the Holocaust, the Nazis killed six million Jews, Roma people, homosexuals, Slavs, and many other groups of people.
Answer the following question:
What did Adolf Hitler?

"""

In [57]:
prompt = """

Based on the context: 'John is 4 times as old as Bob. Bob is 3 years younger than Mike. Mike is 10 years old.'

Answer:

'What is John's age?'

Explain the solution to the following problem step by step.


"""

In [58]:
result = generator(prompt, max_new_tokens = 100000)

In [59]:
result

[{'generated_text': "\n\nBased on the context: 'John is 4 times as old as Bob. Bob is 3 years younger than Mike. Mike is 10 years old.'\n\nAnswer:\n\n'What is John's age?'\n\nExplain the solution to the following problem step by step.\n\n\n4"}]