In [16]:
import textwrap
import os
import wikipediaapi
from collections import defaultdict

import transformers
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM
from langchain_huggingface import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain

import torch


In [2]:
device = 'cuda' if torch.cuda.is_available() else None
if device:
    print(f"Using {torch.cuda.get_device_name(0)}")
bitsquant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "mistralai/Mistral-7B-Instruct-v0.1"
sharded_model = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
model = AutoModelForCausalLM.from_pretrained(
    sharded_model,
    trust_remote_code=True,
    quantization_config=bitsquant_config,
    device_map="auto",
    token=True,
)

text_gen_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.85,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=256,
    device_map="auto"
)

mistral_llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

quick_test = "Who is MistralAI? Please write an essay"
ans = mistral_llm.invoke(quick_test)
print(ans)

Using NVIDIA GeForce RTX 4060


Loading checkpoint shards: 100%|██████████| 8/8 [00:21<00:00,  2.64s/it]
Device set to use cuda:0


Who is MistralAI? Please write an essay about it.

MistralAI is a cutting-edge company based in France that focuses on developing large language models. These models can generate human-like text and understand natural language input from humans. The technology behind these models is based on deep learning algorithms that analyze massive amounts of data to identify patterns and relationships between words and phrases.

One of the key benefits of MistralAI's technology is its ability to create highly accurate translations into multiple languages. The company's models can also be used for tasks such as summarizing long documents or generating captions for images and videos. In addition, they can be trained on specific domains or industries, making them useful for applications such as medical or legal document analysis.

MistralAI is known for its commitment to ethical AI, ensuring that its models are transparent and explainable. They also prioritize diversity in their datasets, which help

In [13]:
wiki_regular = wikipediaapi.Wikipedia(user_agent='Summary Comparison (taylor.ayla@protonmail.com)', language='en')
regular_page = wiki_regular.page('Python_(programming_language)')


In [24]:
len(regular_page.text)

50322

In [28]:
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(encoding_name="cl100k_base", chunk_size=2000, chunk_overlap=100)
texts = text_splitter.split_text(regular_page.text)
[print(len(text)) for text in texts]
docs = [Document(page_content=t) for t in texts]
print(docs)

8148
8404
8195
8769
9446
7728


In [29]:
chain = load_summarize_chain(mistral_llm, chain_type="map_reduce", verbose= True)
output_summary = chain.run(docs)




[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.
Python is dynamically type-checked and garbage-collected. It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming.
Guido van Rossum began working on Python in the late 1980s as a successor to the ABC programming language, and he first released it in 1991 as Python 0.9.0. Python 2.0 was released in 2000. Python 3.0, released in 2008, was a major revision not completely backward-compatible with earlier versions. Python 2.7.18, released in 2020, was the last release of Python 2.
Python consistently ranks as one of the most popular programming languages, and it has gained widesprea

ValueError: A single document was longer than the context length, we cannot handle this.

In [None]:
wrapped_text = textwrap.fill(output_summary, width=100)
print(wrapped_text)