In [None]:
!pip install ydata-profiling chromadb langchain sentence_transformers bitsandbytes



In [None]:
!pip install langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.17-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.34 (from langchain-community)
  Downloading langchain_core-0.3.34-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.18 (from langchain-community)
  Downloading langchain-0.3.18-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [None]:
import pandas as pd
import numpy as np

# Auto EDA
from ydata_profiling import ProfileReport

# Torch and Transformers
import torch
from torch import bfloat16
import transformers
from transformers import AutoTokenizer

from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

# Hiding warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Select the compute device and report what hardware is available.
if torch.cuda.is_available():
    # Query name and memory for the SAME device index, so the two figures
    # cannot disagree on a multi-GPU host.
    gpu_index = torch.cuda.current_device()
    gpu_name = torch.cuda.get_device_name(gpu_index)
    total_memory = torch.cuda.get_device_properties(gpu_index).total_memory
    total_memory_gb = total_memory / (1024**3)  # bytes -> GiB
    print("GPU is available. \nUsing GPU")
    print("\nGPU Name:", gpu_name)
    print(f"Total GPU Memory: {total_memory_gb:.2f} GB")

    device = torch.device('cuda')
else:
    print("GPU is not available. \nUsing CPU")
    device = torch.device('cpu')

GPU is available. 
Using GPU

GPU Name: Tesla T4
Total GPU Memory: 14.74 GB


In [None]:
# Read the article dataset from the Colab filesystem and prepare an automated
# EDA report for it.
df = pd.read_csv(filepath_or_buffer='/content/Wikipedia Crypto Articles.csv')
report = ProfileReport(
    df=df,
    title='Wikipedia Cryptic Articles without barnacles',
)

In [None]:
# Display the profiling report as this cell's output.
report

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [None]:
# Report the row count, discard rows containing missing values, then report again
# so any loss is visible in the cell output.
rows_before = len(df)
print('Dataframe Length:', rows_before, 'rows')

df = df.dropna()

rows_after = len(df)
print('Dataframe length after dropping:', rows_after, 'rows')

Dataframe Length: 218 rows
Dataframe length after dropping: 218 rows


In [None]:
# Sanity-check the data by printing the title and body of the last row.
last_title = df['title'].iloc[-1]
last_article = df['article'].iloc[-1]

print('Title: ', last_title)
print('- - - - -')
print(last_article)

Title:  NEO (cryptocurrency)
- - - - -
Neo is a blockchain-based cryptocurrency and application platform used to run smart contracts and decentralized applications. The project, originally named Antshares, was founded in 2014 by Da HongFei and Erik Zhang and rebranded as Neo in 2017. In 2017 and 2018, the cryptocurrency maintained some success in the Chinese market despite the recently-enacted prohibition on cryptocurrency in that country.


== Technical specifications ==
The Neo network runs on a proof-of-stake decentralized Byzantine fault tolerant (dBFT) consensus mechanism between a number of centrally approved nodes, and can support up to 10,000 transactions per second. The base asset of the Neo blockchain is the non-divisible Neo token which generates GAS tokens. These GAS tokens, a separate asset on the network, can be used to pay for transaction fees, and are divisible with the smallest unit of 0.00000001. The inflation rate of GAS is controlled with a decaying half-life algori

In [None]:
# Wrap every DataFrame row in a LangChain Document.
# The *article body* must be the page content, because that is the text that gets
# chunked, embedded and handed to the LLM as context. Using the 'title' column here
# meant only the titles were indexed, so retrieval returned bare titles and the
# model had nothing to answer from. The other columns (e.g. the title) are kept
# by the loader as per-document metadata.
articles = DataFrameLoader(df, page_content_column='article')

# Materialise the whole DataFrame as a list of Documents.
document = articles.load()

In [None]:
# Chunking: cut each document into pieces of at most 1000 characters, with a
# 20-character overlap between neighbouring pieces.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
splitted_texts = splitter.split_documents(document)

In [None]:
# Sentence-transformers model used to turn text chunks (and queries) into vectors.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every chunk and store the vectors in a Chroma collection saved under
# ./chroma_db.
# NOTE(review): re-running this cell against an existing 'chroma_db' directory
# presumably adds the same chunks again — confirm and clear the directory if so.
chroma_database = Chroma.from_documents(
    documents=splitted_texts,
    embedding=embedding_model,
    persist_directory='chroma_db',
)


In [None]:
# Echo the vector store object as a quick check that it was created.
chroma_database

<langchain_community.vectorstores.chroma.Chroma at 0x7b04900f57d0>

In [None]:
# Define a retriever: expose the Chroma store through LangChain's retriever
# interface, using its default search settings (no arguments are passed).
retriever = chroma_database.as_retriever()
# Echo the retriever so its configuration is visible in the cell output.
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7b04900f57d0>, search_kwargs={})

In [None]:
# Quantisation settings for loading the Mistral model in 4-bit precision:
# NF4 quantisation type, double quantisation enabled, bfloat16 compute dtype.
# NOTE(review): the Tesla T4 shown in the GPU cell output has no native bfloat16
# support — confirm whether float16 would be the better compute dtype there.
quantization_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
)

In [None]:

from langchain.chains import RetrievalQA

In [None]:
import os

from transformers import AutoModelForCausalLM, pipeline, AutoTokenizer

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Never hard-code credentials: read the Hugging Face access token from the
# environment. `None` (variable unset or empty) falls back to any cached login.
# `token=` replaces the deprecated `use_auth_token=` keyword.
hf_token = os.environ.get("HF_TOKEN") or None

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

# Load the model with the 4-bit quantisation config defined earlier, letting
# accelerate place the weights on the available device(s).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map='auto',
    quantization_config=quantization_config,
    token=hf_token,
)

text_gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Budget the *answer* only. `max_length` also counted the prompt tokens, so a
    # long retrieved context left little or no room for generation and triggered
    # the truncation warning.
    max_new_tokens=256,
    # `temperature` is only honoured when sampling is enabled.
    do_sample=True,
    temperature=0.3,
    # Return just the completion instead of echoing the whole RAG prompt back.
    return_full_text=False,
)

# Adapt the transformers pipeline to LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:
# Assemble the question-answering chain: the retriever fetches relevant chunks and
# the "stuff" strategy places them all into a single prompt for the LLM.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=llm,
)

In [None]:
# Ask the chain a question. `Chain.run` is deprecated; `invoke` is the supported
# entry point. RetrievalQA takes its input under the "query" key and returns the
# answer under "result".
query = "What is the main topic of the article titled XYZ?"
response = qa_chain.invoke({"query": query})["result"]

print("🔹 Response:", response)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🔹 Response: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Bitcoin Magazine

Micree Zhan

Xapo

Zcash

Question: What is the main topic of the article titled XYZ?
Helpful Answer: I don't know, there is no article titled XYZ in the given context.
