In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import torch
from tqdm import tqdm
from itertools import islice
from uuid import uuid4
from datasets import load_dataset
from dotenv import dotenv_values
from huggingface_hub import login

In [2]:
venv = dotenv_values('.env')
login(token=venv["HF_TOKEN"], add_to_git_credential=True)

# ASTRA_DB_API_ENDPOINT = venv["ASTRA_DB_API_ENDPOINT"]
# ASTRA_DB_APPLICATION_TOKEN = venv["ASTRA_DB_APPLICATION_TOKEN"]
# ASTRA_DB_NAMESPACE = venv["ASTRA_DB_NAMESPACE"]

MODEL_EMBED = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'

Token has not been saved to git credential helper.
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [3]:
# corpus_file = r"NaverLegal/corpus.csv"
dataset_name = "Zappu/Legal-vn"
collection_name = "AstraDB_Train_Legal_v2"

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_kwargs = {'device': device}
cache_dir = r"../.cache"

embeddings = HuggingFaceEmbeddings(
    model_name=MODEL_EMBED,
    cache_folder=cache_dir,
    model_kwargs=model_kwargs,
    show_progress=True,
)



In [5]:
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [7]:
dataset = load_dataset(dataset_name, "default", split="train")
dataset

Dataset({
    features: ['question', 'context', 'cid', 'qid'],
    num_rows: 79456
})

In [8]:
# def create_vector_store_from_dataset(dataset, collection_name, vector_store, chunk_size):
#     """
#     Create vectorstore from Huggingface Dataset

#     Args:
#         dataset_name: dataset from Hugging Face
#         collection_name: name of collection
#         vector_store: AstraDBVectorStore
#         chunk_size: size of chunk
#     """
    
#     for i in range(0, len(dataset), chunk_size):
#         # Lặp qua tất cả các split (train, test, validation)
#         documents = []
#         chunk = islice(dataset, i, i + chunk_size)
#         for row in tqdm(chunk, desc=f"Processing split {chunk}"):
#             text = row['context']
#             if isinstance(text, list):
#                  text = " ".join(text)  # Nối list thành string nếu cần
#             # Tiền xử lý text nếu cần
#             if '"' in text:
#                 text = text.replace('"', '')
#             if r'\n' in text:
#                 text = text.replace(r'\n', '')
#             if r'/' in text:
#                 text = text.replace(r'/', '')

#             doc = Document(
#                 page_content=text,
#                 metadata={"cid": row['cid']},
#             )
#             documents.append(doc)
#         uuids = [str(uuid4()) for _ in range(len(documents))]
#         print(f"Chunk: {i // chunk_size}/{len(dataset) // chunk_size + 1}")
#         vector_store.add_documents(documents=documents, ids=uuids)

#     print(f"Collection {collection_name} created successfully")

In [9]:
chunk_size = 1000
# create_vector_store_from_dataset(dataset, collection_name, vector_store, chunk_size)
for i in range(0, len(dataset), chunk_size):
    # Lặp qua tất cả các split (train, test, validation)
    documents = []
    chunk = islice(dataset, i, i + chunk_size)
    for row in tqdm(chunk, desc=f"Processing split {chunk}"):
        text = row['context']
        if isinstance(text, list):
                text = " ".join(text)  # Nối list thành string nếu cần
        # Tiền xử lý text nếu cần
        if '"' in text:
            text = text.replace('"', '')
        if r'\n' in text:
            text = text.replace(r'\n', '')
        if r'/' in text:
            text = text.replace(r'/', '')

        doc = Document(
            page_content=text,
            metadata={"cid": row['cid']},
        )
        documents.append(doc)
    uuids = [str(uuid4()) for _ in range(len(documents))]
    print(f"Chunk: {i // chunk_size}/{len(dataset) // chunk_size + 1}")
    vector_store.add_documents(documents=documents, ids=uuids)

print("Done")

Processing split <itertools.islice object at 0x7fab705131a0>: 1000it [00:00, 20991.04it/s]

Chunk: 0/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2e6cfa60>: 1000it [00:00, 12259.32it/s]

Chunk: 1/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab70450810>: 1000it [00:00, 8361.87it/s]

Chunk: 2/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2e7e7600>: 1000it [00:00, 6456.73it/s]


Chunk: 3/80


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2cea7650>: 1000it [00:00, 5195.14it/s]


Chunk: 4/80


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2e7e7600>: 1000it [00:00, 4339.38it/s]

Chunk: 5/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2ce84540>: 1000it [00:00, 3719.67it/s]

Chunk: 6/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c917650>: 1000it [00:00, 3309.30it/s]

Chunk: 7/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c6f8040>: 1000it [00:00, 2942.40it/s]

Chunk: 8/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c6f2160>: 1000it [00:00, 2663.92it/s]

Chunk: 9/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c31e700>: 1000it [00:00, 2386.61it/s]

Chunk: 10/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c31cb80>: 1000it [00:00, 2191.63it/s]

Chunk: 11/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0beb4450>: 1000it [00:00, 2061.16it/s]

Chunk: 12/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0bebe430>: 1000it [00:00, 1935.16it/s]

Chunk: 13/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0bbd0b80>: 1000it [00:00, 1785.70it/s]

Chunk: 14/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c750310>: 1000it [00:00, 1671.74it/s]

Chunk: 15/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0bd7f790>: 1000it [00:00, 1601.94it/s]

Chunk: 16/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0baa9d50>: 1000it [00:00, 1493.02it/s]

Chunk: 17/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c212750>: 1000it [00:00, 1431.28it/s]

Chunk: 18/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0b670310>: 1000it [00:00, 1355.11it/s]

Chunk: 19/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0bd7f790>: 1000it [00:00, 1310.41it/s]

Chunk: 20/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2d5673d0>: 1000it [00:00, 1220.22it/s]

Chunk: 21/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2d36b1a0>: 1000it [00:00, 1157.55it/s]

Chunk: 22/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2d3d1030>: 1000it [00:00, 1132.78it/s]

Chunk: 23/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2d36fbf0>: 1000it [00:00, 1095.10it/s]

Chunk: 24/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab08330b80>: 1000it [00:00, 1052.78it/s]

Chunk: 25/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2cf8c6d0>: 1000it [00:01, 996.87it/s]


Chunk: 26/80


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0b44a3e0>: 1000it [00:01, 985.85it/s]

Chunk: 27/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab706a65c0>: 1000it [00:01, 933.85it/s]

Chunk: 28/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03f5fbf0>: 1000it [00:01, 907.52it/s]

Chunk: 29/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab706a65c0>: 1000it [00:01, 873.12it/s]

Chunk: 30/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0349a750>: 1000it [00:01, 844.37it/s]

Chunk: 31/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab706a65c0>: 1000it [00:01, 816.27it/s]

Chunk: 32/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03f71c60>: 1000it [00:01, 789.15it/s]

Chunk: 33/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03f70090>: 1000it [00:01, 785.76it/s]

Chunk: 34/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab02f69b70>: 1000it [00:01, 763.55it/s]

Chunk: 35/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab034e7920>: 1000it [00:01, 745.39it/s]

Chunk: 36/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03337510>: 1000it [00:01, 724.84it/s]

Chunk: 37/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0b1bf3d0>: 1000it [00:01, 708.94it/s]

Chunk: 38/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c1b2160>: 1000it [00:01, 674.60it/s]

Chunk: 39/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0ae6fbf0>: 1000it [00:01, 668.10it/s]

Chunk: 40/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2ccdeed0>: 1000it [00:01, 649.68it/s]

Chunk: 41/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03f70090>: 1000it [00:01, 624.01it/s]

Chunk: 42/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0aaa89a0>: 1000it [00:01, 623.32it/s]

Chunk: 43/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03f70090>: 1000it [00:01, 594.76it/s]

Chunk: 44/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a73d8a0>: 1000it [00:01, 584.39it/s]

Chunk: 45/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a9965c0>: 1000it [00:01, 578.63it/s]

Chunk: 46/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a8910d0>: 1000it [00:01, 563.47it/s]

Chunk: 47/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0b433ba0>: 1000it [00:01, 557.01it/s]

Chunk: 48/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a2da930>: 1000it [00:01, 548.16it/s]

Chunk: 49/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a1fff10>: 1000it [00:01, 531.75it/s]

Chunk: 50/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c90e5c0>: 1000it [00:01, 530.81it/s]

Chunk: 51/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab2c7a2ca0>: 1000it [00:01, 507.07it/s]

Chunk: 52/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a890810>: 1000it [00:01, 509.76it/s]

Chunk: 53/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09b17790>: 1000it [00:02, 494.68it/s]

Chunk: 54/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09b75d50>: 1000it [00:02, 483.48it/s]

Chunk: 55/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09a64f90>: 1000it [00:02, 473.28it/s]

Chunk: 56/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09a67a10>: 1000it [00:02, 471.29it/s]

Chunk: 57/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09a22930>: 1000it [00:02, 461.77it/s]

Chunk: 58/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab704e9a30>: 1000it [00:02, 448.38it/s]

Chunk: 59/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0992e430>: 1000it [00:02, 448.81it/s]

Chunk: 60/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab098e9c60>: 1000it [00:02, 438.40it/s]

Chunk: 61/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0992bc40>: 1000it [00:02, 424.04it/s]

Chunk: 62/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09a209a0>: 1000it [00:02, 425.36it/s]

Chunk: 63/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab08fd73d0>: 1000it [00:02, 418.82it/s]

Chunk: 64/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09a209a0>: 1000it [00:02, 410.31it/s]

Chunk: 65/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab08de8f40>: 1000it [00:02, 401.30it/s]

Chunk: 66/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab08de7a10>: 1000it [00:02, 400.49it/s]

Chunk: 67/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab09e4b1a0>: 1000it [00:02, 396.77it/s]

Chunk: 68/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab08ce9b70>: 1000it [00:02, 392.26it/s]

Chunk: 69/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab0a4b8f40>: 1000it [00:02, 389.89it/s]

Chunk: 70/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab088d0f40>: 1000it [00:02, 373.88it/s]

Chunk: 71/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab088c6430>: 1000it [00:02, 372.02it/s]

Chunk: 72/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab086a2930>: 1000it [00:03, 332.52it/s]

Chunk: 73/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab08de7a10>: 1000it [00:02, 362.55it/s]

Chunk: 74/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab085aff10>: 1000it [00:02, 356.70it/s]

Chunk: 75/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab03f631a0>: 1000it [00:02, 351.49it/s]

Chunk: 76/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab085af510>: 1000it [00:02, 347.95it/s]

Chunk: 77/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab02b3e430>: 1000it [00:02, 344.71it/s]

Chunk: 78/80





Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Processing split <itertools.islice object at 0x7fab029ef510>: 456it [00:02, 157.85it/s]

Chunk: 79/80





Batches:   0%|          | 0/15 [00:00<?, ?it/s]

Done


In [11]:
faiss_path = "VectorStore/Train_Legal_v1.faiss"

In [None]:

vector_store.save_local(faiss_path)
print(f"Collection {collection_name} created successfully")

Collection AstraDB_Train_Legal_v2 created successfully


In [None]:
import ray, time
ray.init()

2024-11-25 07:04:24,035	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.8.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-11-25 07:04:24,059	INFO worker.py:1634 -- Connecting to existing Ray cluster at address: 10.0.0.4:6379...
2024-11-25 07:04:24,068	INFO worker.py:1810 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.11
Ray version:,2.39.0
Dashboard:,http://127.0.0.1:8265


In [14]:
dataset = load_dataset(dataset_name, "default", split="train")
dataset

Dataset({
    features: ['question', 'context', 'cid', 'qid'],
    num_rows: 79456
})

In [None]:
small_dataset = dataset.select(range(79456))
small_dataset

Dataset({
    features: ['question', 'context', 'cid', 'qid'],
    num_rows: 800
})

In [None]:
@ray.remote(num_cpus=1, num_gpus=0)
def predict_by_chunk(i, chunk, vtstore_ref):
    start_time = time.time()
    predict_path = f"Predicts/predict_{i}.txt"
    with open(predict_path, 'w') as p:
        for row in tqdm(chunk, desc=f"Processing split {chunk} {i}"):
            content = row['question']
            qid = row['qid']

            results = vtstore_ref.similarity_search(
                content,
                k = 10,
            )
            cids = [doc.metadata['cid'] for doc in results]  # Assuming 'qid' is what you meant by cid
            more_cid = ""
            for i in cids:
                more_cid += f" {i[1:-1]}"
            response = f"{qid} {more_cid}"

            # response = similarity_search(content, faiss_path, qid)
            p.write(response + '\n')
    return f"processed {i} chunk: " + "{:.4f}".format(time.time() - start_time)    

In [18]:
vtstore = FAISS.load_local(faiss_path, embeddings, allow_dangerous_deserialization=True)
vtstore_ref = ray.put(vtstore)

In [None]:
chunk_size = 1000
cnt = 0
chunk_results = []
max_task = 8
total_chunk = int(len(small_dataset) // chunk_size)
for i in range(0, total_chunk):
    cnt += 1
    chunk = small_dataset.shard(num_shards=total_chunk, index=i)
    while cnt - 1 >= max_task:
        ready_ids, _ = ray.wait(chunk_results, num_returns=1)
        ready_id = ready_ids[0]
        chunk_results.remove(ready_id)
        cnt -= 1
    chunk_results.append(predict_by_chunk.remote(i, chunk, vtstore_ref))
processed_chunks = ray.get(chunk_results)

Processing split Dataset({=35384)[0m 
[36m(predict_by_chunk pid=35384)[0m     features: ['question', 'context', 'cid', 'qid'],
[36m(predict_by_chunk pid=35384)[0m     num_rows: 100
[36m(predict_by_chunk pid=35384)[0m }) 2:   0%|          | 0/100 [00:00<?, ?it/s]
Batches:   0%|          | 0/1 [00:00<?, ?it/s][A
[36m(predict_by_chunk pid=35386)[0m 
[36m(predict_by_chunk pid=35382)[0m 
Batches: 100%|██████████| 1/1 [00:01<00:00,  1.94s/it][A
[36m(predict_by_chunk pid=35382)[0m }) 0:   1%|          | 1/100 [00:01<03:15,  1.97s/it]
[36m(predict_by_chunk pid=35384)[0m 
[36m(predict_by_chunk pid=35381)[0m 
[36m(predict_by_chunk pid=35386)[0m 
Batches: 100%|██████████| 1/1 [00:00<00:00, 20.20it/s]
[36m(predict_by_chunk pid=35382)[0m 
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.06it/s]
[36m(predict_by_chunk pid=35384)[0m 
[36m(predict_by_chunk pid=35381)[0m 
[36m(predict_by_chunk pid=35386)[0m 
[36m(predict_by_chunk pid=35382)[0m 
[36m(predict_by_chunk pid=353

In [22]:
print(processed_chunks)

['processed [73066] chunk: 8.3650', 'processed [63558 63559 63560] chunk: 8.6792', 'processed [167983] chunk: 8.4100', 'processed [63777] chunk: 8.5137', 'processed [70726] chunk: 4.5242', 'processed [81730] chunk: 4.4061', 'processed [192983] chunk: 5.1766', 'processed [206691] chunk: 5.3145']


In [20]:
ray.shutdown()