In [None]:
%%capture
# Install Haystack straight from the tip of its GitHub repository (no tag or commit is pinned).
# NOTE(review): the imports in the next cell use the `haystack.retriever` /
# `haystack.document_store` / `haystack.generator` module layout; presumably a specific
# tag or commit should be pinned here so a later master cannot break them -- confirm
# which revision this notebook was run against.
!pip install git+https://github.com/deepset-ai/haystack.git
# Download the question/answer table from Google Drive and save it as QA.csv
# (certificate verification is switched off by --no-check-certificate).
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1kKZSBpgDwRCvaMR9y9caEs1wSdbFKRzs' -O QA.csv
# Extra installs, currently disabled.
# NOTE(review): `sentence_transformers` is imported further down; presumably it is
# already pulled in by the Haystack install -- confirm.
# !pip install transformers
# !pip install sentence-transformers

In [None]:
import pandas as pd
from haystack.retriever.dense import DensePassageRetriever
from haystack.preprocessor.utils import fetch_archive_from_http
from haystack.document_store.memory import InMemoryDocumentStore
from haystack.generator.transformers import RAGenerator
from haystack.document_store.faiss import FAISSDocumentStore

In [None]:
# Load the QA table saved as QA.csv by the download step. Later cells read its
# 'context', 'title', 'question' and 'answer' columns.
qa = pd.read_csv('QA.csv')

In [None]:
# FAISS-backed document store built with the "Flat" index factory string.
# `return_embedding=True` makes the store hand back embeddings with the documents,
# so the generator doesn't have to perform re-embedding.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)

# One document dict per row of `qa`: the passage as text, its title as metadata.
documents = [
    {"text": row['context'], "meta": {"title": row['title']}}
    for _, row in qa.iterrows()
]

In [None]:
%%capture
# DPR retriever bound to `document_store`: it encodes the stored passages and the
# incoming questions, and is what later cells call to look up passages for a question.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    max_seq_len_query=64,
    max_seq_len_passage=256,
    batch_size=16,
    use_gpu=True,
    embed_title=True,
    use_fast_tokenizers=True,
)

09/17/2021 00:42:06 - INFO - filelock -   Lock 140014167241744 acquired on /root/.cache/huggingface/transformers/4ad08b5f983c1384baaf257d8edf51a7a3961fd8c75a1778ac604e3c0b564dd9.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
09/17/2021 00:42:08 - INFO - filelock -   Lock 140014167241744 released on /root/.cache/huggingface/transformers/4ad08b5f983c1384baaf257d8edf51a7a3961fd8c75a1778ac604e3c0b564dd9.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
09/17/2021 00:42:09 - INFO - filelock -   Lock 140014167241744 acquired on /root/.cache/huggingface/transformers/b305bc9085b3d0ce33551c251b75c11b6c6df1d4d51e5d3439d01cf4bb1abc9d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
09/17/2021 00:42:11 - INFO - filelock -   Lock 140014167241744 released on /root/.cache/huggingface/transformers/b305bc9085b3d0ce33551c251b75c11b6c6df1d4d51e5d3439d01cf4bb1abc9d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
09/17/20

In [None]:
%%capture
# Build the RAG answer generator from the `facebook/rag-token-nq` checkpoint, with GPU enabled.
# The prediction cells further down hand it the retriever's documents and also pass
# `top_k=1` explicitly on every `predict()` call.
generator = RAGenerator(
    model_name_or_path="facebook/rag-token-nq",
    use_gpu=True,
    top_k=1,
    max_length=200,
    min_length=2,
    embed_title=True,
    num_beams=2,
)

09/17/2021 00:43:01 - INFO - farm.utils -   Using device: CUDA 
09/17/2021 00:43:01 - INFO - farm.utils -   Number of GPUs: 1
09/17/2021 00:43:01 - INFO - farm.utils -   Distributed Training: False
09/17/2021 00:43:01 - INFO - farm.utils -   Automatic Mixed Precision: None
09/17/2021 00:43:02 - INFO - filelock -   Lock 140013784488400 acquired on /root/.cache/huggingface/transformers/6337b0203e20d15c98f5e500e1e673c74e71bb8617b2753a53663b9b8e6dfc1a.59948e1fef260da10a0cecb8b6862373c32f40001848a63f985ab4f9d787f3f1.lock
09/17/2021 00:43:03 - INFO - filelock -   Lock 140013784488400 released on /root/.cache/huggingface/transformers/6337b0203e20d15c98f5e500e1e673c74e71bb8617b2753a53663b9b8e6dfc1a.59948e1fef260da10a0cecb8b6862373c32f40001848a63f985ab4f9d787f3f1.lock
09/17/2021 00:43:04 - INFO - filelock -   Lock 140013784089360 acquired on /root/.cache/huggingface/transformers/26cf899a0974235af1f84469ddd94d2ee83c803c23ecead93b511ce8a0744f5c.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea2

In [None]:
# Start from an empty store, then load every passage built from the QA table.
document_store.delete_documents()
document_store.write_documents(documents)

# Have the DPR retriever embed the stored passages and add the embeddings to the index.
# The recorded run of this step took close to an hour for 85320 documents.
document_store.update_embeddings(retriever=retriever)

09/17/2021 00:48:33 - INFO - haystack.document_store.faiss -   Updating embeddings for 85320 docs...
Updating Embedding:   0%|          | 0/85320 [00:00<?, ? docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  12%|█▏        | 10000/85320 [13:24<1:39:17, 12.64 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  23%|██▎       | 20000/85320 [19:50<1:00:05, 18.12 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  35%|███▌      | 30000/85320 [25:50<42:48, 21.54 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  47%|████▋     | 40000/85320 [31:50<31:58, 23.62 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  59%|█████▊    | 50000/85320 [37:50<23:32, 25.00 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  70%|███████   | 60000/85320 [43:44<16:17, 25.90 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  82%|████████▏ | 70000/85320 [49:44<09:38, 26.49 docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  94%|█████████▍| 80000/85320 [55:44<03:17, 26.87 docs/s]

Create embeddings:   0%|          | 0/5328 [00:00<?, ? Docs/s]

Documents Processed: 90000 docs [58:47, 25.52 docs/s]


In [None]:
import pickle

# Fetch the documents *before* opening the output file: opening with 'wb' truncates
# any existing document_store.pkl immediately, so a failure while fetching would
# otherwise destroy the previous pickle and leave an empty file behind.
documents_out = document_store.get_all_documents()

# Serialize the fetched documents with the highest pickle protocol available.
with open('document_store.pkl', 'wb') as outp:
    pickle.dump(documents_out, outp, pickle.HIGHEST_PROTOCOL)

In [None]:
# Install/upgrade the `gupload` command-line tool used for the upload at the end of this cell.
!pip install --upgrade gupload

from google.colab import auth

# Authenticate the current Colab user. Only `auth.authenticate_user()` is called;
# no PyDrive client is created in this cell.
auth.authenticate_user()

# Upload the pickled documents. The `--to` value is presumably the ID of the target
# Google Drive folder -- TODO confirm.
!gupload --to '1W9R77oTk_DAMxfSjKGOLNj5xFtoC7gLF' document_store.pkl

In [None]:
# Two-step Google Drive download of document_store.pkl:
#   1. the inner `wget` requests the file page, saves the session cookies to
#      /tmp/cookies.txt, and `sed` extracts the `confirm=` token from the response;
#   2. the outer `wget` repeats the request with those cookies and that token and
#      writes the result to document_store.pkl.
# The cookie file is removed afterwards (only if the download command succeeded, because of `&&`).
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1pOgs7nVlJGE60Cx77qrQfwUsYEKx6Ue0' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1pOgs7nVlJGE60Cx77qrQfwUsYEKx6Ue0" -O document_store.pkl && rm -rf /tmp/cookies.txt


--2021-09-16 01:13:44--  https://docs.google.com/uc?export=download&confirm=zHxM&id=1pOgs7nVlJGE60Cx77qrQfwUsYEKx6Ue0
Resolving docs.google.com (docs.google.com)... 74.125.206.100, 74.125.206.101, 74.125.206.102, ...
Connecting to docs.google.com (docs.google.com)|74.125.206.100|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-08-4s-docs.googleusercontent.com/docs/securesc/6j2e4u9n5vnpkd5k5c1a82lh5dqqroc8/acfds895j3ro00gk0rhuad8h26dbl5iq/1631754750000/03976892977300334194/00431816533186696515Z/1pOgs7nVlJGE60Cx77qrQfwUsYEKx6Ue0?e=download [following]
--2021-09-16 01:13:44--  https://doc-08-4s-docs.googleusercontent.com/docs/securesc/6j2e4u9n5vnpkd5k5c1a82lh5dqqroc8/acfds895j3ro00gk0rhuad8h26dbl5iq/1631754750000/03976892977300334194/00431816533186696515Z/1pOgs7nVlJGE60Cx77qrQfwUsYEKx6Ue0?e=download
Resolving doc-08-4s-docs.googleusercontent.com (doc-08-4s-docs.googleusercontent.com)... 64.233.166.132, 2a00:1450:400c:c09::84
Connecting

In [None]:
import pickle

# SECURITY: unpickling can execute arbitrary code contained in the file. Only load a
# document_store.pkl that this notebook wrote or that comes from a source you trust.
with open('document_store.pkl', 'rb') as inp:
    docs = pickle.load(inp)

# Fresh FAISS store (same settings as the original one) to hold the restored documents.
document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
    return_embedding=True
)

# Delete existing documents in documents store
document_store.delete_documents()

# Write documents to document store
document_store.write_documents(docs)

# `retriever` was constructed around the previous store object. Point it at the
# restored store, otherwise later `retriever.retrieve(...)` calls keep querying the
# old store and never see the documents loaded here.
# NOTE(review): relies on DensePassageRetriever keeping its store in the
# `document_store` attribute -- confirm for the installed Haystack version.
retriever.document_store = document_store

Please call `update_embeddings` method to repopulate `faiss_index`


KeyboardInterrupt: ignored

In [None]:
# Smoke test on the first question of the QA table: retrieve passages for it,
# then let the generator produce a single answer from those passages.
first_question = qa['question'][0]

retriever_results = retriever.retrieve(query=first_question)
predicted_result = generator.predict(query=first_question, documents=retriever_results, top_k=1)

# Last expression of the cell, so the generated answers are displayed.
predicted_result["answers"]



[{'answer': ' brome mosaic virus',
  'meta': {'doc_ids': ['e6ee1cf01fda6bde1b674343eb8a5abb',
    '1aa06b86ddae41cbb3485c2374cd7a39',
    'fc78a0cd7f46d57aea36150367e10ba5',
    'd39076f4b4141c5d6ee08fa2ba15383a',
    'a02688ba5bbe40ef8d970b281f6fc84',
    '96cb202bd7757d40cc5c2d411dcf57a5',
    '30a12f49b13d58b97979a4b6e57b9c33',
    '1744d956141581e22c49679e38de2e21',
    '103c3b0affe3dd86389d98eaf024b55a',
    'c8b3ddfeeee5c80a6db4a91aa0b60b0e'],
   'doc_scores': [0.6830282113553966,
    0.6814360358445017,
    0.6809773754267667,
    0.6800101307179998,
    0.6783921557878394,
    0.678152596423112,
    0.6766527005720612,
    0.6765705172337811,
    0.6761687088118209,
    0.6756279619088759],
   'texts': ['SGs are reversible dynamic structures that rapidly form when cells encounter environmental stress that reduces global translation rates. SG form from concentration of stalled assembled 43S and 48S ribosomal preinitiation complexes and serve as temporary repositories for these c

TypeError: ignored

In [None]:
from tqdm import tqdm
import csv
from scipy import spatial
from sentence_transformers import SentenceTransformer, util
# Sentence encoder used to compare the reference answer with the generated one.
modelSentence = SentenceTransformer('bert-base-nli-mean-tokens')

# A QA pair is kept only when the cosine similarity between its reference answer and
# the RAG-generated answer is strictly above this value.
ROUND_TRIP_MIN_SIMILARITY = 0.8

# Round-trip filter: for every row of `qa`, answer the question with retriever +
# generator and write the row to the output CSV only if the generated answer is
# close enough to the reference answer.
#
# The `with` block guarantees the CSV is flushed and closed even when this very long
# loop is interrupted or raises; previously the file handle was never closed, so
# buffered rows could be lost.
with open('synthetic_QA_round_trip_RAG.csv', 'wt', newline ='') as synthetic_QA_round_trip:
    synthetic_QA_round_trip_writer = csv.writer(synthetic_QA_round_trip, delimiter = ',')
    synthetic_QA_round_trip_writer.writerow(['passage', 'question', 'answer'])

    for i, q in tqdm(qa.iterrows(), total = qa.shape[0], position = 0, leave = True):
        retriever_results = retriever.retrieve(
            query = q['question']
        )
        # Nothing retrieved: there is nothing to generate from, so the pair cannot
        # pass the round-trip check.
        if not retriever_results:
            continue

        predicted_result = generator.predict(
            query = q['question'],
            documents = retriever_results,
            top_k = 1
        )
        answers = predicted_result["answers"]
        # No generated answer: avoid indexing into an empty list; the pair is dropped.
        if not answers:
            continue

        # Encode [reference answer, generated answer] in one call.
        sentence_embeddings = modelSentence.encode([q['answer'], answers[0]['answer']], device = 'cuda', show_progress_bar = False)

        # cos_sim returns a 1x1 matrix for two single vectors; take its only entry.
        similarity = util.cos_sim(sentence_embeddings[0], sentence_embeddings[1]).numpy()[0][0]
        if similarity > ROUND_TRIP_MIN_SIMILARITY:
            synthetic_QA_round_trip_writer.writerow([q['context'], q['question'], q['answer']])


09/17/2021 01:47:22 - INFO - sentence_transformers.SentenceTransformer -   Load pretrained SentenceTransformer: bert-base-nli-mean-tokens
09/17/2021 01:47:23 - INFO - filelock -   Lock 140013774971408 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/.gitattributes.lock
09/17/2021 01:47:23 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/.gitattributes to /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tmprav3jd5o


Downloading:   0%|          | 0.00/391 [00:00<?, ?B/s]

09/17/2021 01:47:24 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/.gitattributes in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/.gitattributes
09/17/2021 01:47:24 - INFO - filelock -   Lock 140013774971408 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/.gitattributes.lock
09/17/2021 01:47:25 - INFO - filelock -   Lock 140013453574992 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/README.md.lock
09/17/2021 01:47:25 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37

Downloading:   0%|          | 0.00/3.95k [00:00<?, ?B/s]

09/17/2021 01:47:26 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/README.md in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/README.md
09/17/2021 01:47:26 - INFO - filelock -   Lock 140013453574992 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/README.md.lock
09/17/2021 01:47:26 - INFO - filelock -   Lock 140013488556752 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/added_tokens.json.lock
09/17/2021 01:47:26 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/a

Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

09/17/2021 01:47:27 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/added_tokens.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/added_tokens.json
09/17/2021 01:47:27 - INFO - filelock -   Lock 140013488556752 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/added_tokens.json.lock
09/17/2021 01:47:28 - INFO - filelock -   Lock 140013488556752 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/config.json.lock
09/17/2021 01:47:28 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

09/17/2021 01:47:29 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/config.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/config.json
09/17/2021 01:47:29 - INFO - filelock -   Lock 140013488556752 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/config.json.lock
09/17/2021 01:47:30 - INFO - filelock -   Lock 140013598985040 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/config_sentence_transformers.json.lock
09/17/2021 01:47:30 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

09/17/2021 01:47:30 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/config_sentence_transformers.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/config_sentence_transformers.json
09/17/2021 01:47:30 - INFO - filelock -   Lock 140013598985040 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/config_sentence_transformers.json.lock
09/17/2021 01:47:31 - INFO - filelock -   Lock 140013488556752 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/modules.json.lock
09/17/2021 01:47:31 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

09/17/2021 01:47:32 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/modules.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/modules.json
09/17/2021 01:47:32 - INFO - filelock -   Lock 140013488556752 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/modules.json.lock
09/17/2021 01:47:33 - INFO - filelock -   Lock 140013598985040 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/pytorch_model.bin.lock
09/17/2021 01:47:33 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

09/17/2021 01:47:47 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/pytorch_model.bin in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/pytorch_model.bin
09/17/2021 01:47:47 - INFO - filelock -   Lock 140013598985040 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/pytorch_model.bin.lock
09/17/2021 01:47:48 - INFO - filelock -   Lock 140013488556752 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/sentence_bert_config.json.lock
09/17/2021 01:47:48 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

09/17/2021 01:47:49 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/sentence_bert_config.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/sentence_bert_config.json
09/17/2021 01:47:49 - INFO - filelock -   Lock 140013488556752 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/sentence_bert_config.json.lock
09/17/2021 01:47:50 - INFO - filelock -   Lock 140013542559568 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/special_tokens_map.json.lock
09/17/2021 01:47:50 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tok

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

09/17/2021 01:47:51 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/special_tokens_map.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/special_tokens_map.json
09/17/2021 01:47:51 - INFO - filelock -   Lock 140013542559568 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/special_tokens_map.json.lock
09/17/2021 01:47:51 - INFO - filelock -   Lock 140013598985040 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tokenizer.json.lock
09/17/2021 01:47:51 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

09/17/2021 01:47:53 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/tokenizer.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tokenizer.json
09/17/2021 01:47:53 - INFO - filelock -   Lock 140013598985040 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tokenizer.json.lock
09/17/2021 01:47:54 - INFO - filelock -   Lock 140013488556752 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tokenizer_config.json.lock
09/17/2021 01:47:54 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664

Downloading:   0%|          | 0.00/399 [00:00<?, ?B/s]

09/17/2021 01:47:55 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/tokenizer_config.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tokenizer_config.json
09/17/2021 01:47:55 - INFO - filelock -   Lock 140013488556752 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/tokenizer_config.json.lock
09/17/2021 01:47:55 - INFO - filelock -   Lock 140013535979344 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/vocab.txt.lock
09/17/2021 01:47:55 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedf

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

09/17/2021 01:47:57 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/vocab.txt in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/vocab.txt
09/17/2021 01:47:57 - INFO - filelock -   Lock 140013535979344 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/vocab.txt.lock
09/17/2021 01:47:58 - INFO - filelock -   Lock 140013774971408 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/1_Pooling/config.json.lock
09/17/2021 01:47:58 - INFO - huggingface_hub.file_download -   downloading https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

09/17/2021 01:47:58 - INFO - huggingface_hub.file_download -   storing https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens/resolve/940b797366fedff0e361664b655910c1d37edf76/1_Pooling/config.json in cache at /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/1_Pooling/config.json
09/17/2021 01:47:58 - INFO - filelock -   Lock 140013774971408 released on /root/.cache/torch/sentence_transformers/sentence-transformers__bert-base-nli-mean-tokens.940b797366fedff0e361664b655910c1d37edf76/1_Pooling/config.json.lock
09/17/2021 01:48:00 - INFO - sentence_transformers.SentenceTransformer -   Use pytorch device: cuda
 18%|█▊        | 15329/87331 [5:15:31<24:15:05,  1.21s/it]

In [None]:
# %%capture
# !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1kKZSBpgDwRCvaMR9y9caEs1wSdbFKRzs' -O QA.csv
# !wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1Evu3wda_3XJozb-U8amJAmlCCrnAD5-k' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1Evu3wda_3XJozb-U8amJAmlCCrnAD5-k" -O my_knowledge_dataset.zip && rm -rf /tmp/cookies.txt
# !wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/rag/requirements.txt requirements.txt
# !wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/rag/use_own_knowledge_dataset.py use_own_knowledge_dataset.py
# !pip install -r requirements.txt
# !pip install sentence-transformers