# Cloning data from one Weaviate instance to another

In [None]:
import os
import sys
import weaviate
from dotenv import load_dotenv
from askem.retriever.migrate import convert_data, MigrationManager

# Load variables from a .env file into the process environment; the
# WEAVIATE_APIKEY value read with os.getenv further down may come from there.
load_dotenv()
# Make modules under ./askem/retriever (relative to the current working
# directory) importable by their bare names.
# NOTE(review): this runs after the `askem.retriever.migrate` import above, so
# it cannot influence that import — confirm whether it is still required.
sys.path.append("./askem/retriever")

Note that the `convert_data` function renames the `type` property to `doc_type`.

Create all clients

In [None]:
from getpass import getpass

# Test instance: used as the destination of both migrations in this notebook.
# The key is read from the WEAVIATE_APIKEY environment variable (os.getenv
# returns None when the variable is unset).
test_client = weaviate.Client(
    url="http://weaviate:8080",
    auth_client_secret=weaviate.AuthApiKey(api_key=os.getenv("WEAVIATE_APIKEY")),
)

# ASKEM instance: source of the first migration; authenticates with the same
# WEAVIATE_APIKEY value as the test instance.
askem_client = weaviate.Client(
    url="http://cosmos0001.chtc.wisc.edu:8080",
    auth_client_secret=weaviate.AuthApiKey(api_key=os.getenv("WEAVIATE_APIKEY")),
)

# The Dolomite key is entered interactively. getpass() is used instead of
# input() so the secret is not echoed and does not end up stored in the
# notebook's cell output.
DOLOMITE_KEY = getpass("Enter the Dolomite weaviate API key: ")

# Dolomite instance: source of the second migration.
# NOTE(review): all three URLs are plain http, so the API keys are sent
# unencrypted — confirm this is acceptable on the network in use.
dolomite_client = weaviate.Client(
    url="http://cosmos0004.chtc.wisc.edu:8080",
    auth_client_secret=weaviate.AuthApiKey(api_key=DOLOMITE_KEY),
)

Use a fresh test Weaviate instance for safety: the next cell deletes the instance's entire schema before re-initializing it.

In [None]:
from askem.retriever.base import init_retriever

# WARNING: destructive. delete_all() removes the whole schema of the instance
# behind test_client (per the Weaviate client docs, together with the data
# stored under it). Only run this against the throwaway test instance.
test_client.schema.delete_all()
# Re-initialize the test instance with version 1 of the retriever setup.
# NOTE(review): presumably this recreates the "Passage" class — confirm in
# askem.retriever.base.
init_retriever(test_client, version=1)
# Aggregate meta count for the "Passage" class; as the last expression of the
# cell, its result is displayed as the cell output.
test_client.query.aggregate("Passage").with_meta_count().do()

Clone from ASKEM to TEST

In [None]:
# Migration of the "Passage" class from the ASKEM instance (source) into the
# test instance (destination).
askem_endpoints = {
    "source_client": askem_client,
    "destination_client": test_client,
}
askem_migration = MigrationManager(class_name="Passage", **askem_endpoints)

In [None]:
# Properties requested from the source instance for every object; each
# fetched record is passed through convert_data before being written.
askem_source_properties = [
    "paper_id",
    "topic",
    "preprocessor_id",
    "type",
    "text_content",
]
askem_clone_options = {
    "batch_size": 1000,
    "source_properties": askem_source_properties,
    "parsing_function": convert_data,
    # Remove this entry to actually run the entire migration.
    "debug": True,
}
askem_migration.clone(**askem_clone_options)

In [None]:
# Aggregate meta count of the "Passage" class on the migration's destination
# instance; the result is displayed as the cell output.
askem_destination = askem_migration.destination_client
askem_destination.query.aggregate("Passage").with_meta_count().do()

Clone from Dolomite to TEST

In [None]:
# Migration of the "Passage" class from the Dolomite instance (source) into
# the test instance (destination).
dolomite_endpoints = {
    "source_client": dolomite_client,
    "destination_client": test_client,
}
dolomite_migration = MigrationManager(class_name="Passage", **dolomite_endpoints)

In [None]:
# Properties requested from the source instance for every object; each
# fetched record is passed through convert_data before being written.
dolomite_source_properties = [
    "paper_id",
    "topic",
    "preprocessor_id",
    "type",
    "text_content",
]
dolomite_clone_options = {
    "batch_size": 1000,
    "source_properties": dolomite_source_properties,
    "parsing_function": convert_data,
    # Remove this entry to actually run the entire migration.
    "debug": True,
}
dolomite_migration.clone(**dolomite_clone_options)

In [None]:
# Aggregate meta count of the "Passage" class on the migration's destination
# instance; the result is displayed as the cell output.
dolomite_destination = dolomite_migration.destination_client
dolomite_destination.query.aggregate("Passage").with_meta_count().do()