In [1]:
!pip install -q "dlt[weaviate]"

In [3]:
!pip install sqlalchemy pymysql

Looking in indexes: https://pypi.org/simple, https://packagecloud.io/github/git-lfs/pypi/simple
Collecting pymysql
  Obtaining dependency information for pymysql from https://files.pythonhosted.org/packages/e5/30/20467e39523d0cfc2b6227902d3687a16364307260c75e6a1cb4422b0c62/PyMySQL-1.1.0-py3-none-any.whl.metadata
  Downloading PyMySQL-1.1.0-py3-none-any.whl.metadata (4.4 kB)
Downloading PyMySQL-1.1.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymysql
Successfully installed pymysql-1.1.0


In [2]:
# Add the `sql_database` verified source to the project with `weaviate` as the
# destination. `--non-interactive` presumably suppresses confirmation prompts,
# which cannot be answered from a notebook cell - confirm against the dlt CLI help.
!dlt --non-interactive init sql_database weaviate 

Looking up the init scripts in [1mhttps://github.com/dlt-hub/verified-sources.git[0m...
Cloning and configuring a verified source [1msql_database[0m (Source that loads tables form any SQLAlchemy supported database, supports batching requests and incremental loads.)

Verified source [1msql_database[0m was added to your project!
* See the usage examples and code snippets to copy from [1msql_database_pipeline.py[0m
* Add credentials for [1mweaviate[0m and other secrets in [1m./.dlt/secrets.toml[0m
* Add the required dependencies to [1mpyproject.toml[0m:
  [1msqlalchemy>=1.4[0m
  [1mdlt[weaviate]<0.4,>=0.3.5[0m
  If the dlt dependency is already added, make sure you install the extra for [1mweaviate[0m to it
  If you are using poetry you may issue the following command:
[1m  poetry add dlt -E weaviate[0m

* Read [1mhttps://dlthub.com/docs/walkthroughs/create-a-pipeline[0m for more information


In [12]:
import os
import weaviate


def show_data(class_name, properties, limit=None):
    """Fetch objects of one Weaviate class and return the raw query response.

    Connection settings are read from environment variables:

    * ``WEAVIATE_URL`` - required; URL of the Weaviate instance.
    * ``WEAVIATE_API_KEY`` - optional; when unset the client is created
      without credentials.
    * ``WEAVIATE_OPENAI_KEY`` - optional; forwarded as the
      ``X-OpenAI-Api-Key`` header when set.

    Args:
        class_name: Name of the Weaviate class to query, e.g. ``"Rfam_Family"``.
        properties: Property names to return for each object.
        limit: Optional maximum number of objects to fetch. ``None`` (the
            default) does not add a limit to the query.

    Returns:
        Whatever the client's ``Get`` query returns from ``.do()``.

    Raises:
        RuntimeError: If ``WEAVIATE_URL`` is not set or is empty.
    """
    url = os.getenv("WEAVIATE_URL")
    if not url:
        # Fail here with a readable message instead of handing None to the client.
        raise RuntimeError(
            "WEAVIATE_URL is not set; export it before calling show_data()."
        )

    api_key = os.getenv("WEAVIATE_API_KEY")
    openai_key = os.getenv("WEAVIATE_OPENAI_KEY")

    client = weaviate.Client(
        url=url,
        # Only build credentials / headers from values that are actually present,
        # so a missing variable never reaches the client as None.
        auth_client_secret=weaviate.AuthApiKey(api_key=api_key) if api_key else None,
        additional_headers={"X-OpenAI-Api-Key": openai_key} if openai_key else None,
    )

    query = client.query.get(class_name, properties)
    if limit is not None:
        query = query.with_limit(limit)
    return query.do()

In [4]:
import dlt
from dlt.destinations.weaviate import weaviate_adapter

from sql_database import sql_database, sql_table

# Pipeline that loads into the Weaviate destination; pipeline and dataset share a name.
pipeline = dlt.pipeline(
    pipeline_name="rfam",
    destination="weaviate",
    dataset_name="rfam",
)

# Source: the single `family` table. No connection settings are passed here, so
# they presumably come from the dlt config/secrets files - confirm.
load_source = sql_table(table="family")

# Mark `description` for vectorization with word-level tokenization, then run the load.
# NOTE(review): no write disposition is given; check whether re-running this cell
# adds the rows again, and pass write_disposition="replace" to pipeline.run if so.
load_info = pipeline.run(
    weaviate_adapter(
        load_source,
        vectorize="description",
        tokenization={"description": "word"},
    )
)

# Show what the normalize step produced, a separator, then the load summary.
row_counts = pipeline.last_trace.last_normalize_info
print(row_counts, "------", load_info, sep="\n")

            Please instead use the `client.batch.configure()` method to configure your batch and `client.batch` to enter the context manager.
            See https://weaviate.io/developers/weaviate/client-libraries/python for details.


Normalized data for the following tables:
- DltPipelineState: 1 row(s)
- Family: 4108 row(s)

------
Pipeline rfam completed in 1 minute and 37.41 seconds
1 load package(s) were loaded to destination weaviate and into dataset Rfam
The weaviate destination used https://demo-1-wvxjul5s.weaviate.network location to store data
Load package 1694104408.537511 is LOADED and contains no failed jobs


In [14]:
# Spot-check the load: fetch the `description` property of the `Rfam_Family` class.
show_data("Rfam_Family", ["description"])

{'data': {'Get': {'Rfam_Family': [{'description': 'CDKN2B antisense RNA 1 intronic convserved region'},
    {'description': 'microRNA mir-605'},
    {'description': 'mir-974 microRNA precursor family'},
    {'description': 'microRNA mir-633'},
    {'description': 'microRNA mir-569'},
    {'description': 'mir-6715 microRNA precursor family'},
    {'description': 'Small nucleolar RNA Z103'},
    {'description': 'Small nucleolar RNA SNORD70'},
    {'description': 'mir-5856 microRNA precursor family'},
    {'description': 'ctRNA'},
    {'description': 'MIR4245 microRNA precursor family'},
    {'description': 'mir-2068 microRNA precursor family'},
    {'description': 'mir-5890 microRNA precursor family'},
    {'description': 'Leptospira sRNA 30_255'},
    {'description': 'Pospiviroid RY motif stem loop'},
    {'description': 'TeloSII non coding RNA 45'},
    {'description': 'MIR2871 microRNA precursor family'},
    {'description': 'mir-1017 microRNA precursor family'},
    {'description': '