In [1]:
# ! pip install -U weaviate-client

In [2]:
! pip show weaviate-client

Name: weaviate-client
Version: 4.7.1
Summary: A python native Weaviate client
Home-page: https://github.com/weaviate/weaviate-python-client
Author: Weaviate
Author-email: hello@weaviate.io,
License: BSD 3-clause
Location: /Users/avinash/anaconda3/lib/python3.11/site-packages
Requires: authlib, grpcio, grpcio-health-checking, grpcio-tools, httpx, pydantic, requests, validators
Required-by: 


In [3]:
import weaviate
import os

In [4]:
import weaviate

# For Connecting to weaviate to localhost:8080 (mentioned in docker-confif.yaml)
client = weaviate.connect_to_local()

# # For Closing the connection
# client.close() 

In [5]:
client

<weaviate.client.WeaviateClient at 0x1089a53d0>

In [6]:
# ! pip install --upgrade weaviate-client

In [7]:
import weaviate

client = weaviate.Client(url="http://localhost:8080")

# Example: Fetch metadata
meta = client.get_meta()
print(meta)


{'hostname': 'http://[::]:8080', 'modules': {'text2vec-contextionary': {'version': 'en0.16.0-v1.2.1', 'wordCount': 818072}}, 'version': '1.26.3'}


            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            


In [8]:
assert client.is_live()  # This will raise an exception if the client is not live

        client.get_meta() :
                This method fetches metadata from the Weaviate instance. 
                Metadata typically includes information about the Weaviate instance, 
                such as its version, available modules, and other configuration details.

        json.dumps() => This function converts a Python object into a JSON string.
        
        json.dumps(metainfo indent) => converts the metainfo dictionary into a JSON-formatted string
        
        indent=2: 
            This argument specifies that the JSON output should be formatted with an indentation of 2 spaces. 
            This makes the JSON string more readable by adding line breaks and indentation.

In [9]:
import json

metainfo = client.get_meta()
print("Metainfo as Python object : ")
print(metainfo)
print("Dtype - ", type(metainfo))
print("--------------------------------------------------------------------")
print("Metainfo as JSON-formatted String : ")
print(json.dumps(metainfo, indent=2))  # Print the meta information in a readable format
print("Dtype - ", type(json.dumps(metainfo, indent=2)))

Metainfo as Python object : 
{'hostname': 'http://[::]:8080', 'modules': {'text2vec-contextionary': {'version': 'en0.16.0-v1.2.1', 'wordCount': 818072}}, 'version': '1.26.3'}
Dtype -  <class 'dict'>
--------------------------------------------------------------------
Metainfo as JSON-formatted String : 
{
  "hostname": "http://[::]:8080",
  "modules": {
    "text2vec-contextionary": {
      "version": "en0.16.0-v1.2.1",
      "wordCount": 818072
    }
  },
  "version": "1.26.3"
}
Dtype -  <class 'str'>


### To know the schemas present in the current instance

In [10]:
# Retrieve the schema
schema = client.schema.get()

# Print the schema in a readable format
import json
print(json.dumps(schema, indent=2))

{
  "classes": [
    {
      "class": "Movie",
      "invertedIndexConfig": {
        "bm25": {
          "b": 0.75,
          "k1": 1.2
        },
        "cleanupIntervalSeconds": 60,
        "stopwords": {
          "additions": null,
          "preset": "en",
          "removals": null
        }
      },
      "moduleConfig": {
        "text2vec-contextionary": {
          "vectorizeClassName": true
        }
      },
      "multiTenancyConfig": {
        "autoTenantActivation": false,
        "autoTenantCreation": false,
        "enabled": false
      },
      "properties": [
        {
          "dataType": [
            "text"
          ],
          "indexFilterable": true,
          "indexRangeFilters": false,
          "indexSearchable": true,
          "moduleConfig": {
            "text2vec-contextionary": {
              "skip": false,
              "vectorizePropertyName": false
            }
          },
          "name": "title",
          "tokenization": "word"
        }

### Deleting a schema present

In [11]:
# Name of the class to delete
class_name = "Movie"  # Replace with the class you want to delete

# Delete the class from the schema
client.schema.delete_class(class_name)

print(f"Class '{class_name}' has been deleted.")

Class 'Movie' has been deleted.


### Creating a new class "Movie" and defining the schema, embedding model

In [12]:
# Defining the Schema

schema = {
    'class':'Movie',
    'properties':[
        {'name':'title', "dataType":['text']},
        {'name':'overview', "dataType":['text']},
        {'name':'vote_average', "dataType":['text']},
        {'name':'genre_ids', "dataType":['int[]']},
        {'name':'release_date', "dataType":['date']},
        {'name':'tmdb_id', "dataType":['int']}
    ],
    
    'vectorizer': 'text2vec-contextionary'
}

In [13]:
# Create the class in the Weaviate schema
client.schema.create_class(schema)

### Retreiving the data stored 

In [14]:
# Define the class name you want to query
class_name = "Movie"

# Retrieve all objects from the class
results = client.query.get(class_name, ["title", "overview", "vote_average", "genre_ids", "release_date", "tmdb_id"]).do()

# Print the results in a readable format
print(json.dumps(results, indent=2))

{
  "data": {
    "Get": {
      "Movie": []
    }
  }
}


### Loading the Data (Batch_Process)

In [15]:
import requests
import pandas as pd

In [16]:
data_url = "https://raw.githubusercontent.com/weaviate-tutorials/edu-datasets/main/movies_data_1990_2024.json"
resp = requests.get(data_url)
df = pd.DataFrame(resp.json())


In [17]:
df.columns

Index(['backdrop_path', 'genre_ids', 'id', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'release_date', 'title', 'video', 'vote_average', 'vote_count'],
      dtype='object')

In [18]:
df.shape

(680, 13)

In [19]:
df.head()

Unnamed: 0,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,/3Nn5BOM1EVw1IYrv6MsbOS6N1Ol.jpg,"[14, 18, 10749]",162,en,Edward Scissorhands,A small suburban town receives a visit from a ...,45.694,/1RFIbuW9Z3eN9Oxw2KaQG5DfLmD.jpg,1990-12-07,Edward Scissorhands,False,7.7,12305
1,/sw7mordbZxgITU877yTpZCud90M.jpg,"[18, 80]",769,en,GoodFellas,"The true story of Henry Hill, a half-Irish, ha...",57.228,/aKuFiU82s5ISJpGZp7YkIr3kCUd.jpg,1990-09-12,GoodFellas,False,8.5,12106
2,/6uLhSLXzB1ooJ3522ydrBZ2Hh0W.jpg,"[35, 10751]",771,en,Home Alone,Eight-year-old Kevin McCallister makes the mos...,3.538,/onTSipZ8R3bliBdKfPtsDuHTdlL.jpg,1990-11-16,Home Alone,False,7.4,10599
3,/vKp3NvqBkcjHkCHSGi6EbcP7g4J.jpg,"[12, 35, 878]",196,en,Back to the Future Part III,The final installment of the Back to the Futur...,28.896,/crzoVQnMzIrRfHtQw0tLBirNfVg.jpg,1990-05-25,Back to the Future Part III,False,7.5,9918
4,/3tuWpnCTe14zZZPt6sI1W9ByOXx.jpg,"[35, 10749]",114,en,Pretty Woman,When a millionaire wheeler-dealer enters a bus...,97.953,/hVHUfT801LQATGd26VPzhorIYza.jpg,1990-03-23,Pretty Woman,False,7.5,7671


In [20]:
# Get the collection
movies = client.schema.get("Movie")

In [21]:
movies

{'class': 'Movie',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'moduleConfig': {'text2vec-contextionary': {'vectorizeClassName': True}},
 'multiTenancyConfig': {'autoTenantActivation': False,
  'autoTenantCreation': False,
  'enabled': False},
 'properties': [{'dataType': ['text'],
   'indexFilterable': True,
   'indexRangeFilters': False,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-contextionary': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'title',
   'tokenization': 'word'},
  {'dataType': ['text'],
   'indexFilterable': True,
   'indexRangeFilters': False,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-contextionary': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'overview',
   'tokenization': 'word'},
  {'dataType': ['text'],
   'indexFilterable': True,
   'indexRangeFilters': False,
   'indexSearcha

In [22]:
import weaviate
from datetime import datetime, timezone
import json
from tqdm import tqdm

In [23]:
# Initialize Weaviate client
client = weaviate.Client(url="http://localhost:8080")

# Define the class name
class_name = "Movie"

            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            


### 3.1 Inserting Data into Schema

                ----------------    UUID   ----------------
            A UUID (Universally Unique Identifier) is a 128-bit number 
            used to uniquely identify information in computer systems

        "uuid.NAMESPACE_DNS" is one of the predefined namespaces available in the uuid module.
            A namespace in UUID generation provides a context for generating the UUID. 
            Think of it as a grouping of identifiers
            
            The use of a namespace ensures that two UUIDs 
            generated from the same name in different namespaces will be different.
            
            
            here are four predefined namespaces in Python:
                1.  uuid.NAMESPACE_DNS: Namespace for Domain Name System (DNS) names.
                2.  uuid.NAMESPACE_URL: Namespace for URLs.
                3.  uuid.NAMESPACE_OID: Namespace for ISO Object Identifiers (OIDs).
                4.  uuid.NAMESPACE_X500: Namespace for X.500 Distinguished Names.

            
            
         uuid.uuid5(namespace, name):
            This UUID will always be the same for the same namespace and name combination.

In [24]:
import uuid

def generate_uuid5(name):
    # Define a namespace UUID (you can use any UUID here, this is just an example)
    namespace = uuid.NAMESPACE_DNS
    # Generate and return the UUID based on the name and namespace
    return str(uuid.uuid5(namespace, name))


### 3.2 Inserting data to Schema using Batch Processing

In [25]:
with client.batch() as batch:
    for i, movie in tqdm(df.iterrows()):
        # Convert data types
        release_date = datetime.strptime(movie["release_date"], "%Y-%m-%d").replace(
            tzinfo=timezone.utc
        ).isoformat()  # Convert datetime to ISO 8601 string
        genre_ids = json.loads(movie["genre_ids"])

        # Build the object payload
        movie_obj = {
            "title": movie["title"],
            "overview": movie["overview"],
            "vote_average": str(movie["vote_average"]),
            "genre_ids": genre_ids,
            "release_date": release_date,
            "tmdb_id": movie["id"],
        }

        # Add object to batch
        batch.add_data_object(
            class_name=class_name,
            data_object=movie_obj,
            uuid=generate_uuid5(str(movie["id"]))  # Ensure you have this function defined
        )


            Use the `client.batch.configure()` method to configure your batch process, and `client.batch` to enter the context manager.

            See https://weaviate.io/developers/weaviate/client-libraries/python for details.
680it [00:00, 1143.94it/s]


In [26]:
# Define the class name you want to query
class_name = "Movie"

# Retrieve all objects from the class
results = client.query.get(class_name, ["title", "overview", "vote_average", "genre_ids", "release_date", "tmdb_id"]).do()

# Print the results in a readable format
print(json.dumps(results, indent=2))

{
  "data": {
    "Get": {
      "Movie": [
        {
          "genre_ids": [
            18
          ],
          "overview": "In 2003, Harvard undergrad and computer genius Mark Zuckerberg begins work on a new concept that eventually turns into the global social network known as Facebook. Six years later, he is one of the youngest billionaires ever, but Zuckerberg finds that his unprecedented success leads to both personal and legal complications when he ends up on the receiving end of two lawsuits, one involving his former friend.",
          "release_date": "2010-10-01T00:00:00Z",
          "title": "The Social Network",
          "tmdb_id": 37799,
          "vote_average": "7.4"
        },
        {
          "genre_ids": [
            27
          ],
          "overview": "In a small town in Maine, seven children known as The Losers Club come face to face with life problems, bullies and a monster that takes the shape of a clown called Pennywise.",
          "release_date": "201

## Performing "Near-text Search"

In [27]:
# Create a Weaviate client
client = weaviate.Client(url="http://localhost:8080")

# Define the near-text query
query = {
    "concepts": ["country into war"],  # The query text
#     "certainty": 0.7  # Optional: Certainty threshold
}

In [28]:
query

{'concepts': ['country into war']}

In [29]:
# Perform the query
response = client.query.get(class_name="Movie", properties=["title", "release_date"])\
                     .with_near_text(query)\
                     .with_additional(['certainty'])\
                     .do()

In [30]:
response

{'data': {'Get': {'Movie': [{'_additional': {'certainty': 0.939186155796051},
     'release_date': '2019-12-25T00:00:00Z',
     'title': '1917'},
    {'_additional': {'certainty': 0.9329001009464264},
     'release_date': '2006-02-23T00:00:00Z',
     'title': 'V for Vendetta'},
    {'_additional': {'certainty': 0.9296229183673859},
     'release_date': '1992-08-26T00:00:00Z',
     'title': 'The Last of the Mohicans'},
    {'_additional': {'certainty': 0.9270565509796143},
     'release_date': '2011-07-22T00:00:00Z',
     'title': 'Captain America: The First Avenger'},
    {'_additional': {'certainty': 0.9220730066299438},
     'release_date': '2016-04-27T00:00:00Z',
     'title': 'Captain America: Civil War'},
    {'_additional': {'certainty': 0.9206634163856506},
     'release_date': '2005-09-16T00:00:00Z',
     'title': 'Lord of War'},
    {'_additional': {'certainty': 0.9206005930900574},
     'release_date': '2017-07-19T00:00:00Z',
     'title': 'Dunkirk'},
    {'_additional': {'ce

In [31]:
# Inspect the response
for result in response['data']['Get']['Movie']:
    print(result['title'])
    print(result['release_date'])
    if '_additional' in result:
        print(f"Certainty: {result['_additional']['certainty']:.3f}")

1917
2019-12-25T00:00:00Z
Certainty: 0.939
V for Vendetta
2006-02-23T00:00:00Z
Certainty: 0.933
The Last of the Mohicans
1992-08-26T00:00:00Z
Certainty: 0.930
Captain America: The First Avenger
2011-07-22T00:00:00Z
Certainty: 0.927
Captain America: Civil War
2016-04-27T00:00:00Z
Certainty: 0.922
Lord of War
2005-09-16T00:00:00Z
Certainty: 0.921
Dunkirk
2017-07-19T00:00:00Z
Certainty: 0.921
Saving Private Ryan
1998-07-24T00:00:00Z
Certainty: 0.920
Dances with Wolves
1990-03-30T00:00:00Z
Certainty: 0.920
Braveheart
1995-05-24T00:00:00Z
Certainty: 0.919
Captain America: The Winter Soldier
2014-03-20T00:00:00Z
Certainty: 0.919
Legends of the Fall
1994-12-16T00:00:00Z
Certainty: 0.918
Wonder Woman
2017-05-30T00:00:00Z
Certainty: 0.917
American Sniper
2014-12-25T00:00:00Z
Certainty: 0.916
True Lies
1994-07-15T00:00:00Z
Certainty: 0.913
The Shape of Water
2017-12-01T00:00:00Z
Certainty: 0.913
Life Is Beautiful
1997-12-20T00:00:00Z
Certainty: 0.913
World War Z
2013-06-20T00:00:00Z
Certainty: 0

=================================================================================================================

### Creating the Schema

In [32]:
# Name of the class to delete
class_name = "test"  # Replace with the class you want to delete

# Delete the class from the schema
client.schema.delete_class(class_name)

print(f"Class '{class_name}' has been deleted.")

Class 'test' has been deleted.


In [33]:
mytest = {
    'class':'test',
    'properties':[
        {'name':'tid', "dataType":['int']},
        {'name':'text', "dataType":['text']}
    ],
    
    "vectorizer": "text2vec-contextionary"
}

In [34]:
client.schema.create_class(mytest)

In [35]:
result = client.query.get("test", ['tid','text']).do()
result
# Retreiving the data

{'data': {'Get': {'Test': []}}}

In [36]:
print(json.dumps(result, indent=2))

{
  "data": {
    "Get": {
      "Test": []
    }
  }
}


In [37]:
client.schema.get('test')
# gives the meta deta

{'class': 'Test',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'moduleConfig': {'text2vec-contextionary': {'vectorizeClassName': True}},
 'multiTenancyConfig': {'autoTenantActivation': False,
  'autoTenantCreation': False,
  'enabled': False},
 'properties': [{'dataType': ['int'],
   'indexFilterable': True,
   'indexRangeFilters': False,
   'indexSearchable': False,
   'moduleConfig': {'text2vec-contextionary': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'tid'},
  {'dataType': ['text'],
   'indexFilterable': True,
   'indexRangeFilters': False,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-contextionary': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'text',
   'tokenization': 'word'}],
 'replicationConfig': {'asyncEnabled': False, 'factor': 1},
 'shardingConfig': {'actualCount': 1,
  'actualVirtualCount': 128,
  'des

### "Data"

In [38]:
test_data = [
  { "tid": 1, "text": "The quick brown fox jumps over the lazy dog." },
  { "tid": 2, "text": "Artificial intelligence is transforming the world." },
  { "tid": 3, "text": "Deep learning models require a lot of data." },
  { "tid": 4, "text": "OpenAI develops powerful language models." },
  { "tid": 5, "text": "Natural language processing is a fascinating field." }
]


In [39]:
test_data

[{'tid': 1, 'text': 'The quick brown fox jumps over the lazy dog.'},
 {'tid': 2, 'text': 'Artificial intelligence is transforming the world.'},
 {'tid': 3, 'text': 'Deep learning models require a lot of data.'},
 {'tid': 4, 'text': 'OpenAI develops powerful language models.'},
 {'tid': 5, 'text': 'Natural language processing is a fascinating field.'}]

### Inserting the data into the DB

In [40]:
import weaviate

# Initializing weaviate client
client = weaviate.Client("http://localhost:8080")


In [41]:
client

<weaviate.client.Client at 0x117ef11d0>

In [42]:
for data in test_data:
    client.data_object.create(
    data_object = data,
    class_name = 'test')


### Retrieving the data

In [43]:
stored_data = client.query.get('test', ['tid','text']).do()

print(json.dumps(stored_data, indent=2))

{
  "data": {
    "Get": {
      "Test": [
        {
          "text": "Deep learning models require a lot of data.",
          "tid": 3
        },
        {
          "text": "Natural language processing is a fascinating field.",
          "tid": 5
        },
        {
          "text": "Artificial intelligence is transforming the world.",
          "tid": 2
        },
        {
          "text": "OpenAI develops powerful language models.",
          "tid": 4
        },
        {
          "text": "The quick brown fox jumps over the lazy dog.",
          "tid": 1
        }
      ]
    }
  }
}


### Near-text Search

In [44]:
my_query = {
    'concepts': ['AI is chainging the world'],
    
}

In [45]:
my_query

{'concepts': ['AI is chainging the world']}

In [46]:
response =  client.query.get(class_name='test', 
                 properties=['tid','text']).with_near_text(my_query).with_additional(['certainty']).do()

In [47]:
response

{'data': {'Get': {'Test': [{'_additional': {'certainty': 0.831869512796402},
     'text': 'Artificial intelligence is transforming the world.',
     'tid': 2},
    {'_additional': {'certainty': 0.7750378847122192},
     'text': 'Deep learning models require a lot of data.',
     'tid': 3},
    {'_additional': {'certainty': 0.7537948489189148},
     'text': 'Natural language processing is a fascinating field.',
     'tid': 5},
    {'_additional': {'certainty': 0.7259707748889923},
     'text': 'OpenAI develops powerful language models.',
     'tid': 4},
    {'_additional': {'certainty': 0.6994861662387848},
     'text': 'The quick brown fox jumps over the lazy dog.',
     'tid': 1}]}}}

In [48]:
response['data']['Get']['Test']

[{'_additional': {'certainty': 0.831869512796402},
  'text': 'Artificial intelligence is transforming the world.',
  'tid': 2},
 {'_additional': {'certainty': 0.7750378847122192},
  'text': 'Deep learning models require a lot of data.',
  'tid': 3},
 {'_additional': {'certainty': 0.7537948489189148},
  'text': 'Natural language processing is a fascinating field.',
  'tid': 5},
 {'_additional': {'certainty': 0.7259707748889923},
  'text': 'OpenAI develops powerful language models.',
  'tid': 4},
 {'_additional': {'certainty': 0.6994861662387848},
  'text': 'The quick brown fox jumps over the lazy dog.',
  'tid': 1}]

In [49]:
my_query2 = {
    'concepts': ['Why we need too much data'],
    
}

In [50]:
response = client.query.get(class_name='test', 
                properties=['tid','text']).with_near_text(my_query2).with_additional(['certainty']).do()

In [51]:
response['data']['Get']['Test']

[{'_additional': {'certainty': 0.9259282350540161},
  'text': 'Deep learning models require a lot of data.',
  'tid': 3},
 {'_additional': {'certainty': 0.8870106935501099},
  'text': 'Natural language processing is a fascinating field.',
  'tid': 5},
 {'_additional': {'certainty': 0.8620906174182892},
  'text': 'The quick brown fox jumps over the lazy dog.',
  'tid': 1},
 {'_additional': {'certainty': 0.8288148045539856},
  'text': 'Artificial intelligence is transforming the world.',
  'tid': 2},
 {'_additional': {'certainty': 0.7208278179168701},
  'text': 'OpenAI develops powerful language models.',
  'tid': 4}]

=================================================================================================================

=================================================================================================================

=================================================================================================================

In [52]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import BitsAndBytesConfig

In [53]:
from langchain.llms.huggingface_pipeline import HuggingFacePipeline


In [54]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Specify model huggingface model name
model_name = "anakin87/zephyr-7b-alpha-sharded"

# Function for loading model for CPU
def load_model_cpu(model_name: str):
    """
    :param model_name: Name or path of the model to be loaded.
    :return: Loaded model.
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32  # Use float32 for CPU compatibility
    )
    return model

# Function for initializing tokenizer
def initialize_tokenizer(model_name: str):
    """
    Initialize the tokenizer with the specified model_name.

    :param model_name: Name or path of the model for tokenizer initialization.
    :return: Initialized tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, return_token_type_ids=False, truncation=True, max_length=2048)
    tokenizer.bos_token_id = 1  # Set beginning of sentence token id
    return tokenizer

# Initialize tokenizer
tokenizer = initialize_tokenizer(model_name)

# Load model
model = load_model_cpu(model_name)

# Build HuggingFace pipeline for using zephyr-7b-alpha
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="cpu",  # Force pipeline to use CPU
    max_length=2048,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Specify the llm
llm = HuggingFacePipeline(pipeline=text_pipeline)


  self.comm = Comm(**args)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [55]:
# ! python3 -m pip install -U langchain

In [56]:
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Weaviate


In [57]:
vector_db = Weaviate(client=client, index_name="Test", text_key="text")


In [58]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=vector_db.as_retriever()
)

In [None]:
response = qa_chain.invoke(
    "why to use large amount of data?")
print(response)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
# Closing the current Weaviate instance

del client

In [None]:
# Getting all the running weaviate instances

schema = client.schema.get()

print(schema)