In [42]:
import os
import ipfshttpclient

# Open an HTTP API session against the IPFS daemon on localhost:5001.
client = ipfshttpclient.connect('/ip4/127.0.0.1/tcp/5001/http')

# Folder that should be published to IPFS.
directory_path = "./qdrant_data/collections/test3"

# Bail out with a message when the folder is missing; otherwise add the
# whole tree (sub-folders included, symlinks followed) and show the listing.
if not os.path.isdir(directory_path):
    print(f"Directory '{directory_path}' does not exist.")
else:
    result = client.add(
        directory_path,
        recursive=True,
        follow_symlinks=True,
    )
    print("Upload result:", result)


Upload result: [<ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/config.json', 'Hash': 'QmPYwmjhmCY5RXxvDdLL7wKMF9NttnUXbgFevAsNcfv4ZN', 'Size': '589'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/version.info', 'Hash': 'QmYBjmQU8tP4V9BdDnVyA7UQ5ZwWVcVamCYRuoDTb2rSgW', 'Size': '13'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/newest_clocks.json', 'Hash': 'QmPnLYkD2XykP9eSnWFwYikToptWHqX59ATTKZCvhWz8FW', 'Size': '107'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/replica_state.json', 'Hash': 'Qmb3WLFVGzFgJm9SvueDS5428YNAGmDFvsEXrkzBsEvB7A', 'Size': '95'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/shard_config.json', 'Hash': 'QmafHrxptCcJ17ArMhoQbDTXQco8LbWGS1ZgXCBMW95to4', 'Size': '29'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/segments/0c6c0a72-9e58-4845-861f-f99518fa5e1d/000004.log', 'Hash': 'QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH', 'Size': '6'}>, <ipfshttpclient.client.base.Respo

In [44]:
import os
import ipfshttpclient
from loguru import logger

class IPFSManager:
    """Upload a directory tree to IPFS and restore it from an upload listing.

    A single ``ipfshttpclient`` connection is kept in ``self.client``.
    Progress and failures are reported through the ``loguru`` logger rather
    than raised to the caller.
    """

    def __init__(self, ipfs_address='/ip4/127.0.0.1/tcp/5001/http'):
        """Connect to an IPFS daemon.

        Args:
            ipfs_address: Address handed to ``ipfshttpclient.connect``.
        """
        self.client = ipfshttpclient.connect(ipfs_address)

    def upload_directory(self, directory_path, recursive=True, follow_symlinks=True):
        """Add a local directory to IPFS.

        Args:
            directory_path: Directory to upload.
            recursive: Forwarded unchanged to ``client.add``.
            follow_symlinks: Forwarded unchanged to ``client.add``.

        Returns:
            The value returned by ``client.add``, or ``None`` when
            ``directory_path`` is not a directory or the upload raised.
        """
        if not os.path.isdir(directory_path):
            logger.warning(f"Directory {directory_path} does not exist.")
            return None

        logger.info(f"Uploading directory: {directory_path}")
        try:
            # Upload the directory to IPFS
            result = self.client.add(directory_path, recursive=recursive, follow_symlinks=follow_symlinks)
            logger.info(f"Upload result: {result}")
            return result
        except Exception as e:
            logger.error(f"Failed to upload directory '{directory_path}': {e}")
            return None

    def download_files(self, upload_result, base_directory):
        """Recreate an uploaded tree underneath ``base_directory``.

        Args:
            upload_result: Iterable of mappings with ``'Name'`` (path relative
                to the upload root, ``/``-separated) and ``'Hash'`` keys, as
                produced by ``upload_directory``.
            base_directory: Local folder that receives the files; created if
                it does not exist.

        Entries whose name is a parent path of another entry are directories.
        They are created locally instead of being fetched, because ``cat``
        only works on file objects and would otherwise log a spurious failure
        for every folder in the listing. A failure to fetch or write one file
        is logged and does not stop the remaining downloads.
        """
        os.makedirs(base_directory, exist_ok=True)
        logger.info(f"Starting download to base directory: {base_directory}")

        # Materialise once: the listing is scanned twice below.
        entries = list(upload_result)

        # Every proper prefix of an entry name denotes a directory.
        directory_names = set()
        for entry in entries:
            parent = entry['Name'].rpartition('/')[0]
            while parent and parent not in directory_names:
                directory_names.add(parent)
                parent = parent.rpartition('/')[0]

        for file_info in entries:
            file_name = file_info['Name']
            file_hash = file_info['Hash']
            file_path = os.path.join(base_directory, file_name)

            if file_name in directory_names:
                # Directory entry: mirror the folder, nothing to download.
                os.makedirs(file_path, exist_ok=True)
                continue

            # Create directory structure if necessary
            os.makedirs(os.path.dirname(file_path), exist_ok=True)

            try:
                # Download the file content from IPFS
                file_content = self.client.cat(file_hash)

                # Save the content to the file
                with open(file_path, 'wb') as file:
                    file.write(file_content)

                logger.info(f"Downloaded and saved file: {file_path}")
            except Exception as e:
                logger.error(f"Failed to download file with hash '{file_hash}': {e}")

# Round-trip demo: publish a collection folder, then pull it back down.
if __name__ == "__main__":
    ipfs_manager = IPFSManager()

    # Push the collection folder to IPFS and keep the returned listing.
    upload_result = ipfs_manager.upload_directory("./qdrant_data/collections/test3")

    # Local root that receives the restored files.
    base_directory = "./downloaded_files"

    if not upload_result:
        # Nothing came back from the upload, so there is nothing to fetch.
        logger.info("No uploaded result.")
    else:
        # Fetch every listed entry into base_directory.
        ipfs_manager.download_files(upload_result, base_directory)


[32m2024-09-06 00:03:22.175[0m | [1mINFO    [0m | [36m__main__[0m:[36mupload_directory[0m:[36m13[0m - [1mUploading directory: ./qdrant_data/collections/test3[0m
[32m2024-09-06 00:03:22.404[0m | [1mINFO    [0m | [36m__main__[0m:[36mupload_directory[0m:[36m17[0m - [1mUpload result: [<ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/config.json', 'Hash': 'QmPYwmjhmCY5RXxvDdLL7wKMF9NttnUXbgFevAsNcfv4ZN', 'Size': '589'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/version.info', 'Hash': 'QmYBjmQU8tP4V9BdDnVyA7UQ5ZwWVcVamCYRuoDTb2rSgW', 'Size': '13'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/newest_clocks.json', 'Hash': 'QmPnLYkD2XykP9eSnWFwYikToptWHqX59ATTKZCvhWz8FW', 'Size': '107'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/replica_state.json', 'Hash': 'Qmb3WLFVGzFgJm9SvueDS5428YNAGmDFvsEXrkzBsEvB7A', 'Size': '95'}>, <ipfshttpclient.client.base.ResponseBase: {'Name': 'test3/0/shard_config.json', 'Hash': 'Q

In [49]:
import os
import zipfile
import ipfshttpclient
from loguru import logger

class IPFSManager:
    """Back up a directory to IPFS as one zip archive and restore it again.

    A single ``ipfshttpclient`` connection is kept in ``self.client``.
    Progress and upload/download failures are reported through the ``loguru``
    logger.
    """

    def __init__(self, ipfs_address='/ip4/127.0.0.1/tcp/5001/http'):
        """Connect to an IPFS daemon.

        Args:
            ipfs_address: Address handed to ``ipfshttpclient.connect``.
        """
        self.client = ipfshttpclient.connect(ipfs_address)

    def zip_directory(self, directory_path, zip_file_path):
        """Write every file below ``directory_path`` into a deflated zip.

        Args:
            directory_path: Root of the tree to archive; member names are
                stored relative to it.
            zip_file_path: Destination archive (overwritten if present).

        Returns:
            ``zip_file_path``, unchanged.
        """
        logger.info(f"Creating zip file at: {zip_file_path}")

        # The archive already exists on disk while the tree is walked. If it
        # lives inside directory_path it must be skipped, otherwise the
        # half-written archive would be added to itself.
        archive_key = os.path.normcase(os.path.abspath(zip_file_path))

        with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(directory_path):
                for file in files:
                    file_path = os.path.join(root, file)
                    if os.path.normcase(os.path.abspath(file_path)) == archive_key:
                        continue
                    zipf.write(file_path, os.path.relpath(file_path, directory_path))

        logger.info(f"Zip file created successfully: {zip_file_path}")
        return zip_file_path

    def upload_file(self, file_path):
        """Add a single file to IPFS.

        Args:
            file_path: File to upload.

        Returns:
            The value returned by ``client.add``, or ``None`` when
            ``file_path`` is not a file or the upload raised.
        """
        if not os.path.isfile(file_path):
            logger.warning(f"File {file_path} does not exist.")
            return None

        logger.info(f"Uploading file: {file_path}")
        try:
            # Upload the file to IPFS
            result = self.client.add(file_path)
            logger.info(f"Upload result: {result}")
            return result
        except Exception as e:
            logger.error(f"Failed to upload file '{file_path}': {e}")
            return None

    def download_file(self, file_hash, download_path):
        """Fetch ``file_hash`` with ``client.cat`` and write it to ``download_path``.

        Args:
            file_hash: Identifier passed to ``client.cat``.
            download_path: Target file; missing parent folders are created.

        A failure to fetch or write is logged, not raised.
        """
        logger.info(f"Starting download to: {download_path}")

        # A bare file name has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError, so only create a parent when there is one.
        parent_dir = os.path.dirname(download_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        try:
            # Download the file content from IPFS
            file_content = self.client.cat(file_hash)

            # Save the content to the file
            with open(download_path, 'wb') as file:
                file.write(file_content)

            logger.info(f"Downloaded and saved file: {download_path}")
        except Exception as e:
            logger.error(f"Failed to download file with hash '{file_hash}': {e}")

    def unzip_file(self, zip_file_path, extract_to_path):
        """Extract every member of ``zip_file_path`` into ``extract_to_path``.

        Args:
            zip_file_path: Archive to read.
            extract_to_path: Destination folder handed to ``extractall``.

        Errors from opening or extracting the archive propagate to the caller.
        """
        logger.info(f"Unzipping file: {zip_file_path} to {extract_to_path}")

        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to_path)

        logger.info(f"Unzipped file successfully to: {extract_to_path}")

# Backup demo: zip a collection, push the archive to IPFS, fetch it back
# and unpack it into another collection folder.
if __name__ == "__main__":
    ipfs_manager = IPFSManager()

    # Source folder and the archive built from it.
    directory_path = "./qdrant_data/collections/test3"
    zip_file_path = "./qdrant_data_backup.zip"

    # Build the archive, then publish it.
    zip_file = ipfs_manager.zip_directory(directory_path, zip_file_path)
    upload_result = ipfs_manager.upload_file(zip_file)

    # Where the archive is downloaded to, and where it is unpacked.
    download_path = "./zipdownloaded_files/qdrant_data_backup.zip"
    extract_to_path = "./qdrant_data/collections/global_data"

    if not upload_result:
        # Nothing came back from the upload, so there is nothing to restore.
        logger.info("No uploaded result.")
    else:
        # Fetch the archive by the hash reported for the upload...
        file_hash = upload_result['Hash']
        ipfs_manager.download_file(file_hash, download_path)

        # ...and unpack the downloaded copy.
        ipfs_manager.unzip_file(download_path, extract_to_path)


[32m2024-09-06 01:09:58.907[0m | [1mINFO    [0m | [36m__main__[0m:[36mzip_directory[0m:[36m13[0m - [1mCreating zip file at: ./qdrant_data_backup.zip[0m


[32m2024-09-06 01:09:59.309[0m | [1mINFO    [0m | [36m__main__[0m:[36mzip_directory[0m:[36m21[0m - [1mZip file created successfully: ./qdrant_data_backup.zip[0m
[32m2024-09-06 01:09:59.310[0m | [1mINFO    [0m | [36m__main__[0m:[36mupload_file[0m:[36m26[0m - [1mUploading file: ./qdrant_data_backup.zip[0m
[32m2024-09-06 01:09:59.327[0m | [1mINFO    [0m | [36m__main__[0m:[36mupload_file[0m:[36m30[0m - [1mUpload result: <ipfshttpclient.client.base.ResponseBase: {'Name': 'qdrant_data_backup.zip', 'Hash': 'QmbjvpLLhRkBc7xr4iwdJSW28tzkSgMv1S8Me9ttvHiWX3', 'Size': '246582'}>[0m
[32m2024-09-06 01:09:59.328[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_file[0m:[36m40[0m - [1mStarting download to: ./zipdownloaded_files/qdrant_data_backup.zip[0m
[32m2024-09-06 01:09:59.356[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_file[0m:[36m52[0m - [1mDownloaded and saved file: ./zipdownloaded_files/qdrant_data_backup.zip[0m
[32m2024-09-0

In [27]:

from bayesrag.vector_db import VectorDB

# Create the "test3" collection through the project's VectorDB wrapper.
db = VectorDB(collection_name="test3")
db.create_db()

from langchain.schema import Document

# Two small sample documents, each tagged with the source it came from.
chunks = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        ("This is the first text chunk", "doc1"),
        ("This is the second text chunk", "doc2"),
    )
]

# Hand the documents to the wrapper, which embeds and upserts them.
db.upsert_embeddings(chunks)


[32m2024-09-05 23:24:52.608[0m | [1mINFO    [0m | [36mbayesrag.vector_db[0m:[36mcreate_db[0m:[36m18[0m - [1mVector DB successfully created: test3[0m
[32m2024-09-05 23:24:52.610[0m | [1mINFO    [0m | [36mbayesrag.vector_db[0m:[36mupsert_embeddings[0m:[36m34[0m - [1mUpserting embeddings into Vector DB...[0m
[32m2024-09-05 23:24:52.739[0m | [1mINFO    [0m | [36mbayesrag.vector_db[0m:[36mupsert_embeddings[0m:[36m45[0m - [1mEmbeddings created successfully[0m


In [31]:
! dir

 Volume in drive C is OS
 Volume Serial Number is A48A-C644

 Directory of c:\Users\faiza\Music\llmResearch\rag\research\communication

09/05/2024  11:26 PM    <DIR>          .
08/24/2024  06:56 PM    <DIR>          ..
09/05/2024  10:55 PM             6,888 ipfs.ipynb
08/07/2024  10:23 PM    <DIR>          kafka
08/07/2024  10:23 PM    <DIR>          mqtt
09/05/2024  11:26 PM    <DIR>          qdrant_storage
               1 File(s)          6,888 bytes
               5 Dir(s)  35,329,863,680 bytes free


In [33]:
! dir qdrant_storage

 Volume in drive C is OS
 Volume Serial Number is A48A-C644

 Directory of c:\Users\faiza\Music\llmResearch\rag\research\communication\qdrant_storage

09/05/2024  11:26 PM    <DIR>          .
09/05/2024  11:26 PM    <DIR>          ..
09/05/2024  11:26 PM    <DIR>          aliases
09/05/2024  11:26 PM    <DIR>          collections
09/05/2024  11:19 PM               324 raft_state.json
               1 File(s)            324 bytes
               4 Dir(s)  35,329,302,528 bytes free


In [34]:
import os
import shutil
import ipfshttpclient
from loguru import logger

class QdrantIPFSHandler:
    """Zip a folder and publish the archive to IPFS.

    The IPFS connection is opened in the constructor and stored in
    ``self.ipfs_client``. Every step logs its outcome and re-raises on error.
    """

    def __init__(self, ipfs_host='127.0.0.1', ipfs_port=5001, ipfs_connection_link=None):
        """Open the IPFS connection.

        Args:
            ipfs_host: Host used to build the address when no explicit
                ``ipfs_connection_link`` is given.
            ipfs_port: Port used to build the address in the same case.
            ipfs_connection_link: Full address; when not ``None`` it is used
                as-is and ``ipfs_host`` / ``ipfs_port`` are ignored.
        """
        link = (
            ipfs_connection_link
            if ipfs_connection_link is not None
            else f'/ip4/{ipfs_host}/tcp/{ipfs_port}/http'
        )
        self.ipfs_client = self._connect_to_ipfs(link)

    def _connect_to_ipfs(self, connection_link):
        """Return a client connected to ``connection_link``.

        Any exception is logged and then re-raised.
        """
        try:
            ipfs = ipfshttpclient.connect(connection_link)
            logger.info("Connected to IPFS")
            return ipfs
        except Exception as e:
            logger.error(f"Error during IPFS connection: {e}")
            raise

    def zip_folder(self, source_folder, zip_file_path):
        """Archive ``source_folder`` as ``<zip_file_path>.zip``.

        Args:
            source_folder: Folder whose contents are archived.
            zip_file_path: Archive path *without* the ``.zip`` suffix;
                ``shutil.make_archive`` appends it.

        Any exception is logged and then re-raised.
        """
        try:
            shutil.make_archive(zip_file_path, format='zip', root_dir=source_folder)
            logger.info(f"Successfully zipped {source_folder} to {zip_file_path}.zip")
        except Exception as e:
            logger.error(f"Error zipping folder: {str(e)}")
            raise

    def upload_to_ipfs(self, file_path):
        """Add ``file_path`` to IPFS and return the ``'Hash'`` of the response.

        Any exception is logged and then re-raised.
        """
        try:
            response = self.ipfs_client.add(file_path)
            file_hash = response['Hash']
            logger.info(f"File uploaded to IPFS with hash: {file_hash}")
            return file_hash
        except Exception as e:
            logger.error(f"Error uploading file to IPFS: {str(e)}")
            raise

    def handle_qdrant_data(self, source_folder, zip_file_path):
        """Zip ``source_folder`` and upload the resulting archive.

        Args:
            source_folder: Folder to archive.
            zip_file_path: Archive path without the ``.zip`` suffix.

        Returns:
            The hash returned by ``upload_to_ipfs`` for ``<zip_file_path>.zip``.
        """
        self.zip_folder(source_folder, zip_file_path)
        archive_path = f"{zip_file_path}.zip"
        return self.upload_to_ipfs(archive_path)

# Demo: archive the Qdrant data folder and publish the archive to IPFS.
if __name__ == "__main__":
    handler = QdrantIPFSHandler()

    # Folder to back up, and the archive path without its ".zip" suffix.
    qdrant_data_folder = './qdrant_data'
    zip_file_path = './qdrant_data_backup'

    ipfs_hash = handler.handle_qdrant_data(qdrant_data_folder, zip_file_path)

    # Report the outcome based on whether a hash came back.
    if not ipfs_hash:
        logger.error("Failed to upload zipped Qdrant data to IPFS.")
    else:
        logger.info(f"Zipped Qdrant data uploaded to IPFS with hash: {ipfs_hash}")


c:\Users\faiza\Music\llmResearch\rag\env\Lib\site-packages\ipfshttpclient\client\__init__.py:75: VersionMismatch: Unsupported daemon version '0.23.0' (not in range: 0.5.0 ≤ … < 0.9.0)
[32m2024-09-05 23:37:47.395[0m | [1mINFO    [0m | [36m__main__[0m:[36m_connect_to_ipfs[0m:[36m15[0m - [1mConnected to IPFS[0m
[32m2024-09-05 23:37:48.223[0m | [1mINFO    [0m | [36m__main__[0m:[36mzip_folder[0m:[36m24[0m - [1mSuccessfully zipped ./qdrant_data to ./qdrant_data_backup.zip[0m
[32m2024-09-05 23:37:48.293[0m | [1mINFO    [0m | [36m__main__[0m:[36mupload_to_ipfs[0m:[36m32[0m - [1mFile uploaded to IPFS with hash: QmadmszDYPXFfZ87cjV9mhJgrtHMZzF9qgWirr9TrAPVqr[0m
[32m2024-09-05 23:37:48.294[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m53[0m - [1mZipped Qdrant data uploaded to IPFS with hash: QmadmszDYPXFfZ87cjV9mhJgrtHMZzF9qgWirr9TrAPVqr[0m
