<a href="https://colab.research.google.com/github/aishwaryaprabhat/Advanced-RAG/blob/main/Advanced_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Download and Environment Preparation

In [1]:
!bash download_dataset.sh # get from https://github.com/aishwaryaprabhat/Advanced-RAG/blob/main/download_dataset.sh

Cloning into 'DataRepository'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 47 (delta 12), reused 21 (delta 7), pack-reused 8[K
Receiving objects: 100% (47/47), 49.80 MiB | 11.03 MiB/s, done.
Resolving deltas: 100% (12/12), done.
Updating files: 100% (25/25), done.
Archive:  DataRepository/high-performance-rag/Camel Papers Test.zip
  inflating: source_docs/Acute respiratory distress syndrome in an alpaca cria.pdf  
  inflating: source_docs/Alpaca liveweight variations and fiber production in Mediterranean range of Chile.pdf  
Archive:  DataRepository/high-performance-rag/Camel Papers Train.zip
  inflating: source_docs/Antibody response to the epsilon toxin ofClostridium perfringensfollowing vaccination of Lama glamacrias.pdf  
  inflating: source_docs/Comparative pigmentation of sheep, goats, and llamas what colors are possible through selection.pdf  
  inflating: source_d

In [18]:
# %pip install llama-index pypdf sentence_transformers typing_extensions==4.7.1 nest_asyncio -U -q
%pip install --upgrade -r requirements.txt -U

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting llama-index==0.9.27
  Using cached llama_index-0.9.27-py3-none-any.whl (15.8 MB)
Collecting ragas
  Downloading ragas-0.0.22-py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.4/52.4 kB[0m [31m376.3 kB/s[0m eta [36m0:00:00[0m0:01[0m:00:01[0m
Collecting pysbd>=0.3.4
  Downloading pysbd-0.3.4-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.1/71.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.1.0-py3-none-any.whl (797 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.0/798.0 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess
  Downloading multiprocess-0.70.15-py38-none-any.whl (13

In [1]:
import os
# from google.colab import userdata
import nest_asyncio

nest_asyncio.apply()
os.environ['OPENAI_API_KEY'] = 'sk-*'
os.environ['HUGGINGFACE_API_TOKEN'] = 'hf_*'

In [2]:
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import OpenAI
from llama_index import SimpleDirectoryReader

# Initialize an embedding model from Hugging Face using the "BAAI/bge-small-en" model.
embedding_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")

# Create an OpenAI GPT-3.5 model instance with no randomness in responses (temperature=0).
llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=os.environ['OPENAI_API_KEY'])

# Load data from a directory named 'source_docs' using SimpleDirectoryReader.
source_docs = SimpleDirectoryReader('source_docs').load_data()

  return torch._C._cuda_getDeviceCount() > 0


config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

# Advanced RAG Techniques

## Chunking with Overlap (Baseline)

### Parse source_docs into nodes

In [4]:
from llama_index.node_parser import SimpleNodeParser

# Create an instance of SimpleNodeParser with default settings.
# Parameters:
#   chunk_overlap: Specifies the number of overlapping characters between adjacent text chunks.
#                  This is useful for ensuring that context isn't lost at the boundaries of chunks.
#   chunk_size:    Defines the size of each text chunk in characters. 
#                  This determines how the text is segmented for processing.
baseline_parser = SimpleNodeParser.from_defaults(
    chunk_overlap=200,  # Overlap of 200 characters between chunks
    chunk_size=1024     # Each chunk consists of 1024 characters
)

# Use the created parser instance to extract nodes from the documents.
# The 'get_nodes_from_documents' method processes the documents in 'source_docs'
# and extracts structured nodes based on the parser's configuration.
baseline_nodes = baseline_parser.get_nodes_from_documents(source_docs)


In [5]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext

# Create a ServiceContext instance with default settings.
# Parameters:
#   llm: The language model to be used within the service context. 
#        This defines how text will be interpreted or processed.
#   embed_model: The embedding model used for converting text to numerical representations,
#                facilitating operations like similarity search or clustering.
#   node_parser: The node parser that structures and segments text data into manageable parts.
#                It was defined previously in your code.
baseline_context = ServiceContext.from_defaults(
    llm=llm,                # Language model
    embed_model=embedding_model,  # Embedding model
    node_parser=baseline_parser   # Node parser from previous code
)

# Initialize a VectorStoreIndex with the baseline nodes and the service context.
# The VectorStoreIndex is used for efficient storage and retrieval of vectorized text data.
# Parameters:
#   baseline_nodes: The nodes extracted from documents, ready for indexing.
#   service_context: The service context that provides essential components like the embedding model.
baseline_index = VectorStoreIndex(
    baseline_nodes,              # Nodes to be indexed
    service_context=baseline_context  # Service context providing necessary components
)

# Persist the baseline index in a specified directory.
# This step saves the state of the index to disk, allowing for later retrieval or backup.
# Parameters:
#   persist_dir: The directory name where the index will be stored.
baseline_index.storage_context.persist(
    persist_dir="baseline_index"  # Directory name for storing the index
)

In [17]:
# Convert the baseline index into a query engine capable of finding the top 3 most similar entries.
baseline_query_engine = baseline_index.as_query_engine(similarity_top_k=3)

# Perform a query with the baseline query engine asking about the influence of camelid genetics on wool quality.
baseline_response = baseline_query_engine.query("How do camelid genetics influence wool quality?")

# Retrieve the response from the query.
baseline_response.response

'Camelid genetics can influence wool quality. The inheritance of coat colors in alpacas and llamas, which are types of camelids, has been studied. Additionally, major genes affecting alpaca fiber traits have been analyzed. The expression patterns of keratin intermediate filament and keratin associated protein genes in wool follicles have also been investigated. These studies suggest that genetic factors play a role in determining the quality of wool in camelids.'

## Sentence Window Parser

In [9]:
from llama_index.node_parser import SentenceWindowNodeParser

# Initialize a SentenceWindowNodeParser with specific settings.
# A SentenceWindowNodeParser is designed to parse and structure text data into nodes,
# with a focus on sentence-level granularity.
# Parameters:
#   window_size: Defines the number of sentences to include in each window or node. 
#                This sets the scope of context for each node.
#   window_metadata_key: The key under which window metadata (like window number) will be stored.
#   original_text_metadata_key: The key for storing the original text data in the metadata.
sentence_parser = SentenceWindowNodeParser.from_defaults(
    window_size=6,  # Number of sentences in each window
    window_metadata_key="window",  # Metadata key for window information
    original_text_metadata_key="original_text"  # Metadata key for original text
)

# Use the sentence parser to parse nodes from documents.
# This method processes the documents in 'source_docs' and extracts structured nodes,
# with each node representing a window of sentences.
sentence_nodes = sentence_parser.get_nodes_from_documents(source_docs)

# Create a ServiceContext using the sentence parser and previously defined models.
# This context will use the sentence-level node parser for processing text data,
# along with the specified language and embedding models.
# Parameters:
#   llm: The language model for the service context.
#   embed_model: The embedding model for converting text to numerical representations.
#   node_parser: The sentence-level node parser for structuring text data.
sentence_context = ServiceContext.from_defaults(
    llm=llm,  # Language model
    embed_model=embedding_model,  # Embedding model
    node_parser=sentence_parser  # Sentence-level node parser
)


In [10]:
from llama_index import VectorStoreIndex

# Create a VectorStoreIndex using the parsed sentence nodes and the previously defined service context.
# VectorStoreIndex is a structure used for efficient storage, retrieval, and manipulation of vectorized text data.
# Parameters:
#   sentence_nodes: The nodes obtained from parsing the documents at the sentence level.
#                   These nodes are now ready for indexing.
#   service_context: The service context that provides essential components (like models and parsers) for the indexing.
sentence_index = VectorStoreIndex(
    sentence_nodes,         # Nodes obtained from sentence-level parsing
    service_context=sentence_context  # Service context with essential components
)

# Persist the sentence index to a directory. This step saves the current state of the index on disk.
# It allows for the index to be reloaded and used in the future, ensuring data persistence.
# Parameters:
#   persist_dir: The name of the directory where the index will be stored.
sentence_index.storage_context.persist(
    persist_dir="sentence_index"  # Directory name for storing the index
)


In [16]:
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor

# Convert the sentence index into a query engine.
# The query engine is configured to find the top 3 most similar entries in the index
# when performing a query. This is useful for retrieving the most relevant information
# based on a given input.
# Parameters:
#   similarity_top_k: The number of top similar entries to retrieve. Here, it's set to 3.
#   node_postprocessors: A list of postprocessors to apply on the nodes. In this case,
#                        a MetadataReplacementPostProcessor is used, which replaces the node's
#                        metadata with values from the 'window' key. This can be helpful for
#                        contextualizing results based on specific metadata.
sentence_query_engine = sentence_index.as_query_engine(
    similarity_top_k=3,  # Find the top 3 most similar entries
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ]
)

# Perform a query using the sentence query engine.
# This query is about the influence of camelid genetics on wool quality. The query engine will
# process this input and find the most relevant entries in the index that match the query.
sentence_response = sentence_query_engine.query("How do camelid genetics influence wool quality?")

# Retrieve the response from the query.
# The response contains the top similar entries as determined by the query engine,
# potentially providing insightful information about the query topic.
response = sentence_response.response
response

'Camelid genetics influence wool quality through various mechanisms. One important aspect is coat color genetics, where llamas and alpacas exhibit a wide range of natural colors and patterns. Llamas, in particular, have greater color variation compared to alpacas. This variation is attributed to the selection process during domestication, where llamas were primarily selected for body size and fiber weight rather than color uniformity or fiber fineness. \n\nAdditionally, the composition and interactions of keratin intermediate filaments (KIFs) and keratin-associated proteins (KAPs) play a crucial role in determining fiber characteristics. Fiber growth in mammals, including camelids, is a cyclical process regulated by genetics, nutrition, and hormones. The proteins that form the fiber are encoded by keratin genes (KRT) and keratin-associated proteins (KRTAP), which are expressed in a highly regulated manner during hair follicle growth.\n\nGenetic selection programs have been implemented 

## Automerging Retrival (Using Hierarchical Nodes)

In [12]:
from llama_index.node_parser import HierarchicalNodeParser

# Initialize a HierarchicalNodeParser with default settings.
# HierarchicalNodeParser is designed to parse and structure text data into a hierarchy of nodes,
# allowing for a more structured and layered representation of the text.
# This can be particularly useful for complex documents where different levels of granularity are needed.
hierarchical_parser = HierarchicalNodeParser.from_defaults()
# No additional parameters are needed for default settings.

# Parse nodes from the documents using the hierarchical parser.
# This method processes the documents in 'source_docs' and extracts structured nodes,
# organizing them hierarchically based on the document structure.
hierarchical_nodes = hierarchical_parser.get_nodes_from_documents(source_docs)

# Create a ServiceContext using the hierarchical parser and previously defined models.
# This context will now use the hierarchical node parser for processing text data,
# in conjunction with the specified language and embedding models.
# Parameters:
#   llm: The language model for the service context, used for understanding and processing language data.
#   embed_model: The embedding model for converting text into numerical representations,
#                enabling various text analysis tasks.
#   node_parser: The hierarchical node parser for structuring the text data.
hierarchical_context = ServiceContext.from_defaults(
    llm=llm,  # Language model
    embed_model=embedding_model,  # Embedding model
    node_parser=hierarchical_parser  # Hierarchical node parser
)

In [13]:
from llama_index import VectorStoreIndex, StorageContext

# Create a VectorStoreIndex using the parsed hierarchical nodes and the specified service context.
# VectorStoreIndex is used for efficient storage, retrieval, and manipulation of vectorized text data.
# Parameters:
#   hierarchical_nodes: The nodes obtained from parsing the documents using the hierarchical node parser.
#                       These nodes represent the text data structured in a hierarchical format.
#   service_context: The service context that includes essential components like models and parsers
#                    for processing and understanding the text data.
hierarchical_index = VectorStoreIndex(
    hierarchical_nodes,           # Nodes structured hierarchically
    service_context=hierarchical_context  # Service context with essential components
)

# Persist the hierarchical index to a directory. This action saves the current state of the index on disk,
# enabling the index to be reloaded and reused in the future. It ensures the persistence and availability
# of the indexed data for later use.
# Parameters:
#   persist_dir: The name of the directory where the index will be stored.
hierarchical_index.storage_context.persist(
    persist_dir="hierarchical_index"  # Directory name for storing the index
)

In [14]:
from llama_index.retrievers.auto_merging_retriever import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Initialize an AutoMergingRetriever with the hierarchical index.
# AutoMergingRetriever is used for retrieving data from an index by automatically merging results 
# from multiple queries or sources. It's particularly useful for complex data structures like a hierarchical index.
# Parameters:
#   hierarchical_index.as_retriever(similarity_top_k=3): Converts the hierarchical index into a retriever
#                                                        configured to find the top 3 most similar entries.
#   storage_context: Specifies the storage context from the hierarchical index for data management.
#   verbose: Enables verbose output, providing more detailed information during retrieval operations.
retriever = AutoMergingRetriever(
    hierarchical_index.as_retriever(similarity_top_k=3),
    storage_context=hierarchical_index.storage_context,
    verbose=True
)

# Create a RetrieverQueryEngine using the AutoMergingRetriever.
# RetrieverQueryEngine is a query engine that uses a specified retriever for querying the indexed data.
# It allows for complex query operations, especially in conjunction with retrievers like AutoMergingRetriever.
amretriever_query_engine = RetrieverQueryEngine.from_args(retriever)

# Perform a query using the AMRetriever query engine.
# The query is about the influence of camelid genetics on wool quality. The query engine processes this
# input and retrieves the most relevant entries from the index.
amretriever_response = amretriever_query_engine.query("How do camelid genetics influence wool quality?")

# Retrieve the response from the query.
# The response contains the results of the query as determined by the query engine, potentially offering
# valuable insights into the queried topic.
response = amretriever_response.response

In [15]:
response

'Camelid genetics play a significant role in determining wool quality. While there is still much to be understood in this field, recent advancements in genetic understanding have shed light on the genetic mechanisms that regulate economically important fiber traits in South American camelids. Mutations responsible for some monogenic or oligogenic traits have been identified, allowing for molecular testing to assist breeding decisions. Additionally, the development of a 76K SNPs array for the alpaca has facilitated the identification of genes affecting more complex traits through genome-wide association studies. These advancements in genomics and the discovery of genetic variants are expected to contribute to the improvement of wool quality in camelids.'

# Evaluating RAG Performance

## Generating a test dataset

In [56]:
from ragas.testset import TestsetGenerator
import random

# Initialize a TestsetGenerator using its default settings.
# TestsetGenerator is used for generating test datasets, typically for model evaluation or testing.
# The 'from_default' method sets up the generator with default configurations.
testsetgenerator = TestsetGenerator.from_default()

# Specify the sample size for the source documents.
# This determines how many documents will be randomly selected from the source documents.
sample_size = 10

# Define the number of questions to be included in the test set.
# This will set how many test cases or questions the test set will contain.
num_questions = 10

# Generate a test dataset from a random sample of source documents.
# 'random.sample' is used to randomly select a subset of documents from the source.
# The test set is then generated based on these documents.
# Parameters:
#   random.sample(source_docs, sample_size): A randomly selected subset of source documents.
#   test_size: The number of questions or test cases to generate in the test set.
testset = testsetgenerator.generate(
    random.sample(source_docs, sample_size),  # Randomly selected documents
    test_size=num_questions                    # Number of questions in the test set
)






  0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A



 10%|█         | 1/10 [00:38<05:43, 38.18s/it][A[A[A[A



 30%|███       | 3/10 [01:33<03:32, 30.30s/it][A[A[A[A



 60%|██████    | 6/10 [02:36<01:38, 24.69s/it][A[A[A[A



100%|██████████| 10/10 [03:46<00:00, 20.82s/it][A[A[A[A



15it [04:22, 14.10s/it]                        [A[A[A[A



21it [05:39, 13.54s/it][A[A[A[A



28it [07:12, 15.46s/it][A[A[A[A


## Minor cleanup and reformatting

In [91]:
import re 

test_df = testset.to_pandas()
# Define the regex pattern to match any character that is NOT a letter, a number, '.', ',', or '?'
pattern = r"[^a-zA-Z0-9.,? ]"

# Define a function to replace special characters in a string
def remove_special_chars(s):
    return re.sub(pattern, '', str(s))

# Apply the function to each cell in the DataFrame
test_df = test_df.applymap(remove_special_chars)


test_questions = test_df['question'].values.tolist()
test_answers = [[item] for item in test_df['ground_truth'].values.tolist()]

test_df

Unnamed: 0,question,ground_truth_context,ground_truth,question_type,episode_done
0,What is the role of the melanocortin 1receptor...,"However, color inheritance in domestic South ...",The role of the melanocortin 1receptor MC1R in...,simple,True
1,What specific integumental characteristics con...,SACs have developed several special integumen...,The specific integumental characteristics that...,reasoning,True
2,What are the possible longterm effects of shea...,She concluded that shearing alpaca in winter ...,The possible longterm effects of shearing alpa...,multicontext,True
3,What is the role of hair in thermoregulation i...,The specic integumental characteristics of SA...,The role of hair in thermoregulation in South ...,simple,True
4,What is the effect of ASIP mutations on melani...,"However, if the agouti signaling protein ASIP...",The effect of ASIP mutations on melanin produc...,simple,True
5,What role does MC1R play in coat color regulat...,"For example, the mating between two white ani...",MC1R plays a role in coat color regulation in ...,multicontext,True
6,What factors contribute to the higher stress l...,We found a strong positive correlation betwee...,The factors that contribute to higher stress l...,reasoning,False
7,"In contrast, what factors contribute to lower ...",We interpret this as indicating that some ind...,The factors that contribute to lower stress le...,reasoning,True


## Running the evaluation for the 3 RAG methods across 6 metrics

In [89]:
from ragas.metrics import (
    faithfulness, 
    answer_relevancy, 
    context_precision, 
    context_recall, 
    answer_similarity, 
    answer_correctness
)
from ragas.llama_index import evaluate
import pandas as pd
import time

# List of evaluation metrics functions to be used.
metrics = [
    faithfulness,           # Evaluates faithfulness of the response to the source material.
    answer_relevancy,       # Assesses relevance of the response to the query.
    context_precision,      # Measures precision of the context in the response.
    context_recall,         # Measures recall of the context in the response.
    answer_correctness,     # Checks correctness of the answer.
    answer_similarity,      # Evaluates similarity of the answer to a reference answer.
]

# A list to collect individual result DataFrames.
results_list = []

# A list of tuples, each containing a query engine and its corresponding technique name.
indices = [
    (baseline_query_engine, 'chunks_with_overlap'),
    (sentence_query_engine, 'sentence_window'),
    (amretriever_query_engine, 'hierarchical_automerge')
]

# Iterate over each query engine and technique pair.
for query_engine, technique in indices:
    # Evaluate the query engine.
    result = evaluate(query_engine, metrics, test_questions, test_answers)

    # Add a 'technique' column to the result DataFrame.
    result['technique'] = technique

    # Add the result DataFrame to the results list.
    results_list.append(result)

    # Sleep to handle rate limits.
    time.sleep(60)

In [88]:
# Convert each Result object's items to a dictionary and collect them in a list
dict_list = [dict(result.items()) for result in results_list]

# Convert the list of dictionaries to a DataFrame
results_df = pd.DataFrame(dict_list)

results_df

Unnamed: 0,faithfulness,answer_relevancy,context_precision,context_recall,answer_correctness,answer_similarity,technique
0,0.71875,0.864502,0.625,0.925,0.557648,0.964971,chunks_with_overlap
1,0.852083,0.990066,0.802083,0.8875,0.62031,0.97786,sentence_window
2,0.84375,0.962274,0.9375,0.875,0.557285,0.962493,hierarchical_automerge


# Tracking RAG Evaluation Results on MLFlow

In [None]:
%pip install mlflow azureml-mlflow -U -q

In [90]:
from azureml.core import Workspace
import mlflow

# Load the Azure ML workspace configuration
ws = Workspace.from_config()

# Set the MLflow tracking URI
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# Set the MLflow experiment name
mlflow.set_experiment("advanced_rag_eval")

# Assuming 'technique' is the column name in results_df that stores the technique name
# And other columns in results_df are the metrics you want to log
for index, row in results_df.iterrows():
    # Start a new MLflow run
    with mlflow.start_run(run_name=f"{row['technique']}"):  # Use 'technique' column to name the run
        # Log each metric in the row
        for metric in row.index:
            if metric != 'technique':  # Exclude the 'technique' column from metrics
                mlflow.log_metric(metric, row[metric])

# Note: Adjust the column names if they are different in your DataFrame


Bad pipe message: %s [b'!\xf4o"`\x83F\xd8?d]\xbe\xff\xdfmO(\x8c \xce\x1a\xda\xa8kdG\xea\x05\x94#)Z\xf9K\xd8\xefJ\xffx\xe1\xe8\xaf\xf1.\xd9\xa9:Q\xee%\xee\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06', b'\x05\x01\x06', b'']
Bad pipe message: %s [b'\x03\x02\x03\x04\x00-\x00\x02\x01\x01\x003\x00&\x00$\x00\x1d\x00 \xf41P\xf6\xcet+9\xaa*\xce\xd5\x947\xae\x82F\xb2\x12\xa5\xc0/']
Bad pipe message: %s [b'\xc6VBm\xe7\xd4\x15:w_\x85)\x0cy\x01\x03SW ', b"E\xa4\xc6*:\xe9\xad\xc4\xea'\xa3iRV\xe2d\xfc:f\xec(:\xba]\xfc\xfc\x1a\xf2$V\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0"]
Bad pipe message: %s [b'.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00