In [54]:
from requests_aws4auth import AWS4Auth
import boto3
from langchain_aws import BedrockEmbeddings

#from langchain_community.embeddings import BedrockEmbeddings
from opensearchpy import OpenSearch, RequestsHttpConnection
from langchain_aws import ChatBedrock
import pandas as pd
import json
import re
import time 

In [55]:
# pip install -U langchain-aws

In [57]:
QUERIES = {
    '1': 'provide comparative analysis of transformers - TXID_2217, TXID_1235, TXID_1685 and TXID_2484 based on their parameters',
    '2': 'For transformers 2217, 1235, 1685 and 2484 provide comparative details for transformer health based on Oil quality testing ( dielectric breakdown voltage, IFT, acidity, moisture content, furan analysis) , DGA ( duval, rogers ratio, key gas method), Power factor (IPF/IR), temperature monitoring (top oil temperature , hot spot temperature, cooling efficiency) , load/overload history ( loading prof, percentage overload), partial discharge , electrical testing ( FRA, WRM, Turns ratio, capacitance and dissipation factor) , TAIM, LEDT, dornenburgs method, bushing health (capacitance and power factor, DGA for bushings), THI (HI, risk of failure), LTC. Ensure you provide details on parameter, significance, calculation details, root cause analysis details, actions and measures that must be taken , advice to curb further damage for provided transformer details in a tabular format',
    '3': 'what is the duval triangle 1,2,3 value for transformer 2484',
    '4': 'How is my transformer fleet - 2217, 1235, 1685 and 2484 doing?',
    '5': 'Which are my high-risk transformers and how does one mitigate the risk?'
}
INDEX_CONFIG ={'Hackathon_index': 'testing_index2'}

In [58]:
import boto3
from langchain_aws import ChatBedrock
import re

def load_model():
    # bedrock client
    bedrock_runtime = boto3.client(
        service_name="bedrock-runtime",
        region_name="us-east-1",
    )
    # Update to use amazon.titan-text-premier-v1:0
    model_id = "amazon.titan-text-premier-v1:0"
    model_kwargs = {
        "temperature": 0.0,
    }
    # Bedrock chat model
    model = ChatBedrock(
        client=bedrock_runtime,
        model_id=model_id,
        model_kwargs=model_kwargs,
    ).with_retry()
    return model

# Query Router

In [59]:
def query_router(query):
    prompt =  f"""There are the following types of user queries: alert, trasnformer.
                Use the following examples for reference:
                'provide comparative analysis of transformers - 2217, 1235, 1685 and 2484 based on their parameters' -> transformer
                'which transformer is needing immediate attention ?' -> alert
                'which transformer needs inspection currently and for what ? ignore missing data' -> alert
                'When will a transformer fail? which one first' -> alert
                Classify the following request: {query}"""
    
    model = load_model()
    response = model.invoke(prompt)
    content = response.content
    
    return content

In [60]:
# query_router('what is the duval triangle 1,2,3 value for transformer 2484')

# Keyword Parsing

In [61]:
def parse_keywords(content):
    must_keywords = []
    should_keywords = []

    # Extract the Must Keywords line
    must_line = re.search(r'Numerical Keywords: \[(.*?)\]', content)
    if must_line:
        must_keywords = [keyword.strip() for keyword in must_line.group(1).split(',')]

    # Extract the Should Keywords line
    should_line = re.search(r'Text Keywords: \[(.*?)\]', content)
    if should_line:
        should_keywords = [keyword.strip() for keyword in should_line.group(1).split(',')]

    return must_keywords, should_keywords

def extract_keywords(query):
    prompt = f"""
    Extract the keywords from the following query: "{query}"
    Keywords should include specific identifiers like transformer numbers, names, or other entities relevant to the query.
    Transformer should never be a keyword.
    If the keyword is clearly a transformer ID and it doesn't start with 'TXID_', add this to its prefix
    The output should have format of: Numerical Keywords: [...]; Text Keywords: [...]
    
    <example>
    Input: provide comparative analysis of transformers - 2217, 1235, 1685 and 2484 based on their parameters?
    Output: Numerical Keyword: [TXID_2217, TXID_1235, TXID_1685, TXID_2484]; Text Keyword: …
    
    Input: what is the duval triangle 1,2,3 value for transformer 2484
    Output: Numerical Keyword: [TXID_2484]; Text Keyword: [duval triangle]
    """
    
    model = load_model()
    response = model.invoke(prompt)
    
    content = response.content
    print(content)
    
    return parse_keywords(content)


# Searching Logic

In [62]:
def keyword_search(opensearch_client, index_name, query, k=2):
    must_keywords, should_keywords = extract_keywords(query)

    msearch_body = []
    if must_keywords:
        for keyword in must_keywords:
            msearch_body.append({'index': index_name})
            
            msearch_body.append({
                'size': k, 
                'query': {
                    'bool': {
                        'must': [
                            {
                                'match_phrase': {
                                    'content': keyword
                                }
                            }
                        ],
                        'should': [
                            {
                                'match': {
                                    'content': should_keyword
                                }
                            } for should_keyword in should_keywords
                        ],
                        'minimum_should_match': 0 
                    }
                }
            })

    response = opensearch_client.msearch(body=msearch_body)

    all_results = []
    for res in response['responses']:
        if 'hits' in res and 'hits' in res['hits']:
            all_results.extend(res['hits']['hits'])  

    return all_results



def vector_search(opensearch_client, index_name, embedding_model, query, k=2):
    query_vector = embedding_model.embed_documents([query])[0]
    response = opensearch_client.search(
        index=index_name,
        body={
            'size': k,
            'query': {
                'knn': {
                    'embedding': {
                        'vector': query_vector,
                        'k': k
                    }
                }
            }
        }
    )
    return response['hits']['hits']

def alert_search(transformer_data, fault_ranges):
    high_risk_transformers = {}

    for transformer_id, data in transformer_data.items():
        alerts = []
        for param, value in data.items():
            if param in fault_ranges:
                ranges = fault_ranges[param]
                if value > ranges['High']:
                    alerts.append(f"{param} is in the high-risk range: {value}.")
        
        if alerts:
            high_risk_transformers[transformer_id] = {
                'alerts': alerts,
                'mitigation': suggest_mitigation(alerts)
            }


    top_5_transformers = dict(list(high_risk_transformers.items())[:5])
    
    return top_5_transformers


def print_query_result(query, results):
    search_results = ""
    
    if not results:
        search_results += f"# Query: {query} (search)"
        search_results += "--------------------------------"
        search_results += "No results found."
        search_results += "--------------------------------"
        print(search_results)
        return search_results

    df = pd.concat([pd.DataFrame([result['_source'] if isinstance(result, dict) else result]) for result in results], ignore_index=True)
    search_results += f"# Query: {query} (search)"
    search_results += "--------------------------------"
    for i, result in enumerate(results):
        # print(result)
        metadata = result['_source'] if isinstance(result, dict) else result
        search_results += f"# Search result {i+1} (relevant document chunk):"
        search_results += f"Source: {metadata['source']}"
        search_results += "Content:"
        row_content = json.loads(metadata['content'])
        for key, value in row_content.items():
            search_results += f"'{key}': {value}"
        search_results +=  "--------------------------------"
    return search_results

In [69]:
search_result = test_queries(index_name, QUERIES['5'], opensearch_client, embedding_model, 'transformer')
print(search_result)



Testing query Which are my high-risk transformers and how does one mitigate the risk? - Hybrid Search:
Numerical Keywords: []; Text Keywords: [high-risk, mitigate, risk]
<query> # Query: Which are my high-risk transformers and how does one mitigate the risk? </query>
<documents>
# Query: Which are my high-risk transformers and how does one mitigate the risk? (search)--------------------------------# Search result 1 (relevant document chunk):Source: health_index_augdata.csvContent:'Hydrogen': 4'Oxigen': 13600'Nitrogen': 37500'Methane': 3'CO': 443'CO2': 5890'Ethylene': 0'Ethane': 7'Acethylene': 0'DBDS': 0.0'Power factor': 1.0'Interfacial V': 34'Dielectric rigidity': 55'Water content': 77'Health index': 38.3'Life expectation': 32.0'CO_H2_ratio': 110.75'CH4_H2_ratio': 0.75'C2H4_H2_ratio': 0.0'C2H2_H2_ratio': 0.0'H2_N2_ratio': 0.0001066666666666'O2_N2_ratio': 0.3626666666666667'H2_CO2_ratio': 0.0006791171477079'TransformerID': TxID_5785'InstallationDate': 2002-07-05'MaintenanceSchedule': 2

In [73]:
def test_queries(index_name, query, opensearch_client, embedding_model, search_type):

    if search_type == 'transformer': #Hybrid
        print(f"\nTesting query {query} - Hybrid Search:")
        results = keyword_search(opensearch_client, index_name, query) + vector_search(opensearch_client, index_name, embedding_model, query)
    elif search_type == 'alert':
        print('Hit alerting logic')
        results = alert_search(transformer_data, fault_ranges)
    search_result = print_query_result(query, results)


    formatted_output = format_output(query, [search_result])
    # print(formatted_output)
    return formatted_output

def format_output(query, results):
    formatted_results = []
    for result in results:
        formatted_results.append(f"<documents>\n{result}\n</documents>")
    return f"<query> # Query: {query} </query>\n" + "\n--------------------------------\n".join(formatted_results)


In [74]:
# Initialize OpenSearch client
def init_opensearch_client(host, port, region, service):
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)
    return OpenSearch(
        hosts=[{'host': host, 'port': port}],
        http_auth=awsauth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
        timeout=3000
    )


opensearch_host = 'iellhhrn6kean028im78.us-east-1.aoss.amazonaws.com'
opensearch_port = 443
opensearch_region = 'us-east-1'
opensearch_service = 'aoss'
index_name = INDEX_CONFIG['Hackathon_index']
dimension = 1024

opensearch_client = init_opensearch_client(opensearch_host, opensearch_port, opensearch_region, opensearch_service)
embedding_model = BedrockEmbeddings(client=boto3.client("bedrock-runtime", region_name=opensearch_region), model_id="amazon.titan-embed-text-v2:0")

In [75]:
search_result = test_queries(index_name, QUERIES['4'], opensearch_client, embedding_model, 'hybrid')



UnboundLocalError: cannot access local variable 'results' where it is not associated with a value

In [44]:
#alert_search(opensearch_client, 'testing_index2', embedding_model, 'check insulation power factor', k=5, alert_keywords=['insulation', 'resistance'])

# Call Titan LLM Model for contextual output 

In [None]:
def generate_response(model, user_prompt, system_prompt):
    input_prompt = f"System: {system_prompt}\n\nHuman: {user_prompt}\n\nAI:"
    
    response = model.invoke(
        input=input_prompt
    )
    
    return response

def clean_response(raw_response):
    raw_text = raw_response.content
    cleaned_response = raw_text.strip()
    
    return cleaned_response

In [None]:
model = load_model()

query = QUERIES['5']
print(f"Running query  {query}")

search_type=query_router(query)

search_result=test_queries(index_name, query, opensearch_client, embedding_model, search_type)
user_prompt = search_result
system_prompt = """You are a specialized assistant trained to provide information only related to power transformers. You have access to detailed operational, environmental, and performance data for each transformer. If a transformer ID is mentioned, you should provide accurate and factual information related only to that transformer and not generalize across other transformers. You are expected to avoid answering any queries outside the scope of power transformer maintenance, operation, and health.

Instructions:

1. Use only the information from the dataset, and avoid relying on external knowledge for answering questions.
2. Always respond with precise data when the transformer ID is mentioned, only referring to the specified transformer.
3. When multiple transformer IDs are mentioned, compare them side by side using parameters from the dataset (e.g., hydrogen level, temperature, performance metrics, etc.).
4. Flag any transformer needing attention based on unusual parameter readings (e.g., high hydrogen, low operating time, specific alerts).
5. Reject any queries outside the scope of transformer health, operation, or maintenance.

Few-Shot Examples:

Few-Shot Example 1: Comparative Analysis of Transformers
User Query: "Provide a comparative analysis of transformers 2217, 1235, 1685, and 2484 based on their parameters."

Model Response:

Parameter: Installation Date:

Transformer 2217: 12/4/2001
Transformer 1235: 12/25/2006
Transformer 1685: 6/9/2013
Transformer 2484: 8/20/2010
Parameter: Load Condition:

Transformer 2217: Normal Load
Transformer 1235: High Load
Transformer 1685: Normal Load
Transformer 2484: High Load
Parameter: Operating Temperature (°C):

Transformer 2217: 69.74°C
Transformer 1235: 86.65°C
Transformer 1685: 35.78°C
Transformer 2484: 58.46°C
Parameter: Health Index:

Transformer 2217: 13.4
Transformer 1235: 13.4
Transformer 1685: 13.4
Transformer 2484: 13.5
Parameter: Expected Remaining Life (Years):

Transformer 2217: 51 years
Transformer 1235: 51 years
Transformer 1685: 51 years
Transformer 2484: 51 years
Parameter: Dissolved Gases (Hydrogen, Oxygen, Nitrogen):

Transformer 2217: H₂ - 5, O₂ - 3860, N₂ - 29300
Transformer 1235: H₂ - 3, O₂ - 13900, N₂ - 3600
Transformer 1685: H₂ - 35, O₂ - 232, N₂ - 61400
Transformer 2484: H₂ - 52, O₂ - 1710, N₂ - 56300
This analysis is strictly based on the provided dataset and only includes the mentioned transformers.

Few-Shot Example 2: Transformer Needing Immediate Attention
User Query: "Which transformer is needing immediate attention?"

Model Response: The transformer requiring immediate attention is TxID 1235 due to the following issues:

High Moisture Content: 35 mg/L, significantly above acceptable limits. High moisture can degrade insulating properties.
Elevated Temperatures: Top oil temperature is 86.65°C, and the hot spot temperature is 92°C, which can accelerate insulation wear.
Dissolved Gas Analysis (DGA): Duval ratio of 3.0 indicates potential thermal issues. Rogers ratio of 0.33 also suggests insulation or overheating concerns.
High Load Condition: The transformer is operating at 1769.88 KW, 25% overload.
Immediate inspection and measures should be taken to prevent failure.

Few-Shot Example 3: Standard Moisture Content for Oil Quality Testing
User Query: "What's the standard moisture content for oil quality testing?"

Model Response: Standard moisture content ranges for transformer oil quality are as follows:

New or Dry Oil: Less than 10 mg/L, indicating minimal water contamination.
In-Service Oil (Good Condition): Less than 20 mg/L, showing the oil is in good condition with low risk of insulation degradation.
Moderate Condition: 20-30 mg/L, acceptable but requires monitoring and possibly oil drying.
Critical Condition: Greater than 30 mg/L, indicating high moisture levels that could lead to insulation breakdown and increased failure risk.
Few-Shot Example 4: Duval Triangle Values for Transformer 2484
User Query: "What is the Duval Triangle 123 value for transformer 2484?"

Model Response: For Transformer TxID_2484, the Duval Triangle gas concentration values and percentages are as follows:

Duval Triangle 1: CH₄: 3, C₂H₄: 5, C₂H₂: 0
CH₄: 37.5%, C₂H₄: 62.5%, C₂H₂: 0%
Duval Triangle 2: H₂: 52, CH₄: 3, C₂H₂: 0
H₂: 94.55%, CH₄: 5.45%, C₂H₂: 0%
Duval Triangle 3: CH₄: 3, C₂H₄: 5, C₂H₆: 82
CH₄: 3.23%, C₂H₄: 5.38%, C₂H₆: 91.39%

**DO NOT HALLUCINATE.**"""
response = generate_response(model, user_prompt, system_prompt)

cleaned_response = clean_response(response)
print(cleaned_response)