In [36]:
#import sys
#sys.path.append('c:/Users/Jonathan_Espinosa/Projects/novartis/system_impact_classification')
#sys.path.append('c:/Users/Jonathan_Espinosa/Projects/novartis/system_impact_classification/data_processed')

In [1]:
import os

# The notebook is started from <project_root>/playground (see the first path
# printed below). Step up to the project root -- presumably so that
# `from config import config` and the data folders resolve; confirm.
# The target is derived from the current directory instead of a user-specific
# absolute path, and the guard makes re-running this cell a no-op.
NOTEBOOK_DIR_NAME = "playground"

print(os.getcwd())
if os.path.basename(os.getcwd()) == NOTEBOOK_DIR_NAME:
    os.chdir(os.path.dirname(os.getcwd()))
print(os.getcwd())

c:\Users\Jonathan_Espinosa\Projects\novartis\system_impact_classification\playground
c:\Users\Jonathan_Espinosa\Projects\novartis\system_impact_classification


In [2]:
import os
import pandas as pd
from langchain_core.messages import HumanMessage
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv
from config import config

In [None]:
# Load settings from a local .env file (if one is present) into the process
# environment so the os.getenv(...) lookups in the following cells can find them.
load_dotenv()

In [102]:
# Azure OpenAI chat model used for the classification chain.
# temperature=0 keeps the answers as repeatable as the service allows.
# All connection settings are read from the environment (never hardcoded).
# The API key is passed as `api_key`; the original passed it as
# `openai_api_version`, which clashes with `api_version` below.
llm = AzureChatOpenAI(
    temperature=0,
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_deployment=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)


In [103]:
# Resolve both CSV locations from the project config, then read them.
folder_data_processed = config['folder_data_processed']
path_map_equipment_groups = os.path.join(folder_data_processed, config['filename_map_equipment_groups'])
path_equipment_group_probs = os.path.join(folder_data_processed, config['filename_equipment_group_probs'])

df_map_equipment_groups, df_equipment_group_probs = (
    pd.read_csv(csv_path)
    for csv_path in (path_map_equipment_groups, path_equipment_group_probs)
)

# Next step: get the most similar group_name based on the customer query.
# Option: from langchain_experimental.agents import create_pandas_dataframe_agent
# (to provide the user more flexibility for aggregations/queries)

In [119]:
# @todo add fallback category
# Comma-separated list of the known group names; it is injected into the prompt
# as the set of categories the model may choose from.
equipment_group_names = df_map_equipment_groups['equipment_group_name'].tolist()
str_equipment_group_categories = ", ".join(equipment_group_names)
str_equipment_group_categories

'Heating, Cooling, Power Generation, Material Handling, Air Handling, Drying'

In [105]:
# Render the whole group -> equipments mapping as plain text (row index omitted)
# so it can be embedded in the LLM prompt as a reference table.
str_map_equipment_groups = df_map_equipment_groups.to_string(index=False)
str_map_equipment_groups

"equipment_group_name                             equipments\n             Heating        ['Boiler', 'Furnace', 'Heater']\n             Cooling                  ['Chiller', 'Cooler']\n    Power Generation               ['Generator', 'Turbine']\n   Material Handling ['Conveyor', 'Mixer', 'Pump', 'Valve']\n        Air Handling                      ['Fan', 'Blower']\n              Drying                              ['Dryer']"

In [127]:
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser 

# Structured result expected back from the LLM for one classification request.
# NOTE(review): this model is handed to PydanticOutputParser, so the class
# docstring and the Field descriptions presumably end up in the format
# instructions sent to the model -- treat their wording as prompt text.
class EquipmentGroupName(BaseModel):
    """
        This class is going to capture the classification result of 'New equipment' on 'equipment_group_name categories' and the classification score.
    """
    # Group chosen by the model; no default, so a reply without it fails validation.
    equipment_group_name : str = Field(description="Classified category from equipment_group_name") # default='Unknown'
    # Echo of the equipment name that was classified ('New equipment' matches the prompt label).
    new_equipment: str  = Field(description="Equipment name from 'New equipment' that belong to equipment_group_name")
    # Confidence constrained to 0..100; falls back to 0 when the model omits it.
    classification_score: int = Field(ge=0, le=100, description="Confidence level of equipment_group_name classification", default=0)

# Parser that validates the model reply against EquipmentGroupName, plus the
# matching instructions that tell the model which JSON shape to produce.
output_parser = PydanticOutputParser(pydantic_object=EquipmentGroupName)
output_format_instructions = output_parser.get_format_instructions()

# Prompt text; the four {placeholders} are filled in when the chain is invoked.
prompt_template = """You are an expert in manufacturing, your goal is to classify new equipment into an existing equipment group.
Be flexible and consider that the equipment name on 'New equipment' could have word variations like plural, synonym or misspelling.
'New equipment' should match one of the following 'equipment_group_name categories': {equipment_group_categories}.

{output_format_instructions}

'New equipment':
{user_equipment}

For the classification task, take into consideration similar names on 'equipments' column on the following table:
{map_equipment_groups}

"""

# `input_variables` is the PromptTemplate field for declaring placeholders; the
# original passed `variables=`, which is not a PromptTemplate field.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=[
        'user_equipment',
        'map_equipment_groups',
        'output_format_instructions',
        'equipment_group_categories',
    ],
)  # @todo add cols as variables

In [98]:
# print(output_format_instructions)

In [120]:
# Pipeline: fill the prompt -> query the model -> parse the reply into EquipmentGroupName.
chain = prompt | llm | output_parser

# Equipment name to classify.
# Test cases  on the table :  Chiller, Cooler, Conveyor, Bolier
# Test cases outside the table: Bolier, Boliers,  Main Cooler, Freezer,
user_equipment = 'Main Cooler'

# Every template placeholder is supplied at call time.
prompt_inputs = dict(
    user_equipment=user_equipment,
    map_equipment_groups=str_map_equipment_groups,
    output_format_instructions=output_format_instructions,
    equipment_group_categories=str_equipment_group_categories,
)
result = chain.invoke(prompt_inputs)
print(result)

equipment_group_name='Cooling' new_equipment='Main Cooler' classification_score=80


In [118]:
# Keep only the probability row(s) of the group predicted by the LLM and tag
# them with the equipment name that was classified.
equipment_group_name = result.equipment_group_name
is_predicted_group = df_equipment_group_probs['equipment_group_name'] == equipment_group_name
# .assign builds a new frame, so the source table stays untouched and no
# SettingWithCopyWarning is raised (the original wrote a column into a
# .loc-filtered slice).
df_equipment_probs = (
    df_equipment_group_probs
    .loc[is_predicted_group]
    .assign(equipment_name=user_equipment)
)
df_equipment_probs

Unnamed: 0,equipment_group_name,Criteria 1,Criteria 2,Criteria 3,Criteria 4,Criteria 5,Criteria 6,Criteria 7,Criteria 8a,Criteria 8b,equipment_name
1,Cooling,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,Main Cooler


In [130]:
# Bind the inputs that stay the same between calls, leaving only
# `user_equipment` to be provided per request.
partial_prompt = prompt.partial(
    map_equipment_groups=str_map_equipment_groups,
    output_format_instructions=output_format_instructions,
    equipment_group_categories=str_equipment_group_categories,
)

# Same pipeline as before, built on the pre-filled prompt.
chain = partial_prompt | llm | output_parser

# Equipment name to classify.
# Test cases  on the table :  Chiller, Cooler, Conveyor, Bolier
# Test cases outside the table: Bolier, Boliers,  Main Cooler, Freezer,
user_equipment = 'Main Cooler'

result = chain.invoke(dict(user_equipment=user_equipment))
print(result)

equipment_group_name='Cooling' new_equipment='Main Cooler' classification_score=80
