### creating the memory features

### defining the class of the assistant

In [None]:
# class ghg_assistant #
# atributes: model parameteres (model, temperature, max completition tokens, system_config)
# methods:
# - generates response (user_prompt, context)
# - evaluate user prompt (if the question has any relation to the topic)
# - is a valid question (evaluate whether the question is related to GHG topic) TBD
from os import getenv
from groq import Client
from spacy import load
from spacy.matcher import PhraseMatcher

class GHGAssistant():
    
    def __init__(
        self,
        model : str = 'llama-3.3-70b-versatile',
        temperature : float = 0.5,
        max_completion_tokens : int = 800,
    ):
        self.model, self.temp, self.max_tokens = model, temperature, max_completion_tokens
        self.disclaimer = (
            "\n\n**Disclaimer:** Be mindful that this is an AI assistant. "
            "Please consult with a professional before proceeding."
        )
        self.system_config = """You are a digital consultant specializing in Australia's evolving greenhouse gas (GHG) emission regulations. 
        Your task is to help companies navigate the complexities of compliance, accurate emission calculations, and industry-specific scope definitions. 
        Ensure the response is practical, actionable, and aligned with the most recent regulatory updates. 
        If the answer is not available or unclear, state that you do not know.
        """
        # configuration of the system role
        self.conversation = [{
                'role' : 'system',
                'content' : self.system_config
            }]
        # define legal entities for detection of delicate enquiries
        self.legal_entities = ["LAW", "NORP", "ORG", "GPE"]
        self.financial_entities = ["MONEY", "ORG", "PERCENT", "CARDINAL", "PRODUCT"]
        # Define additional legal and financial keywords
        self.legal_terms = ["lawsuit", "attorney", "plaintiff", "defendant", "malpractice", "contract", "liability", "sue", "court", "judge", "compliance", "regulation", "policy", "statute"]
        self.financial_terms = ["investment", "stocks", "bond", "revenue", "profit", "bankruptcy", "tax", "audit", "loan", "mortgage"]
    def is_legal_or_financial(
        self,
        sample_text : str
    ) -> bool:
        """
        takes any text and detects if the text is related to finance or law using a pretrained model
        """
        nlp = load("en_core_web_md")
        doc = nlp(sample_text)
        
        matcher = PhraseMatcher(
            vocab = nlp.vocab,
            attr = 'lower'
        )
        # add terms to the matcher
        pattern = [nlp(term) for term in self.legal_terms + self.financial_terms]
        matcher.add('legal_or_financial', pattern)
        flag = False
        for ent in doc.ents:
            if ent.label_ in self.legal_entities or ent.label_ in self.financial_entities:
                flag = True
        matches = matcher(doc)
        if matches:
            flag = True
        return flag
    def generate_response(
        self,
        user_prompt : str,
        context : str = None
    ):
        client = Client(
            api_key = getenv('GROQ_API_KEY')
        )
        # initialize the conversation
        self.conversation.append(
            # configuration of the response
            {
                'role' : 'assistant',
                'content' : f"\n\nUse the following context to provide tailored, concise, and accurate guidance.'{context}'"
            }
        )
        self.conversation.append(
            # adding the query from the user
            {
                'role' : 'user',
                'content' : user_prompt
            }
        )
        # generating the response
        response = client.chat.completions.create(
            messages = self.conversation,
            model = self.model,
            temperature = self.temp,
            max_completion_tokens = self.max_tokens
        )
        # retreiving the output
        ai_ouput = response.choices[0].message.content
        # check if the content el related to legal or financial terms
        if self.is_legal_or_financial(user_prompt):
            ai_ouput += self.disclaimer
        # add to the existing memory of the conversation
        self.conversation.append(
            {
                'role' : 'assistant',
                'content' : ai_ouput
            }
        )
        return ai_ouput
    
test_assistant = GHGAssistant()

test how responses differ with and without context

In [12]:
no_context_response = test_assistant.generate_response(
    user_prompt = 'what is the most popular player from colombia currently?'
)
no_context_response

"I don't have information on the most popular player from Colombia currently. My expertise is in Australia's greenhouse gas emission regulations, and I don't have knowledge on sports or current events. If you have any questions related to GHG emission regulations in Australia, I'd be happy to help.\n\n**Disclaimer:** Be mindful that this is an AI assistant. Please consult with a professional before proceeding."

In [13]:
context_response = test_assistant.generate_response(
    user_prompt = 'what is the most popular player from colombia currently?',
    context = 'In the lastest match between Colombia and Brazil, Luis Diaz score two goals and secure the win for the caferos in a critical stage for the world cup'
)
context_response

"Based on recent performances, Luis Diaz is likely a popular player from Colombia currently, given his impressive display in the latest match against Brazil, where he scored two goals and secured a win for Colombia. However, I don't have real-time information on current events or sports, and popularity can vary depending on individual opinions and sources.\n\nIf you're looking for information on greenhouse gas emission regulations in Australia or related topics, I'd be happy to provide guidance and support.\n\n**Disclaimer:** Be mindful that this is an AI assistant. Please consult with a professional before proceeding."

Effectively, once the context is added the responses vary accordingly

In [14]:
test_assistant.conversation

[{'role': 'system',
  'content': "You are a digital consultant specializing in Australia's evolving greenhouse gas (GHG) emission regulations. \n        Your task is to help companies navigate the complexities of compliance, accurate emission calculations, and industry-specific scope definitions. \n        Ensure the response is practical, actionable, and aligned with the most recent regulatory updates. \n        If the answer is not available or unclear, state that you do not know.\n        "},
 {'role': 'assistant',
  'content': "\n\nUse the following context to provide tailored, concise, and accurate guidance.'None'"},
 {'role': 'user',
  'content': 'what is the most popular player from colombia currently?'},
 {'role': 'assistant',
  'content': "I don't have information on the most popular player from Colombia currently. My expertise is in Australia's greenhouse gas emission regulations, and I don't have knowledge on sports or current events. If you have any questions related to GHG

also, the conversation is stored as an atribute of the class

In [15]:
legal_response = test_assistant.generate_response(
    user_prompt = 'how can i sue my doctor for malpractice during an operation?'
)
print(legal_response, end = "")

I'm not a medical malpractice lawyer, and my expertise is in Australia's greenhouse gas emission regulations. However, I can provide general guidance on the steps you might consider taking if you believe you have a case for medical malpractice.

To sue a doctor for malpractice during an operation in Australia, you may want to consider the following steps:

1. **Gather evidence**: Collect all relevant medical records, test results, and other documentation related to the operation and your treatment.
2. **Consult a lawyer**: Seek advice from a lawyer who specializes in medical malpractice cases in Australia. They can help you understand your rights and the potential strengths and weaknesses of your case.
3. **Report the incident**: Notify the hospital or medical facility where the operation took place, as well as the Australian Health Practitioner Regulation Agency (AHPRA), about the alleged malpractice.
4. **File a complaint**: You may need to file a complaint with the relevant state or

In [23]:
legal_response_topic_related = test_assistant.generate_response(
    user_prompt = 'how can i calculate CO2 emissions for a legal compliance report?'
)
print(legal_response_topic_related)

To calculate CO2 emissions for a legal compliance report in Australia, follow these steps:

1. **Determine the scope of emissions**: Identify the sources of emissions that need to be reported, such as:
	* Scope 1: Direct emissions from owned or controlled sources (e.g., fuel combustion, industrial processes).
	* Scope 2: Indirect emissions from purchased electricity, heat, or steam.
	* Scope 3: Other indirect emissions (e.g., supply chain, employee commuting).
2. **Collect activity data**: Gather data on the activities that generate emissions, such as:
	* Fuel consumption (e.g., diesel, gasoline, natural gas).
	* Electricity consumption.
	* Production volumes (e.g., for industrial processes).
	* Vehicle kilometers traveled.
3. **Choose an emission factor**: Select a suitable emission factor to convert activity data into CO2 emissions. You can use:
	* The Australian Government's National Greenhouse and Energy Reporting (NGER) scheme's emission factors.
	* The IPCC (Intergovernmental Pan

In [20]:
test_assistant.is_legal_or_financial('how can i calculate CO2 emissions for a legal compliance report?')

False

In [16]:
test_assistant.conversation

[{'role': 'system',
  'content': "You are a digital consultant specializing in Australia's evolving greenhouse gas (GHG) emission regulations. \n        Your task is to help companies navigate the complexities of compliance, accurate emission calculations, and industry-specific scope definitions. \n        Ensure the response is practical, actionable, and aligned with the most recent regulatory updates. \n        If the answer is not available or unclear, state that you do not know.\n        "},
 {'role': 'assistant',
  'content': "\n\nUse the following context to provide tailored, concise, and accurate guidance.'None'"},
 {'role': 'user',
  'content': 'what is the most popular player from colombia currently?'},
 {'role': 'assistant',
  'content': "I don't have information on the most popular player from Colombia currently. My expertise is in Australia's greenhouse gas emission regulations, and I don't have knowledge on sports or current events. If you have any questions related to GHG

### using spacy for NER for financial and legal entities in the user prompt

In [21]:
from spacy import load
from spacy.matcher import PhraseMatcher
nlp = load('en_core_web_md')
# define legal entities
legal_entities = ["LAW", "NORP", "ORG", "GPE"]
financial_entities = ["MONEY", "ORG", "PERCENT", "CARDINAL", "PRODUCT"]
# Define additional legal and financial keywords
legal_terms = ["lawsuit", "attorney", "plaintiff", "defendant", "malpractice", "contract", "liability", "sue", "court", "judge"]
financial_terms = ["investment", "stocks", "bond", "revenue", "profit", "bankruptcy", "tax", "audit", "loan", "mortgage"]
# initiate the matcher
matcher = PhraseMatcher(
    vocab = nlp.vocab,
    attr = 'lower'
)
pattern = [nlp(term) for term in legal_terms + financial_terms]
matcher.add('legal_or_financial', pattern)
def check_for_senstive_topics(
    text : str
):
    doc = nlp(text)
    flag = False
    for ent in doc.ents:
        if ent.label_ in legal_entities or ent.label_ in financial_entities:
            flag = True
    matches = matcher(doc)
    if matches:
        flag = True
    return flag
sample_text = 'how can i calculate CO2 emissions for a legal compliance report?'
print(check_for_senstive_topics(sample_text))

False
