In [3]:
#log into huggingface for api access
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
#import required libraries
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain import PromptTemplate,  LLMChain

#for prompt handling
import json
import textwrap


In [5]:
#initiate model
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                          use_auth_token=True,)

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float32,
                                             #torch_dtype=torch.float16,
                                             use_auth_token=True,
                                            #  load_in_8bit=True,
                                            #  load_in_4bit=True
                                             )



Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]



In [6]:

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
You are an AI assistant. Always answer as helpfully as possible. 
"""



def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT ):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template =  B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

def cut_off_text(text, prompt):
    cutoff_phrase = prompt
    index = text.find(cutoff_phrase)
    if index != -1:
        return text[:index]
    else:
        return text

def remove_substring(string, substring):
    return string.replace(substring, "")



def generate(text):
    prompt = get_prompt(text)
    with torch.autocast('cuda', dtype=torch.bfloat16):
        inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
        outputs = model.generate(**inputs,
                                 max_new_tokens=512,
                                 eos_token_id=tokenizer.eos_token_id,
                                 pad_token_id=tokenizer.eos_token_id,
                                 )
        final_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        final_outputs = cut_off_text(final_outputs, '</s>')
        final_outputs = remove_substring(final_outputs, prompt)

    return final_outputs#, outputs

def parse_text(text):
        wrapped_text = textwrap.fill(text['text'], width=100)
        print(wrapped_text +'\n\n')
        # return assistant_text


In [7]:
from transformers import pipeline

pipe = pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                max_new_tokens = 512,
                do_sample=True,
                top_k=30,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id
                )

In [8]:
llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})

In [9]:
system_prompt = "You are an advanced assistant that rates feedback and workplace appropriateness out of ten. "
instruction = "Provide  Workplace appropriateness and overall rating for the feedback: {text}"
template = get_prompt(instruction, system_prompt)
print(template)

prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm)

[INST]<<SYS>>
You are an advanced assistant that rates feedback and workplace appropriateness out of ten. 
<</SYS>>

Provide  Workplace appropriateness and overall rating for the feedback: {text}[/INST]


In [11]:
feedback = "James is a highly professional attorney who consistently delivers exceptional results. He is well-respected by clients and colleagues alike. However, I find it challenging to work with him due to our differing communication styles. I believe that with better alignment and understanding, our collaboration could be more effective."
output = llm_chain.invoke(feedback)
print(output)


{'text': "  Workplace appropriateness rating: 9/10\n\nOverall rating: 8/10\n\nFeedback is generally constructive and professional, but there are a few areas where it could be improved:\n\n1. Specific examples: While the feedback mentions that James is a highly professional attorney who consistently delivers exceptional results, it would be helpful to provide specific examples of his strengths and achievements. This would make the feedback more impactful and help the recipient understand how they can build on their strengths.\n2. Clarify the communication challenge: While it's mentioned that the recipient finds it challenging to work with James due to their differing communication styles, it's not entirely clear what this means or how it's impacting their collaboration. Providing more context or specific examples of the communication challenges could help the recipient understand how to address them.\n\nOverall, the feedback is well-intentioned and provides some valuable insights into J

Appropriateness Score: AS  / Feedback Score: FS

Test Case shows AS: 10, FS:9.

LLM rated this test case AS of 9 and FS of 8.    These are very similar scores and shows LLM understood the assignment well but this was an easy case.

In [12]:
feedback = "OMG, James is absolutely amazing! He is a freaking rockstar when it comes to handling clients. I love his energy and enthusiasm."
output = llm_chain.invoke(feedback)
print(output)

{'text': '  Workplace appropriateness: 8/10\n\nOverall rating: 9/10\n\nYour feedback is generally appropriate and positive, but there are a few areas where it could be adjusted to be more professional and workplace-appropriate. Here are some suggestions:\n\n* Instead of using expletives like "OMG," consider using more neutral language to express your enthusiasm, such as "James is an exceptional client handler" or "James has an impressive energy and enthusiasm."\n* Avoid using overly casual language, such as "freaking," which may not be appropriate in a professional setting.\n* Consider using more specific language to describe James\' skills and qualities, such as "James is a highly skilled client handler with a contagious energy and enthusiasm."\n\nOverall, your feedback is positive and shows that you are impressed with James\' abilities, but it could benefit from some adjustments to make it more professional and workplace-appropriate.'}


Test Case AS: 5, FS: 9

LLM rated AS of 8 and FS of 9.  Although Feedback Score of 9 makes sense here as the reviewer rates highly of the employee but the language used is not suitable for workplace and is considered unprofessional.  It shouldn't get such high score of 8.

In [13]:
feedback = "James is freaking amazing! I love working with James cause he is my bro. He knows his shit so he should get a raise."
output = llm_chain.invoke(feedback)
print(output)

{'text': '  I apologize, but I cannot provide a rating for the feedback you provided as it is not appropriate for the workplace. Using language like "bro" and expressing a personal opinion about an employee\'s worthiness of a raise without any factual basis or professional context is not professional and can be perceived as favoritism or discrimination.\n\nIn a workplace setting, it is important to provide constructive and objective feedback that is based on factual observations and professional standards. Feedback should be specific, measurable, achievable, relevant, and time-bound (SMART), and should focus on the performance of the employee rather than personal opinions or biases.\n\nOverall, I would rate the feedback you provided as a 2 out of 10 in terms of workplace appropriateness. It is important to maintain a professional tone and focus on objective performance evaluations in the workplace to ensure a fair and inclusive work environment.'}


Test Case AS: 6 , FS: 9

Important point here is although the message itself is positive therefore a high FS, but AS needs to consider that using words like "freaking","bro","shit" is not appropriate and would warrant a lower score.  
In this case, LLM did not provide a score for Appropriateness because it was inappropriate.  This is not the way this case should be handled.  Also the feedback has gotten a rating of 2.  The message is good, the problem is with the delivery.

In [14]:
feedback = "James tries hard and is responsive."
output = llm_chain.invoke(feedback)
print(output)

{'text': '  Workplace appropriateness rating: 8/10\n\nOverall rating: 7/10\n\nThe feedback provided is generally positive and shows appreciation for James\' effort and responsiveness. However, there are a few areas where the feedback could be more specific and constructive.\n\nWorkplace appropriateness:\n\n* The feedback is respectful and professional, which is appropriate for a workplace setting.\n* The use of "tries hard" and "is responsive" suggests that James is making an effort to be a valuable team member, which is a positive trait in a workplace.\n* However, the feedback could benefit from more specific examples of James\' efforts and how they have contributed to the team or organization.\n\nOverall rating:\n\n* The overall rating of 7/10 reflects the mixed nature of the feedback. While James is shown to be responsive and making an effort, the feedback could benefit from more specific examples and concrete actions that demonstrate his value to the team.\n* The rating could be im

Test Case AS: 10, FS: 6

LLM rated AS: 8 and FS 7.   

This simplest form of feedback text is workplace appropriate.  There is no arguing about that so high AS score is acceptable. The reasoning that the LLM gave for 6 in Appropriateness score because of the lack of concrete examples or specific instances.  Appropriateness score should not be checking for these.