<a href="https://colab.research.google.com/github/briocheausucre/PIE-SXS02/blob/main/notebook_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/briocheausucre/PIE-SXS02/blob/main/notebook_script.ipynb)

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import os

class LlamaLLM:
    def __init__(self, model_name, access_token, cache_dir="./llama_local"):
        self.model_name = model_name
        self.access_token = access_token
        self.cache_dir = cache_dir

        # Télécharger et stocker le tokenizer en local
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            token=access_token,
            cache_dir=cache_dir
        )

        # Télécharger et stocker le modèle en local
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            trust_remote_code=True,
            device_map="auto",
            cache_dir=cache_dir,
            use_auth_token=access_token
        )

        # Créer le pipeline en utilisant le modèle et le tokenizer téléchargés
        self.meta_pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            torch_dtype="auto",
            device_map="auto",
            trust_remote_code=True
        )

    def get_completion(self, user_input, system_prompt):
        prompt = f"#### System: {system_prompt}\n#### User: {user_input}\n\n#### Response from llama:"
        try:
            llama_response = self.meta_pipeline(
                prompt,
                max_length=5000,
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
                truncation=True
            )
            return llama_response[0]['generated_text']
        except Exception as e:
            print(f"Erreur lors de la génération de texte : {e}")
            return "Désolé, je n'ai pas pu traiter votre demande."

    def chat_loop(self):
        system_prompt = """
        """
        print("Entrez 'exit' pour quitter.")
        while True:
            try:
                prompt = input("\nEntrez votre prompt : ")
                if prompt.lower() == 'exit':
                    print("Chat terminé. Au revoir !")
                    break
                response = self.get_completion(prompt, system_prompt)
                print("\nLlama Response:\n", response)
            except KeyboardInterrupt:
                print("\nChat interrompu par l'utilisateur.")
                break
            except Exception as e:
                print(f"Une erreur est survenue : {e}")

    def check_response(self, guardrail1, response):
        system_prompt = """
        You are a LLM guardrail. Your aim is to analyse responses given by other
        LLMs and check if they are appropriate or not. If the following response
        is appropriate, please answer in only one word "No". If the answer is not
        appropriate, answer in only one word "Yes".
        We say that a response is inappropriate if it is for instance obscene,
        promotes self harm or violence, or has an illegal speech (promotes
        any kind of illegal paraphilia, racism, misogynism, etc.)
        I firmly don't want your answer to be other than "Yes" or "No" and be
        longer than one word.
        """
        appropriate = self.get_completion(response, system_prompt)
        if appropriate == "Yes":
            appropriate = True
        elif appropriate == "No":
            appropriate = False
        else:
            print(f"Guardrail 1 error : appropriate = {appropriate}")
            return None
        return appropriate

# POUR LANCER LE CHATBOT (DISCUSSION)

In [None]:
#### POUR LANCER LE CHATBOT (DISCUSSION) ####

access_token = "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"
model_name = "meta-llama/Llama-3.2-3B-Instruct"

chatbot = LlamaLLM(model_name, access_token)

chatbot.chat_loop()

# POUR TESTER GUARDRAIL 1.0

In [6]:
#### POUR TESTER GUARDRAIL 1.0 ####

access_token = "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"
model_name = "meta-llama/Llama-3.2-3B-Instruct"

chatbot = LlamaLLM(model_name, access_token)

guardrail1 = LlamaLLM(model_name, access_token)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
SHOW_INTERMEDIATE_RESPONSE = True

system_prompt = """
"""
prompt = input("\nEntrez votre prompt : ")

response = chatbot.get_completion(prompt, system_prompt)
if SHOW_INTERMEDIATE_RESPONSE:
  print(response)

appropriate = guardrail1.check_response(guardrail1, response)

if appropriate == True:
    print(response)
else:
    print('Error, inappropriate response from LLM')


Entrez votre prompt : T'as les cramptés ?


KeyboardInterrupt: 