# demo


## Installations and imports

In [1]:
import os

# Configure the process environment in one step, before any ML library is imported:
# only CUDA device 0 is made visible, and tokenizers parallelism is switched off.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import time

# Start timing the import cell
start_time = time.time()

import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

# Stop timing and report how long the imports took
end_time = time.time()
print(f"Time to import libraries: {end_time - start_time:.2f} seconds")


Detected accelerate version: 0.34.2
Detected bitsandbytes version: 0.44.1
Detected datasets version: 3.0.1
Detected jinja2 version: 2.11.3
Detected nltk version: 3.7
Detected pandas version: 1.4.2
Detected peft version: 0.13.0
Detected psutil version: 5.8.0
Detected pytest version: 7.1.1
Detected safetensors version: 0.4.5
Detected scipy version: 1.7.3
Detected tokenizers version: 0.19.1
Detected torchvision version: 0.15.2+cu117
Detected torch version: 2.1.2+cu118
Detected PIL version 9.0.1


_default_log_level: 10


Detected rich version: 13.9.1


Time to import libraries: 4.65 seconds


## Preparing the data and the core evaluation functions

In [18]:


def generate_prompt(data_point):
    """Build the labelled prompt for one example.

    Reads ``data_point["text"]`` and ``data_point["sentiment"]`` and returns the
    instruction text followed by ``[<text>] = <sentiment>``, with leading and
    trailing whitespace stripped. The indentation inside the template is part
    of the returned string.
    """
    headline = data_point["text"]
    label = data_point["sentiment"]
    prompt = f"""
            Analyze the sentiment of the news headline enclosed in square brackets, 
            determine if it is positive, neutral, or negative, and return the answer as 
            the corresponding sentiment label "positive" or "neutral" or "negative".

            [{headline}] = {label}
            """
    return prompt.strip()

def generate_test_prompt(data_point):
    """Build the unlabelled prompt used at inference time.

    ``data_point`` is formatted directly inside the square brackets. Because
    the result is stripped, the returned prompt ends with ``] =`` (no trailing
    space), leaving the label for the model to generate.
    """
    prompt = f"""
            Analyze the sentiment of the news headline enclosed in square brackets, 
            determine if it is positive, neutral, or negative, and return the answer as 
            the corresponding sentiment label "positive" or "neutral" or "negative".

            [{data_point}] = """
    return prompt.strip()



## predict function

In [13]:
def _label_from_answer(answer):
    """Return the first sentiment label found in ``answer``, or ``"none"``."""
    # Labels are checked in this fixed order: positive, negative, neutral.
    for label in ("positive", "negative", "neutral"):
        if label in answer:
            return label
    return "none"


def predict(X_test, model, tokenizer):
    """Classify one or more headlines with a text-generation pipeline.

    Args:
        X_test: A single headline (string), or a list/tuple of headlines.
            A single value yields a one-element result, as before.
        model: Model object handed to ``pipeline``.
        tokenizer: Tokenizer object handed to ``pipeline``.

    Returns:
        A list with one entry per headline, each being "positive",
        "negative", "neutral", or "none" when no label is found in the
        generated text.
    """
    # A list/tuple is treated as a batch of headlines. Previously it was
    # formatted as a whole into a single prompt, so the model saw the Python
    # repr of the list instead of the headline.
    if isinstance(X_test, (list, tuple)):
        texts = list(X_test)
    else:
        texts = [X_test]
    if not texts:
        return []

    # Build the pipeline once per call rather than once per headline.
    # do_sample=False requests greedy decoding explicitly; the former
    # temperature=0.0 has no effect without sampling and is not a valid
    # sampling temperature.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=1,
                    do_sample=False,
                    )

    y_pred = []
    for text in texts:
        prompt = generate_test_prompt(text)
        result = pipe(prompt)
        # Only the text after the last "=" is inspected for a label.
        answer = result[0]['generated_text'].split("=")[-1]
        y_pred.append(_label_from_answer(answer))
    return y_pred

## Load all the models

In [9]:
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def load_model_and_tokenizer(model_path: str, max_seq_length: int = 512):
    """Load a 4-bit quantized causal language model and its tokenizer.

    Args:
        model_path: Directory (or identifier) passed to ``from_pretrained``
            for both the model and the tokenizer.
        max_seq_length: Maximum sequence length recorded on the tokenizer
            (forwarded as ``model_max_length``).

    Returns:
        A ``(model, tokenizer)`` tuple. The elapsed load time is printed.
    """
    # Start timing
    start_time = time.time()

    compute_dtype = torch.float16

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
    )

    # The "" key stands for the root module, so the whole model is placed on
    # the GPU. The former extra "transformer.h" entry pointed at the same
    # device and added nothing.
    device_map = {"": "cuda"}

    # Load the model
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map=device_map,
        torch_dtype=compute_dtype,
        quantization_config=bnb_config,
    )
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # Load the tokenizer. The length limit must be passed as
    # ``model_max_length``; the previous ``max_seq_length`` keyword is not a
    # tokenizer setting, so the argument had no effect.
    tokenizer = AutoTokenizer.from_pretrained(model_path, model_max_length=max_seq_length)
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # Stop timing
    end_time = time.time()
    execution_time = end_time - start_time

    print(f"Execution time: {execution_time:.2f} seconds")
    return model, tokenizer

# Local checkpoint directories, one per model.
# Raw strings keep the Windows backslashes literal; the spelling "weigths" is
# part of the path and is left untouched.
model_name_gemma_2 = r"E:\gemma\trained_weigths_2b"
model_name_gemma_7 = r"E:\gemma\trained_weigths"
model_name_llama3_8 = r"E:\llama\trained_weigths"  # path for the llama 8B model
model_name_phi3 = r"E:\phi3"  # path for the phi3 model (its load is disabled below)

# Load each model together with its tokenizer.
model_gemma_7, tokenizer_gemma_7 = load_model_and_tokenizer(model_path=model_name_gemma_7)
model_gemma_2, tokenizer_gemma_2 = load_model_and_tokenizer(model_path=model_name_gemma_2)
model_llama3_8, tokenizer_llama3_8 = load_model_and_tokenizer(model_path=model_name_llama3_8)
# phi3_model, tokenizer_phi3 = load_model_and_tokenizer(model_name_phi3)


loading configuration file E:\gemma\gemma-transformers-1.1-7b-it-v1\config.json
Model config GemmaConfig {
  "_name_or_path": "E:\\gemma\\gemma-transformers-1.1-7b-it-v1",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "head_dim": 256,
  "hidden_act": "gelu_pytorch_tanh",
  "hidden_activation": "gelu_pytorch_tanh",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 24576,
  "max_position_embeddings": 8192,
  "model_type": "gemma",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "rope_theta": 10000.0,
  "torch_dtype": "float16",
  "transformers_version": "4.40.0",
  "use_cache": true,
  "vocab_size": 256000
}

loading weights file E:\gemma\gemma-transformers-1.1-7b-it-v1\model.safetensors.index.json
Instantiating GemmaForCausalLM model under default dtype torch.float16.
Generate c

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing GemmaForCausalLM.

All the weights of GemmaForCausalLM were initialized from the model checkpoint at E:\gemma\gemma-transformers-1.1-7b-it-v1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use GemmaForCausalLM for predictions without further training.
loading configuration file E:\gemma\gemma-transformers-1.1-7b-it-v1\generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 2,
  "eos_token_id": 1,
  "pad_token_id": 0
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file E:\gemma\gemma-transformers-2b-it-v3\config.json
Model config GemmaConfig {
  "_name_or_path": "E:\\gemma\\gemma-transformers-2b-it-v3",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
 

Execution time: 26.53 seconds


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing GemmaForCausalLM.

All the weights of GemmaForCausalLM were initialized from the model checkpoint at E:\gemma\gemma-transformers-2b-it-v3.
If your task is similar to the task the model of the checkpoint was trained on, you can already use GemmaForCausalLM for predictions without further training.
loading configuration file E:\gemma\gemma-transformers-2b-it-v3\generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 2,
  "eos_token_id": 1,
  "pad_token_id": 0
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file E:\llama\llama-3-transformers-8b-chat-hf-v1\config.json
Model config LlamaConfig {
  "_name_or_path": "E:\\llama\\llama-3-transformers-8b-chat-hf-v1",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id"

Execution time: 8.22 seconds


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at E:\llama\llama-3-transformers-8b-chat-hf-v1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
loading configuration file E:\llama\llama-3-transformers-8b-chat-hf-v1\generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": 128001
}

loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Execution time: 19.61 seconds


## Test on a sample headline

In [14]:
# Sample headline used to smoke-test the loaded models.
X_test1 = (
    "According to Gran , the company has no plans to move all production "
    "to Russia , although that is where the company is growing ."
)



In [15]:
# Run the same sample headline through each loaded model.
y_pred_gemma_2 = predict(X_test1, model_gemma_2, tokenizer_gemma_2)
y_pred_gemma_7 = predict(X_test1, model_gemma_7, tokenizer_gemma_7)
y_pred_llama3_8 = predict(X_test1, model_llama3_8, tokenizer_llama3_8)
# y_pred_phi3 = predict(X_test1, phi3_model, tokenizer_phi3)

# Print one line of predictions per model.
for model_label, model_preds in (
    ("model_gemma_2", y_pred_gemma_2),
    ("model_gemma_7", y_pred_gemma_7),
    ("model_llama3_8", y_pred_llama3_8),
    # ("model_phi3", y_pred_phi3),
):
    print(f"Predictions for {model_label}:", model_preds)

Predictions for model_gemma_2: ['neutral']
Predictions for model_gemma_7: ['neutral']
Predictions for model_llama3_8: ['neutral']


## GUI

In [24]:
import tkinter as tk

def run_predictions():
    """Read the headline from the input box, classify it with every model, and show the results.

    Takes no arguments and returns nothing; it reads the ``input_text``
    widget and overwrites the contents of the ``output_text`` widget. An
    empty input produces a prompt to enter data, and any exception raised
    while predicting is shown as an error message instead of propagating.
    """
    input_data = input_text.get("1.0", tk.END).strip()  # Get multi-line input

    if not input_data:
        output_text.delete("1.0", tk.END)
        output_text.insert(tk.END, "Please enter some input data.")
        return

    try:
        # Pass the headline as a plain string. Wrapping it in a list made the
        # prompt contain the list's repr ("['...']") rather than the headline,
        # because predict formats its input straight into the prompt.
        y_pred_gemma_2 = predict(input_data, model_gemma_2, tokenizer_gemma_2)
        y_pred_gemma_7 = predict(input_data, model_gemma_7, tokenizer_gemma_7)
        y_pred_llama3_8 = predict(input_data, model_llama3_8, tokenizer_llama3_8)

        results = (
            f"Predictions for model_gemma_2: {y_pred_gemma_2}\n"
            f"Predictions for model_gemma_7: {y_pred_gemma_7}\n"
            f"Predictions for model_llama3_8: {y_pred_llama3_8}\n"
        )
    except Exception as e:
        # Surface the failure in the GUI rather than letting the callback die silently.
        results = f"An error occurred: {str(e)}"

    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, results)

# Build the top-level window.
root = tk.Tk()
root.title("Model Prediction GUI")

# Caption shown above the input area.
tk.Label(master=root, text="Enter input data:").pack(pady=10)

# Multi-line box where the user types the headline.
input_text = tk.Text(master=root, height=5, width=60)
input_text.pack(pady=10)

# Button that triggers run_predictions when clicked.
predict_button = tk.Button(master=root, text="Run Predictions", command=run_predictions)
predict_button.pack(pady=20)

# Box that run_predictions fills with the results.
output_text = tk.Text(master=root, height=10, width=60)
output_text.pack(pady=10)

# Hand control to Tk's event loop.
root.mainloop()
