In [1]:
import torch
import emoji as emj
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
GPU_MODE = True

In [3]:
model_name = "HuggingFaceH4/zephyr-7b-beta"
new_prompt = "You are a specialized AI that only converts text into emojis. You must respond with emojis only and nothing else. Do not include any explanations, descriptions, or additional text."
old_prompt = "You are an AI specialized in converting text into emojis. Your job is to respond *only* with emojis that best represent the input text. Do not include any words, explanations, or symbols apart from emojis."
if GPU_MODE is True:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config = bnb_config)
else:
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
text_genaration_pipeline = pipeline(
    model = model,
    tokenizer = tokenizer,
    task = "text-generation",
    temperature = 0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=20,
)
llm = HuggingFacePipeline(pipeline=text_genaration_pipeline)
prompt_template = """
<|system|>
You are a specialized AI that only converts text into emojis. You must respond with emojis only and nothing else. Do not include any explanations, descriptions, or additional text.
</s>
<|user|>
{question}
</s>
<|assistant|>
"""
prompt = PromptTemplate(
     input_variables = ["question"],
     template=prompt_template
 )
llm_chain = prompt | llm | StrOutputParser()

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 8/8 [00:54<00:00,  6.82s/it]
  llm = HuggingFacePipeline(pipeline=text_genaration_pipeline)


In [4]:
def text_to_emoji(sentence):
    pure_text = llm_chain.invoke({"question":sentence})
    return ''.join(c for c in pure_text.split("\n")[0] if emj.is_emoji(c))

In [None]:
lines = 10
with open("./data/relevant_data.txt", 'r', encoding='utf-8') as file:
    content = [file.readline().strip() for _ in range(lines)]
print(content)

["However, a neighbour can complain to local authority about your hedge, if its height 'reduces the amenity value' of their garden, by reducing light to windows (not an issue, the gardens are too long) or denying them winter sunshine.", 'The experiments ended in 1965.', 'The systems technology and operation is fantastic, it will have a huge impact on water conservation.', 'The two countries are locked in a dispute over U.S. allegations that China steals U.S. technology and forces U.S. companies to share trade secrets in exchange for access to the Chinese market.', 'Should You Be a CSP?', 'Our mission is to send a message to kids everywhere about the importance of staying in school and loving it!', 'Creating a memory with loved ones?', 'Children are killed and disfigured on our roads every day, and every day we fail to stop the slaughter.', 'In the countryside of Ghana, in Ecuador, and in the Philippine Islands, countless people bring the produce of their farms and their handicrafts to 

In [6]:
for sentence in content:
    emoji = text_to_emoji(sentence)
    print(f"{sentence} - {emoji}")

  attn_output = torch.nn.functional.scaled_dot_product_attention(
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


However, a neighbour can complain to local authority about your hedge, if its height 'reduces the amenity value' of their garden, by reducing light to windows (not an issue, the gardens are too long) or denying them winter sunshine. - 🌳❌🙅🏻♂
The experiments ended in 1965. - 🔬📅⏳🚫
The systems technology and operation is fantastic, it will have a huge impact on water conservation. - 💧🔧🤩🌎💡
The two countries are locked in a dispute over U.S. allegations that China steals U.S. technology and forces U.S. companies to share trade secrets in exchange for access to the Chinese market. - 🤝🔎🛑
Should You Be a CSP? - 💻📊🔑❗
Our mission is to send a message to kids everywhere about the importance of staying in school and loving it! - 📝💭🧠🎓😍
Creating a memory with loved ones? - 📸👨👩
Children are killed and disfigured on our roads every day, and every day we fail to stop the slaughter. - 🚗🚶♂🚶♀
In the countryside of Ghana, in Ecuador, and in the Philippine Islands, countless people bring the produce of their

In [7]:
# result = llm_chain.invoke({"context":"Please give your answers strictly according to the following template and do not provide any additional information:\n<sentence in text> - <emoji translation>", "question":"generate random sentence and it's emoji variant"})
# pure_text = result.split("<|assistant|>")[1].strip()
# print(pure_text)

In [8]:
# for text in pure_text:
#     sent, emoji = text.split("-")