# Fine-tune Llama 2 in Google Colab



In [None]:
!pip install -q accelerate==0.21.0 bitsandbytes==0.40.2 transformers==4.31.0 xformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.8/211.8 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from PIL import Image

In [None]:
# Image-to-text (captioning) pipeline used by get_image_description.
image_to_text = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)



def get_image_description(image_url):
    """Return a generated text description (caption) of an image.

    The caption is meant to be combined with the news text and passed to the
    LLM as input, so that it can produce a natural-language answer.

    Args:
        image_url: Either something ``PIL.Image.open`` accepts (a file path or
            a binary file-like object) or an already-loaded
            ``PIL.Image.Image`` (e.g. one built from data uploaded through
            Streamlit). Despite the parameter name, nothing is downloaded
            here: a remote URL string is not fetched.

    Returns:
        The ``generated_text`` field of the first result produced by the
        ``image_to_text`` pipeline.
    """
    if isinstance(image_url, Image.Image):
        # Already a PIL image: only normalise the colour mode when needed.
        if image_url.mode != "RGB":
            i_image = image_url.convert(mode="RGB")
        else:
            i_image = image_url
    else:
        # convert() loads the pixel data and returns a new image, so the
        # opened file can be released as soon as the `with` block exits.
        with Image.open(image_url) as opened:
            i_image = opened.convert(mode="RGB")

    # The captioning pipeline is fed a PIL image in RGB mode.
    r = image_to_text(i_image)[0]
    return r['generated_text']

In [None]:
# Load the LLM (Llama 2) with 4-bit quantization and wrap it in a
# text-generation pipeline.

model_name = "NousResearch/Llama-2-7b-chat-hf"

# bitsandbytes settings: 4-bit NF4 weights, float16 compute, no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load base model; the device map {"": 0} assigns the whole model to device 0.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": 0},
    quantization_config=bnb_config,
)
# NOTE(review): disabling the cache is usually a training-time setting; this
# notebook only generates text - confirm whether it is still wanted.
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load the LLaMA tokenizer; the EOS token doubles as padding, padded on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Generation pipeline shared by the cells below (up to 200 new tokens per call).
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Build the natural-language explanation of a classification result.

prompt_base = """Give a natural language response of the results \
and try to explain to me with a lot of details the reason for the classification. \n\n"""

def nlp_answer(p: int, text = None, image = None):
  """Generate a one-line natural-language explanation of a classification.

  The prompt is `prompt_base`, followed by the news text and/or the image
  description, followed by a sentence stem chosen from `p`. The LLM continues
  that stem and only the first line of its continuation is kept.

  Args:
    p: Probability, as a percentage, that the news item is fake.
       p > 70 is phrased as "fake", p < 30 as "real" (reported with
       100 - p), anything in between as "not much certainty".
    text: Text of the news item (optional).
    image: Text description of the news image (optional).

  Returns:
    The sentence stem followed by the first non-empty line generated by
    `pipe`, or just the stem when the model generates nothing usable.

  Raises:
    AssertionError: if neither `text` nor `image` is provided.
  """
  assert text or image, "Text or/and Image are required"

  prompt = prompt_base

  if text:
    prompt += f"News' Text: {text}\n"
  if image:
    prompt += f"News' Image Description: {image}\n"

  # Sentence stem the model has to continue; it is also the start of the answer.
  if p > 70:
    final_prompt = f'The news is fake with a {p}% probability, because'
  elif p < 30:
    final_prompt = f'The news is real with a {100-p}% probability, because'
  else:
    final_prompt = 'There is not much certainty about the classification of the news item, because'

  prompt += 'Result and Explanation: ' + final_prompt

  r = pipe(prompt, return_full_text=False, temperature=0.8)

  # Keep only the first line of the continuation. Stripping first means a
  # generation that starts with a line break still yields its first sentence
  # instead of an answer that stops right after "because".
  continuation = r[0]['generated_text'].strip().split('\n')[0].rstrip()
  if not continuation:
    return final_prompt

  return f'{final_prompt} {continuation}'

# Ejemplos

In [None]:
# Sample news text and image description used by the example calls.
text = "rainbow faucet selftitled."
image = "a candle is lit in a car window."

In [None]:
# Example: high fake probability (80%); the answer is followed by two blank lines.
print(nlp_answer(80, text, image), end="\n\n\n")

The news is fake with a 80% probability, beacuse the text and image do not match. The text is about a rainbow faucet, but the image shows a candle being lit in a car window. This is a common technique used by fake news creators to make their content appear more believable.


In [None]:
# Example: undecided probability (50%); the answer is followed by two blank lines.
print(nlp_answer(50, text, image), end="\n\n\n")

There is not much certainty about the classification of the news item, beacuse the text and image do not provide any clear indication of the topic. The text could be about a new product, a creative expression, or a metaphor. The image could be a representation of a car ride, a candle, or a window. Therefore, the classification is "Undetermined".


In [None]:
# Example: low fake probability (10%). Pass the news text as `text`, like the
# other examples do; the image description was previously passed for both
# arguments, so any recorded output predates this call.
print(nlp_answer(10, text, image))
print()
print()

The news is real with a 90% probability, beacuse the image description is very detailed and the image is clear.




In [None]:
# Use a pipeline as a high-level helper.
# `pipeline` is already imported in the notebook's import cell, so it is not
# re-imported here. This rebuilds the text-generation pipeline with the same
# arguments as the model-loading cell.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)

In [None]:
# Few-shot prompt for token labelling: a task description, one worked example
# whose label list is closed by an "[END]" marker, and a second sentence whose
# labels are left for the model to complete.
prompt = """I want to label each token in the following sentences with the three classic labels of the named entity recognition task (Outer (O), Inside (I), Begin (B)). I want to tag skills for the labor market from job offers and job resumes

Sentence: • Collaborate with Tech partners to design and deploy Machine Learning services that can be integrated with strategic systems .
Labels: ['O', 'B', 'I', 'I', 'I', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] [END]

Sentence: The role will be in the firm's Applied AI and Machine Learning organization and will involve working closely with Digital & Platform Services Operations .
Labels:  """

# Prompt length in characters (displayed as the cell output).
len(prompt)

660

In [None]:
# Complete the label list for the last sentence of the prompt; "[END]" is
# passed as the stop sequence and only the newly generated text is printed.
r = pipe(
    prompt,
    return_full_text=False,
    stop_sequence="[END]",
)
print(r[0]["generated_text"])

 ['O', 'B', 'I', 'I', 'I', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] [


In [None]:
# Hand-written variant of the explanation prompt: the instruction, the news
# title and image description under a "## News' Description:" heading, and an
# "## Answer:" section that ends on a sentence stem for the model to continue.
prompt = """Give a natural language response of the results and try to explain to me with a lot of details the reason for the classification.

## News' Description:
Title: rainbow faucet selftitled.
News' Image Description: a candle is lit in a car window.

## Answer:
The news is not a 50% fake news because"""

In [None]:
# Continue the hand-written prompt and print only the generated continuation.
r = pipe(
    prompt,
    temperature=0.8,
    return_full_text=False,
)
print(r[0]["generated_text"])

 the image is not a real image of a rainbow faucet. The image is actually a stock photo of a candle lit in a car window. The image is not a real news article and is not a factual report of any kind. Therefore, it is a 100% fake news article.

## Reason: 
The image is not a real image of a rainbow faucet, as there is no such thing as a faucet that produces rainbows. The image is actually a stock photo of a candle lit in a car window, which is a common image used in various contexts such as advertising, social media, and news articles. The image is not a real news article and is not a factual report of any kind. Therefore, it is a 100% fake news article.
