In [1]:
# Move one directory up so the relative paths used later (e.g. 'reports/llms_evals/...') resolve.
# NOTE: this is not idempotent -- re-running the cell moves up one more directory each time.
%cd ..
# Enable autoreload so edited modules are re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

/Users/danieloliveiradebrito/Projetos/fakenews


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load variables from a local .env file (python-dotenv) into the process environment;
# the return value is bound to `_` only to discard it.
_ = load_dotenv()

# Shared API client used by every request in this notebook.
# NOTE: from here on the name `openai` is this client instance, not the `openai` package.
openai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Prompt

In [8]:
# System message shared by every prompt variant: it sets the task (fake-news classification
# of Brazilian Portuguese articles). The text is sent to the model verbatim.
system_prompt = (
    'You are an helpful AI assistant tasked with performing classification '
    'of potentially fake news articles in Brazilian Portuguese. '
    'Your goal is to determine whether the given news article is likely to be '
    'fake or true based on careful analysis of its content, style, and characteristics.'
)

## Zero shot

In [9]:
# Reference prompt kept for comparison only; it is not used by any code in this cell.
# Note that it uses the opposite label convention (1 = real) from the prompt built below
# (is_fake = 1 means fake):
#   Given the following message, predict its veracity. If it is more likely to be a real
#   message, return 1; otherwise, return 0. Please refrain from providing ambiguous
#   assessments such as undetermined: Detailed photos ofXiang Liu’s tendon surgery exposed.
#   Stop complaints and please show sympathy and blessings!

def get_prompt_zero_shot_classification(news_article: str, system_prompt: str) -> list[dict[str, str]]:
    """Build the chat messages for zero-shot fake-news classification.

    Args:
        news_article: Text of the article to classify; it is interpolated verbatim
            between ``<news_article>`` tags.
        system_prompt: Content of the system message.

    Returns:
        A two-element list of chat messages (``{"role": ..., "content": ...}``):
        the system message followed by the user message. The user message asks for
        a JSON object with an ``"is_fake"`` key (1 = likely fake, 0 = otherwise).
    """
    user_prompt = f'''Here is the news article you need to analyze:

<news_article>```{news_article}```</news_article>

Return a JSON object with the following keys:
- "is_fake": your <classification>. If <news_article> is likely to be fake, return 1. Otherwise, return 0.'''

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]



## CoT

In [7]:
# Reference prompt kept for comparison only; it is not used by any code in this cell.
# Note that it uses the opposite label convention (1 = real) from the prompt built below
# (is_fake = 1 means fake):
#   Given the following message, predict its veracity. If it is more likely to be a real
#   message, return 1; otherwise, return 0. Please refrain from providing ambiguous
#   assessments such as undetermined: Detailed photos ofXiang Liu’s tendon surgery exposed.
#   Stop complaints and please show sympathy and blessings!


def build_cot_user_prompt(news_article: str) -> str:
    """Build the chain-of-thought (CoT) user message for one news article.

    Args:
        news_article: Text of the article to classify; it is interpolated verbatim
            between ``<news_article>`` tags.

    Returns:
        The user prompt. It asks the model to reason step by step and to return a
        JSON object with the keys ``"sketchpad"`` (the analysis) and ``"is_fake"``
        (1 = likely fake, 0 = otherwise).
    """
    return f'''Here is the news article you need to analyze:

<news_article>```{news_article}```</news_article>

Lets think it step-by-step if the <news_article> is likely to be fake within the provided <sketchpad> following the aspects below:

1. Read the article carefully
2. Identify the main elements of the article
3. Check if there are any signs of fake news, such as:
    - Misleading information
    - Sensationalist language
    - Lack of credible sources
    - Biased viewpoints
    - Outdated or recycled content
4. Compare the article with other reputable sources
5. Consider the context and relevance of the news, if the news is plausible.

Provide your classification and reasoning based on the <sketchpad>.

Return a JSON object with the following keys:
- "sketchpad": your analysis of the news article based on the provided aspects.
- "is_fake": your <classification>. If <news_article> is likely to be fake, return 1. Otherwise, return 0.'''


# Placeholder article: the cell still defines `news_article` and `user_prompt` with the
# same values as before (the template rendered for an empty article).
news_article = ''

user_prompt = build_cot_user_prompt(news_article)

## Few shot + CoT

In [None]:
# Reference prompt kept for comparison only; it is not used by any code in this cell.
# Note that it uses the opposite label convention (1 = real) from the prompt built below
# (is_fake = 1 means fake):
#   Given the following message, predict its veracity. If it is more likely to be a real
#   message, return 1; otherwise, return 0. Please refrain from providing ambiguous
#   assessments such as undetermined: Detailed photos ofXiang Liu’s tendon surgery exposed.
#   Stop complaints and please show sympathy and blessings!


def build_few_shot_cot_user_prompt(news_article: str, fake_example: str, real_example: str) -> str:
    """Build the few-shot + chain-of-thought user message for one news article.

    Args:
        news_article: Text of the article to classify; placed on its own line between
            ``<news_article>`` tags.
        fake_example: Article text shown to the model as the "1 (fake)" example.
        real_example: Article text shown to the model as the "0 (real)" example.

    Returns:
        The user prompt. It shows the two labelled examples, asks the model to reason
        step by step, and requests a JSON object with the keys ``"sketchpad"`` and
        ``"is_fake"`` (1 = likely fake, 0 = otherwise).
    """
    return f'''First, I will provide some examples of fake news and real news in Brazilian Portuguese.

- article: ```{fake_example}```
- classification: 1 (fake)

- article: ```{real_example}```
- classification: 0 (real)

Here is the news article you need to analyze:

<news_article>
{news_article}
</news_article>

Lets think it step-by-step if the <news_article> is likely to be fake within the provided <sketchpad> following the aspects below:

1. Read the article carefully
2. Identify the main elements of the article
3. Check if there are any signs of fake news, such as:
    - Misleading information
    - Sensationalist language
    - Lack of credible sources
    - Biased viewpoints
    - Outdated or recycled content
4. Compare the article with other reputable sources
5. Consider the context and relevance of the news, if the news is plausible.

Provide your classification and reasoning based on the <sketchpad>.

Return a JSON object with the following keys:
- "sketchpad": your analysis of the news article based on the provided aspects.
- "is_fake": your <classification>. If <news_article> is likely to be fake, return 1. Otherwise, return 0.'''


# System message for this prompt variant (sent to the model verbatim).
system_prompt = '''You are an helpful AI assistant tasked with performing classification of potentially fake news articles in Brazilian Portuguese. Your goal is to determine whether the given news article is likely to be fake or true based on careful analysis of its content, style, and characteristics.'''

# Placeholders: the cell still defines `news_article`, `example` and `user_prompt` with the
# same values as before (the same empty example fills both the fake and the real slot).
news_article = ''
example = ''

user_prompt = build_few_shot_cot_user_prompt(news_article, fake_example=example, real_example=example)

# Datasets

## Fake Recogna

In [3]:
from sklearn.model_selection import train_test_split

from fakenews.read_data import read_fake_recogna

# Load the dataset returned by read_fake_recogna() as a pandas DataFrame.
df = read_fake_recogna().to_pandas()

# Hold out 20% of the rows as the test set, stratified on the label column.
train_full, test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=df["label"],
)

# Split the remaining 80% again (80/20, stratified), which gives roughly
# 64% train / 16% validation / 20% test of the full dataset.
train, val = train_test_split(
    train_full,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=train_full["label"],
)

### CoT

In [35]:
# Position (not index label) of the test row used for the single-request trial below.
i = 0

# Article text and its ground-truth label for that row.
example, label = test['text'].values[i], test['label'].values[i]

In [36]:
from fakenews.prompts import get_cot_prompt, get_system_prompt


# One CoT classification request for the article in `example`.
# temperature=0 is used for the request and the reply is requested as a JSON object.
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0,
    response_format={"type": "json_object"},
    messages=[
        {'role': 'system', 'content': get_system_prompt()},
        {'role': 'user', 'content': get_cot_prompt(news_article=example)},
    ],
)

In [37]:
import json

# Parse the JSON reply for test sample 0. The message content is read through the typed
# response object instead of first dumping the whole response to a dict.
resposta_0 = json.loads(response.choices[0].message.content)

In [27]:
# Parsed JSON replies for test samples 1..9 (sample 0 is stored separately in `resposta_0`).
respostas = []

for i in range(1, 10):
    example = test['text'].values[i]
    label = test['label'].values[i]  # ground truth; never sent to the model and not used in this cell
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        response_format={"type": "json_object"},
        messages=[
            {'role': 'system', 'content': get_system_prompt()},
            {'role': 'user', 'content': get_cot_prompt(news_article=example)},
        ],
    )
    # Read the reply through the typed response object instead of dumping the whole
    # response to a dict first; the content is expected to be a JSON string.
    respostas.append(json.loads(response.choices[0].message.content))

In [40]:
import pandas as pd

# Start both columns with the answer for sample 0, then add samples 1..9 in order,
# so that row k of the frame corresponds to test row k.
sketchpads = [resposta_0['sketchpad']]
clfs = [resposta_0['is_fake']]

for resposta in respostas:
    sketchpads.append(resposta['sketchpad'])
    clfs.append(resposta['is_fake'])

# One row per classified article: the model's reasoning and its 0/1 verdict.
first_10_cot = pd.DataFrame(dict(sketchpad=sketchpads, is_fake=clfs))

In [47]:
# Put the model output next to the first ten test rows (index reset so both frames
# align by position) and save the result as CSV.
pd.concat(
    [test.iloc[:10].reset_index(drop=True), first_10_cot],
    axis=1,
).to_csv('reports/llms_evals/first_10_cot.csv', index=False)

### CoT + Few Shot

In [78]:
from fakenews.prompts import get_few_shot_prompt_with_cot

# Few-shot examples: three texts per class, sampled reproducibly from the training split.
# NOTE(review): this treats label == 1 as "fake", but in the comparison table at the end of
# the notebook every is_fake prediction is the exact opposite of `label` for all ten rows,
# which suggests the dataset may encode 1 = real. Confirm the label convention of
# read_fake_recogna(); if it is 1 = real, these two selections are swapped.
fakes = train.loc[train['label'] == 1, 'text'].sample(n=3, random_state=42).values
trues = train.loc[train['label'] == 0, 'text'].sample(n=3, random_state=42).values

In [79]:
# Position (not index label) of the test row used for this single-request trial.
i = 0

example = test['text'].values[i]
label = test['label'].values[i]  # ground truth; never sent to the model and not used in this cell

# User message: few-shot examples (`fakes`, `trues`) plus the article to classify.
user = get_few_shot_prompt_with_cot(example, fakes, trues)

response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0,
    response_format={"type": "json_object"},
    messages=[
        {'role': 'system', 'content': get_system_prompt()},
        {'role': 'user', 'content': user},
    ],
)
# Read the reply through the typed response object instead of dumping the whole
# response to a dict first; the content is expected to be a JSON string.
resposta_0 = json.loads(response.choices[0].message.content)

In [80]:
# Display the parsed model answer for test sample 0.
resposta_0

{'sketchpad': 'O artigo em questão apresenta uma frase atribuída ao presidente Jair Bolsonaro em que ele supostamente pergunta a crianças de um orfanato sobre o que comem no Dia dos Pais. A linguagem utilizada é sensacionalista e a frase parece ter sido criada para gerar polêmica. Não há menção a fontes confiáveis ou detalhes que possam comprovar a veracidade do ocorrido. Além disso, a situação descrita parece improvável e fora do contexto usual de uma visita presidencial a um orfanato. Portanto, há indícios de que essa notícia possa ser falsa.',
 'is_fake': 1}

In [82]:
# Parsed JSON replies for test samples 1..9 (sample 0 is stored separately in `resposta_0`).
respostas = []

for i in range(1, 10):
    example = test['text'].values[i]
    label = test['label'].values[i]  # ground truth; never sent to the model and not used in this cell
    # User message: few-shot examples (`fakes`, `trues`) plus the article to classify.
    user = get_few_shot_prompt_with_cot(example, fakes, trues)

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        response_format={"type": "json_object"},
        messages=[
            {'role': 'system', 'content': get_system_prompt()},
            {'role': 'user', 'content': user},
        ],
    )
    # Read the reply through the typed response object instead of dumping the whole
    # response to a dict first; the content is expected to be a JSON string.
    respostas.append(json.loads(response.choices[0].message.content))

In [83]:
# Start both columns with the answer for sample 0, then add samples 1..9 in order,
# so that row k of the frame corresponds to test row k.
sketchpads = [resposta_0['sketchpad']]
clfs = [resposta_0['is_fake']]

for resposta in respostas:
    sketchpads.append(resposta['sketchpad'])
    clfs.append(resposta['is_fake'])

# One row per classified article: the model's reasoning and its 0/1 verdict.
first_10_cot_few_shot = pd.DataFrame(dict(sketchpad=sketchpads, is_fake=clfs))

In [86]:
# Put the model output next to the first ten test rows (index reset so both frames
# align by position) and save the result as CSV.
pd.concat(
    [test.iloc[:10].reset_index(drop=True), first_10_cot_few_shot],
    axis=1,
).to_csv('reports/llms_evals/first_10_cot_few_shot.csv', index=False)

### Few Shot

In [87]:
from fakenews.prompts import get_few_shot_prompt

# Few-shot examples: three texts per class, sampled reproducibly from the training split.
# NOTE(review): this treats label == 1 as "fake", but in the comparison table at the end of
# the notebook every is_fake prediction is the exact opposite of `label` for all ten rows,
# which suggests the dataset may encode 1 = real. Confirm the label convention of
# read_fake_recogna(); if it is 1 = real, these two selections are swapped.
fakes = train.loc[train['label'] == 1, 'text'].sample(n=3, random_state=42).values
trues = train.loc[train['label'] == 0, 'text'].sample(n=3, random_state=42).values

In [88]:
# Position (not index label) of the test row used for this single-request trial.
i = 0

example = test['text'].values[i]
label = test['label'].values[i]  # ground truth; never sent to the model and not used in this cell

# User message: few-shot examples (`fakes`, `trues`) plus the article to classify.
user = get_few_shot_prompt(example, fakes, trues)

response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0,
    response_format={"type": "json_object"},
    messages=[
        {'role': 'system', 'content': get_system_prompt()},
        {'role': 'user', 'content': user},
    ],
)
# Read the reply through the typed response object instead of dumping the whole
# response to a dict first; the content is expected to be a JSON string.
resposta_0 = json.loads(response.choices[0].message.content)

In [91]:
# Parsed JSON replies for test samples 1..9 (sample 0 is stored separately in `resposta_0`).
respostas = []

for i in range(1, 10):
    example = test['text'].values[i]
    label = test['label'].values[i]  # ground truth; never sent to the model and not used in this cell
    # User message: few-shot examples (`fakes`, `trues`) plus the article to classify.
    user = get_few_shot_prompt(example, fakes, trues)

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        response_format={"type": "json_object"},
        messages=[
            {'role': 'system', 'content': get_system_prompt()},
            {'role': 'user', 'content': user},
        ],
    )
    # Read the reply through the typed response object instead of dumping the whole
    # response to a dict first; the content is expected to be a JSON string.
    respostas.append(json.loads(response.choices[0].message.content))

In [94]:
# Start with the verdict for sample 0, then add samples 1..9 in order,
# so that row k of the frame corresponds to test row k.
clfs = [resposta_0['is_fake']]

for resposta in respostas:
    clfs.append(resposta['is_fake'])

# The plain few-shot prompt yields only a verdict, so the frame has a single column.
first_10_few_shot = pd.DataFrame(dict(is_fake=clfs))

In [96]:
# Put the model output next to the first ten test rows (index reset so both frames
# align by position) and save the result as CSV.
pd.concat(
    [test.iloc[:10].reset_index(drop=True), first_10_few_shot],
    axis=1,
).to_csv('reports/llms_evals/first_10_few_shot.csv', index=False)

In [103]:
# Side-by-side comparison of the three prompt variants on the same ten test rows.
# The few-shot file contributes all of its columns (including `text` and `label`);
# the other two files contribute only their renamed prediction columns (plus the CoT sketchpad).
pd.concat(
    [
        pd.read_csv('reports/llms_evals/first_10_few_shot.csv')
        .rename(columns={"is_fake": "is_fake_few_shot"}),
        pd.read_csv('reports/llms_evals/first_10_cot.csv')
        .rename(columns={"is_fake": "is_fake_cot"})[['sketchpad', 'is_fake_cot']],
        pd.read_csv('reports/llms_evals/first_10_cot_few_shot.csv')
        .rename(columns={"is_fake": "is_fake_cot_few_shot"})[['is_fake_cot_few_shot']],
    ],
    axis=1,
)

Unnamed: 0,text,label,is_fake_few_shot,sketchpad,is_fake_cot,is_fake_cot_few_shot
0,“ série votar o bonoro pra bolsonaro visitar o...,0,1,The news article provided seems to be highly s...,1,1
1,registrar caso e óbito am pa ro rr al ba ce ma...,1,0,The news article appears to be a jumble of wor...,0,0
2,“ exigir o contratar patrocínio banco brasil o...,0,1,The news article contains a mix of political s...,1,1
3,o ministrar stf supremo tribunal federal edson...,1,0,The news article appears to be reporting on th...,0,0
4,idoso ano identificar milton oliveira santo en...,1,0,The news article describes the unfortunate inc...,0,0
5,caso covid-19 surgir brasil ligar o o vírus ci...,1,0,The news article discusses the spread of COVID...,0,0
6,o nacional justiçar cnj informar n segunda-fei...,1,0,The news article appears to be discussing a pr...,0,0
7,unir registrar recordar morte diário covid-19 ...,1,0,The news article seems to be reporting on the ...,0,0
8,o discussão o orçamentar união chegar o o cong...,1,0,The news article discusses budgetary issues in...,0,0
9,paulo reuters o instituto butantan enviar segu...,1,0,The news article discusses the Instituto Butan...,0,0
