<a href="https://colab.research.google.com/github/fjadidi2001/fake_news_detection/blob/main/BertGnn_Apr1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The BERT model was proposed in *BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding* by Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. It is a bidirectional transformer pretrained with a combination of a masked language modeling objective and next sentence prediction on a large corpus comprising the Toronto Book Corpus and Wikipedia.



In [None]:
import pandas as pd
import numpy as np
import torch
from transformers import BertTokenizer, BertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from google.colab import drive
# Mount Google Drive so the dataset stored there can be read from Colab.
drive.mount('/content/drive/')

# Text preprocessing for BERT: max_len is the fixed token length that
# encode_texts pads/truncates every text to.
max_len = 100
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load the fact-check dataset; the CSV is decoded as latin-1.
df = pd.read_csv(
    '/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv',
    encoding='latin-1',
)
def encode_texts(texts):
    """Tokenize a collection of texts with the module-level BERT tokenizer.

    Args:
        texts: Object exposing ``.tolist()`` (the caller passes a pandas
            Series of strings).

    Returns:
        The tokenizer output with PyTorch tensors; every sequence is padded
        or truncated to ``max_len`` tokens. The caller reads the
        ``'input_ids'`` and ``'attention_mask'`` entries from it.
    """
    tokenizer_options = dict(
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    raw_texts = texts.tolist()
    return tokenizer(raw_texts, **tokenizer_options)

# Tokenize the post text; missing posts are encoded as the empty string.
text_encodings = encode_texts(df['Context Post'].fillna(''))
input_ids = text_encodings['input_ids']
attention_mask = text_encodings['attention_mask']

# Social features preprocessing
social_features = ['share_count', 'reaction_count', 'comment_count']
social_data = df[social_features].fillna(0).to_numpy(dtype=float)
# Standardize each feature column independently (axis=0). Using the global
# mean/std of the whole matrix would let the largest-scale column dominate
# and leave the others effectively un-normalized. The epsilon guards against
# a constant column (std == 0).
social_mean = social_data.mean(axis=0)
social_std = social_data.std(axis=0)
social_data = (social_data - social_mean) / (social_std + 1e-7)

# Labels for fake news detection
rating_map = {
    'no factual content': 0,
    'mostly true': 1,
    # Add more ratings based on your dataset
    # For fake news: 0 = fake/not factual, 1 = true/factual
}
# NOTE: any rating missing from rating_map (and any missing rating) maps to
# NaN and is then silently labelled 0 by fillna(0).
labels = df['Rating'].map(rating_map).fillna(0).values

# Convert torch tensors to tf tensors
input_ids_tf = tf.convert_to_tensor(input_ids.numpy())
attention_mask_tf = tf.convert_to_tensor(attention_mask.numpy())
social_data_tf = tf.convert_to_tensor(social_data)
labels_tf = tf.convert_to_tensor(labels)

# Text Branch with BERT
def create_text_branch(bert_model, use_cnn=True):
    """Build the text branch: token ids + attention mask -> BERT -> flat vector.

    Args:
        bert_model: Model invoked as
            ``bert_model(input_ids=..., attention_mask=...)`` under
            ``torch.no_grad()``; its ``last_hidden_state`` is used as the
            token-level features.
        use_cnn: When true, a ``Conv1D(64, 5)`` + ``MaxPooling1D(5)`` stack is
            applied to the BERT output before flattening. When false, the
            BERT output is flattened directly.

    Returns:
        A pair ``([input_ids, attention_mask], features)`` holding the two
        Keras input tensors (each of length ``max_len``, dtype int32) and the
        flattened output tensor of the branch.
    """
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    # Wrap BERT in Lambda layer to use PyTorch model in TF
    # NOTE(review): `.numpy()` is only available on eager TF tensors, so this
    # wrapper presumably fails when Keras traces the layer symbolically --
    # confirm, and consider tf.py_function or a native TF BERT model instead.
    def bert_layer(inputs):
        ids, mask = inputs
        # no_grad: BERT is used as a frozen feature extractor here.
        with torch.no_grad():
            outputs = bert_model(
                input_ids=torch.tensor(ids.numpy()),
                attention_mask=torch.tensor(mask.numpy())
            )
        return outputs.last_hidden_state.numpy()

    bert_output = layers.Lambda(bert_layer)([input_ids, attention_mask])

    # Start from the raw BERT output so that `x` is always defined; previously
    # use_cnn=False reached Flatten with `x` unassigned and raised
    # UnboundLocalError.
    x = bert_output
    if use_cnn:
        x = layers.Conv1D(64, 5, activation='relu')(x)
        x = layers.MaxPooling1D(5)(x)

    x = layers.Flatten()(x)
    return [input_ids, attention_mask], x

# Social Branch
def create_social_branch(input_shape, use_cnn=True):
    """Build the social-feature branch of the model.

    Args:
        input_shape: Shape (without the batch dimension) of the social
            feature input.
        use_cnn: When true, the 64-unit dense output is reshaped to a
            single-channel sequence and passed through ``Conv1D(32, 3)`` and
            ``MaxPooling1D(2)`` before flattening.

    Returns:
        A pair ``(inputs, features)``: the Keras input tensor and the
        flattened output tensor of the branch.
    """
    social_in = layers.Input(shape=input_shape)
    features = layers.Dense(64, activation='relu')(social_in)

    # Optional convolutional stack, applied in order over the dense output.
    conv_stack = (
        layers.Reshape((-1, 1)),
        layers.Conv1D(32, 3, activation='relu'),
        layers.MaxPooling1D(2),
    ) if use_cnn else ()
    for layer in conv_stack:
        features = layer(features)

    return social_in, layers.Flatten()(features)

# Complete model
def create_model(bert_model):
    """Assemble the two-branch (text + social) fake-news classifier.

    Args:
        bert_model: BERT model handed to ``create_text_branch``. When
            ``None``, ``bert-base-uncased`` is loaded here with the same
            settings this function previously always used (float16, SDPA
            attention).

    Returns:
        An uncompiled Keras ``Model`` taking three inputs, in order
        ``[input_ids, attention_mask, social_features]``, and producing one
        sigmoid output (0 = fake, 1 = true).
    """
    # Use the caller's model instead of always loading a second copy of BERT;
    # only fall back to loading when none was supplied.
    if bert_model is None:
        bert_model = BertModel.from_pretrained(
            "bert-base-uncased",
            torch_dtype=torch.float16,
            attn_implementation="sdpa"
        )

    # Create branches
    text_inputs, text_output = create_text_branch(bert_model, use_cnn=True)
    social_input, social_output = create_social_branch((len(social_features),), use_cnn=True)

    # Concatenate
    combined = layers.concatenate([text_output, social_output])

    # Dense layers for fake news classification
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)  # 0 = fake, 1 = true

    # text_inputs is itself a list; unpack it so the model's input structure
    # is the flat 3-element list that fit()/predict() are called with.
    model = Model(inputs=[*text_inputs, social_input], outputs=outputs)
    return model

# Create and compile
# Load the pretrained BERT encoder in half precision with SDPA attention.
bert_model = BertModel.from_pretrained(
    "bert-base-uncased", torch_dtype=torch.float16, attn_implementation="sdpa"
)
model = create_model(bert_model)

# Binary classification: cross-entropy loss, tracked with accuracy,
# precision and recall.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

# Model summary
model.summary()

# Train; the last 20% of the samples are held out for validation.
history = model.fit(
    x=[input_ids_tf, attention_mask_tf, social_data_tf],
    y=labels_tf,
    batch_size=16,  # Smaller batch size due to BERT memory requirements
    epochs=10,
    validation_split=0.2,
)

# Evaluation function
def evaluate_fake_news(model, input_data, true_labels):
    """Predict with ``model``, print the accuracy at a 0.5 threshold, and return the raw predictions.

    Args:
        model: Object exposing ``predict(input_data)`` that returns one score
            per sample (e.g. an array of shape ``(N, 1)`` or ``(N,)``).
        input_data: Inputs forwarded unchanged to ``model.predict``.
        true_labels: Array-like of 0/1 labels, one per sample.

    Returns:
        The unmodified output of ``model.predict(input_data)``.

    Raises:
        ValueError: If the number of predictions differs from the number of
            labels.
    """
    predictions = model.predict(input_data)

    # Flatten both sides before comparing: an (N, 1) prediction column against
    # (N,) labels would otherwise broadcast to an (N, N) matrix and yield a
    # meaningless accuracy.
    pred_binary = (np.asarray(predictions) > 0.5).astype(int).reshape(-1)
    expected = np.asarray(true_labels).reshape(-1)
    if pred_binary.shape != expected.shape:
        raise ValueError(
            f"Got {pred_binary.size} predictions for {expected.size} labels"
        )

    accuracy = np.mean(pred_binary == expected)
    print(f"Accuracy: {accuracy:.4f}")
    return predictions

# Example evaluation
# NOTE: these are the same inputs and labels the model was fitted on, so the
# printed accuracy is not a held-out estimate.
predictions = evaluate_fake_news(
    model=model,
    input_data=[input_ids_tf, attention_mask_tf, social_data_tf],
    true_labels=labels,
)