<a href="https://colab.research.google.com/github/fjadidi2001/fake_news_detection/blob/main/BertGnn_Apr1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The BERT model was proposed in BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova. It’s a bidirectional transformer pretrained using a combination of masked language modeling objective and next sentence prediction on a large corpus comprising the Toronto Book Corpus and Wikipedia.



In [2]:
import pandas as pd
import numpy as np
import torch
from transformers import BertTokenizer, BertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from google.colab import drive
drive.mount('/content/drive/')
# Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Text preprocessing for BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def encode_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

text_encodings = encode_texts(df['Context Post'].fillna(''))
input_ids = text_encodings['input_ids']
attention_mask = text_encodings['attention_mask']

# Social features preprocessing
social_features = ['share_count', 'reaction_count', 'comment_count']
social_data = df[social_features].fillna(0).values
social_data = (social_data - social_data.mean()) / (social_data.std() + 1e-7)

# Labels for fake news detection
rating_map = {
    'no factual content': 0,
    'mostly true': 1,
    # Add more ratings based on your dataset
    # For fake news: 0 = fake/not factual, 1 = true/factual
}
labels = df['Rating'].map(rating_map).fillna(0).values

# Convert torch tensors to tf tensors
input_ids_tf = tf.convert_to_tensor(input_ids.numpy())
attention_mask_tf = tf.convert_to_tensor(attention_mask.numpy())
social_data_tf = tf.convert_to_tensor(social_data)
labels_tf = tf.convert_to_tensor(labels)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [7]:
import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf

# Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Text preprocessing for BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def encode_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )

text_encodings = encode_texts(df['Context Post'].fillna(''))
input_ids_tf = text_encodings['input_ids']
attention_mask_tf = text_encodings['attention_mask']

# Social features preprocessing
social_features = ['share_count', 'reaction_count', 'comment_count']
social_data = df[social_features].fillna(0).values
social_data = (social_data - social_data.mean()) / (social_data.std() + 1e-7)
social_data_tf = tf.convert_to_tensor(social_data)

# Labels
rating_map = {'no factual content': 0, 'mostly true': 1}
labels_tf = tf.convert_to_tensor(df['Rating'].map(rating_map).fillna(0).values)

# Custom BERT Layer
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]  # Return last_hidden_state

# Text Branch
def create_text_branch(use_cnn=True):
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    bert_output = TFBertLayer()([input_ids, attention_mask])

    if use_cnn:
        x = layers.Conv1D(64, 5, activation='relu')(bert_output)
        x = layers.MaxPooling1D(5)(x)

    x = layers.Flatten()(x)
    return [input_ids, attention_mask], x

# Social Branch
def create_social_branch(input_shape, use_cnn=True):
    inputs = layers.Input(shape=input_shape)
    x = layers.Dense(64, activation='relu')(inputs)

    if use_cnn:
        x = layers.Reshape((-1, 1))(x)
        x = layers.Conv1D(32, 3, activation='relu')(x)
        x = layers.MaxPooling1D(2)(x)

    x = layers.Flatten()(x)
    return inputs, x

# Complete model
def create_model():
    text_inputs, text_output = create_text_branch(use_cnn=True)
    social_input, social_output = create_social_branch((len(social_features),), use_cnn=True)

    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[text_inputs, social_input], outputs=outputs)
    return model

# Create and compile
model = create_model()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

model.summary()

# Train
history = model.fit(
    [input_ids_tf, attention_mask_tf, social_data_tf],
    labels_tf,
    epochs=10,
    batch_size=16,
    validation_split=0.2
)

# Evaluation function
def evaluate_fake_news(model, input_data, true_labels):
    predictions = model.predict(input_data)
    pred_binary = (predictions > 0.5).astype(int)
    accuracy = np.mean(pred_binary == true_labels)
    print(f"Accuracy: {accuracy:.4f}")
    return predictions

predictions = evaluate_fake_news(
    model,
    [input_ids_tf, attention_mask_tf, social_data_tf],
    labels_tf.numpy()
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/10


Expected: [['keras_tensor_26', 'keras_tensor_27'], 'keras_tensor_32']
Received: inputs=('Tensor(shape=(None, 100))', 'Tensor(shape=(None, 100))', 'Tensor(shape=(None, 3))')


[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 207ms/step - accuracy: 0.7461 - loss: 0.5120 - precision_1: 0.8059 - recall_1: 0.8590 - val_accuracy: 0.5689 - val_loss: 0.6077 - val_precision_1: 0.8305 - val_recall_1: 0.3564
Epoch 2/10
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 99ms/step - accuracy: 0.8148 - loss: 0.3608 - precision_1: 0.9038 - recall_1: 0.8447 - val_accuracy: 0.5689 - val_loss: 0.6057 - val_precision_1: 0.8250 - val_recall_1: 0.3600
Epoch 3/10
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 102ms/step - accuracy: 0.8247 - loss: 0.3316 - precision_1: 0.9289 - recall_1: 0.8313 - val_accuracy: 0.5952 - val_loss: 0.6022 - val_precision_1: 0.8261 - val_recall_1: 0.4145
Epoch 4/10
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 100ms/step - accuracy: 0.8581 - loss: 0.2558 - precision_1: 0.9615 - recall_1: 0.8517 - val_accuracy: 0.5799 - val_loss: 0.6145 - val_precision_1: 0.8547 - val_recall_1:

Expected: [['keras_tensor_26', 'keras_tensor_27'], 'keras_tensor_32']
Received: inputs=('Tensor(shape=(32, 100))', 'Tensor(shape=(32, 100))', 'Tensor(shape=(32, 3))')


[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 240ms/step
Accuracy: 0.5387


## **1. Data Collection & Preprocessing**
### **1.1. Data Sources**
- Collect **fake and real news** datasets
- Extract two key components:
  - **Textual content** (headline, body).
  - **Social context** (user interactions, source credibility, sharing patterns).

### **1.2. Text Preprocessing (for BERT)**
- **Lowercasing & special character removal**.
- **Tokenization** using BERT's **WordPiece Tokenizer**.
- **Padding/truncation** to a fixed sequence length.
- **Convert tokens to embeddings** via **BERT's pre-trained model** (e.g., `bert-base-uncased`).

### **1.3. Graph Construction (for GNN)**
- **Nodes**: News articles, users, sources.
- **Edges**: User-news interactions (e.g., shares, likes, comments).
- **Node features**: Text embeddings (from BERT), credibility scores, engagement metrics.
- **Graph Representation**: Create an adjacency matrix for **Graph Neural Network (GNN)**.

---

## **2. Feature Extraction**
### **2.1. Textual Content (Text Branch using BERT)**
- Use a **pre-trained BERT** model.
- Extract **CLS token embedding** as a news representation.
- Apply a **Bidirectional LSTM (BiLSTM)** on top for context enhancement.

### **2.2. Social Context (Graph Neural Network)**
- Represent user-news interactions as a **Heterogeneous Graph**.
- Use **Graph Convolutional Network (GCN)** or **Graph Attention Network (GAT)** to capture relational features.
- Perform **node classification** to predict fake/real labels.

---

## **3. Model Architecture**
### **3.1. Text Branch (BERT + BiLSTM)**
- **BERT encoder** → Extracts deep semantic features.
- **BiLSTM layer** → Captures sequence dependencies.
- **Dense layer** → Outputs a feature vector.

### **3.2. Social Branch (GNN)**
- **Graph Convolutional Network (GCN) / Graph Attention Network (GAT)**.
- Aggregates neighbor node information (e.g., source credibility, user interactions).
- Outputs a **graph embedding**.

### **3.3. Fusion & Prediction**
- **Concatenate** the **BERT & GNN outputs**.
- Use **fully connected layers (Dense layers)**.
- Apply **softmax activation** for classification.

---

## **4. Training & Evaluation**
### **4.1. Loss & Optimization**
- Use **Cross-Entropy Loss** for classification.
- Optimizer: **AdamW** (for BERT) and **Adam** (for GNN).

### **4.2. Evaluation Metrics**
- **Accuracy, Precision, Recall, F1-Score**.
- **Graph metrics**: Node classification accuracy, AUC-ROC.



In [9]:
# Install required packages in Colab
!pip install transformers spektral tensorflow

import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from spektral.layers import GCNConv  # For Graph Convolutional Network
from spektral.data import Dataset, Graph

# Step 1: Data Collection & Preprocessing
# 1.1 Data Sources
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Extract textual content and social context
textual_content = df['Context Post'].fillna('')
social_context = df[['share_count', 'reaction_count', 'comment_count']].fillna(0)

# Labels
rating_map = {'no factual content': 0, 'mostly true': 1}  # Extend as needed
labels = df['Rating'].map(rating_map).fillna(0).values

# 1.2 Text Preprocessing (for BERT)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def preprocess_text(texts):
    # Lowercasing and basic cleaning (special characters can be handled by BERT tokenizer)
    texts = texts.str.lower()
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

input_ids, attention_mask = preprocess_text(textual_content)

# 1.3 Graph Construction (for GNN)
# Simplified graph: nodes are posts, edges based on shared interactions
# For demo, we'll create a basic adjacency matrix based on social metrics
n_nodes = len(df)
adjacency = np.zeros((n_nodes, n_nodes))
node_features = social_context.values  # Node features from social context

# Create edges based on similarity in reaction_count (simplified approach)
for i in range(n_nodes):
    for j in range(n_nodes):
        if i != j and abs(node_features[i, 1] - node_features[j, 1]) < 10:  # Threshold on reactions
            adjacency[i, j] = 1

# Normalize adjacency matrix
adjacency = adjacency / (adjacency.sum(axis=1, keepdims=True) + 1e-6)

# Step 2: Feature Extraction
# 2.1 Text Branch (BERT + BiLSTM)
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]  # last_hidden_state

def create_text_branch():
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    bert_output = TFBertLayer()([input_ids, attention_mask])  # Shape: (batch, max_len, 768)
    bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=False))(bert_output)
    dense = layers.Dense(128, activation='relu')(bilstm)

    return [input_ids, attention_mask], dense

# 2.2 Social Branch (GNN with GCN)
def create_social_branch(n_features, n_nodes):
    # Graph inputs
    node_input = layers.Input(shape=(n_nodes, n_features), dtype=tf.float32)  # Node features
    adj_input = layers.Input(shape=(n_nodes, n_nodes), dtype=tf.float32)      # Adjacency matrix

    # GCN layer
    gcn = GCNConv(64, activation='relu')([node_input, adj_input])
    gcn = layers.Dropout(0.3)(gcn)
    gcn = GCNConv(32, activation='relu')([gcn, adj_input])

    # Global pooling to get graph embedding
    graph_embedding = layers.GlobalAvgPool1D()(gcn)
    dense = layers.Dense(128, activation='relu')(graph_embedding)

    return [node_input, adj_input], dense

# Step 3: Model Architecture
def create_model(n_features, n_nodes):
    text_inputs, text_output = create_text_branch()
    social_inputs, social_output = create_social_branch(n_features, n_nodes)

    # Fusion
    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)  # Binary classification

    model = Model(inputs=text_inputs + social_inputs, outputs=outputs)
    return model

# Prepare graph inputs
n_features = social_context.shape[1]
node_features_tf = tf.convert_to_tensor(node_features[None, ...])  # Add batch dimension
adjacency_tf = tf.convert_to_tensor(adjacency[None, ...])          # Add batch dimension
labels_tf = tf.convert_to_tensor(labels)

# Create and compile model
model = create_model(n_features, n_nodes)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=2e-5),  # AdamW for BERT
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

model.summary()

# Step 4: Training & Evaluation
history = model.fit(
    [input_ids, attention_mask, node_features_tf, adjacency_tf],
    labels_tf,
    epochs=10,
    batch_size=16,
    validation_split=0.2
)

# Evaluation
def evaluate_fake_news(model, input_data, true_labels):
    predictions = model.predict(input_data)
    pred_binary = (predictions > 0.5).astype(int)
    accuracy = np.mean(pred_binary == true_labels)
    precision = tf.keras.metrics.Precision()(true_labels, pred_binary)
    recall = tf.keras.metrics.Recall()(true_labels, pred_binary)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-6)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    return predictions

predictions = evaluate_fake_news(
    model,
    [input_ids, attention_mask, node_features_tf, adjacency_tf],
    labels
)



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Exception encountered when calling GCNConv.call().

[1mCould not automatically infer the output shape / dtype of 'gcn_conv_1' (of type GCNConv). Either the `GCNConv.call()` method is incorrect, or you need to implement the `GCNConv.compute_output_spec() / compute_output_shape()` method. Error encountered:

Tried to convert 'y' to a tensor and failed. Error: None values not supported.[0m

Arguments received by GCNConv.call():
  • args=(['<KerasTensor shape=(None, 2282, 3), dtype=float32, sparse=False, name=keras_tensor_54>', '<KerasTensor shape=(None, 2282, 2282), dtype=float32, sparse=False, name=keras_tensor_55>'],)
  • kwargs={'mask': ['None', 'None']}

In [12]:

import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from spektral.layers import GCNConv
from spektral.utils import normalized_adjacency

# Step 1: Data Collection & Preprocessing
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Extract textual content and social context
textual_content = df['Context Post'].fillna('')
social_context = df[['share_count', 'reaction_count', 'comment_count']].fillna(0)

# Labels
rating_map = {'no factual content': 0, 'mostly true': 1}
labels = df['Rating'].map(rating_map).fillna(0).values

# 1.2 Text Preprocessing (for BERT)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def preprocess_text(texts):
    texts = texts.str.lower()
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

input_ids, attention_mask = preprocess_text(textual_content)

# 1.3 Graph Construction (for GNN)
n_nodes = len(df)
adjacency = np.zeros((n_nodes, n_nodes))
node_features = social_context.values

# Create edges based on reaction_count similarity
for i in range(n_nodes):
    for j in range(n_nodes):
        if i != j and abs(node_features[i, 1] - node_features[j, 1]) < 10:
            adjacency[i, j] = 1

# Normalize adjacency matrix
adjacency = normalized_adjacency(adjacency, symmetric=True)

# Step 2: Feature Extraction
# 2.1 Text Branch (BERT + BiLSTM)
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]

def create_text_branch():
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    bert_output = TFBertLayer()([input_ids, attention_mask])
    bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=False))(bert_output)
    dense = layers.Dense(128, activation='relu')(bilstm)

    return [input_ids, attention_mask], dense

# 2.2 Social Branch (GNN with GCN) - Fixed with Custom Layer
class GCNLayer(layers.Layer):
    def __init__(self, channels, **kwargs):
        super(GCNLayer, self).__init__(**kwargs)
        self.gcn = GCNConv(channels=channels, activation='relu')

    def call(self, inputs):
        node_features, adjacency = inputs
        return self.gcn([node_features, adjacency])

    def compute_output_shape(self, input_shape):
        # Input_shape: [(batch, n_nodes, n_features), (batch, n_nodes, n_nodes)]
        return (input_shape[0][0], input_shape[0][1], self.gcn.channels)

def create_social_branch(n_nodes, n_features):
    node_input = layers.Input(shape=(n_nodes, n_features), dtype=tf.float32)
    adj_input = layers.Input(shape=(n_nodes, n_nodes), dtype=tf.float32)

    gcn1 = GCNLayer(channels=64)([node_input, adj_input])
    gcn1 = layers.Dropout(0.3)(gcn1)
    gcn2 = GCNLayer(channels=32)([gcn1, adj_input])

    graph_embedding = layers.GlobalAvgPool1D()(gcn2)
    dense = layers.Dense(128, activation='relu')(graph_embedding)

    return [node_input, adj_input], dense

# Step 3: Model Architecture
def create_model(n_nodes, n_features):
    text_inputs, text_output = create_text_branch()
    social_inputs, social_output = create_social_branch(n_nodes, n_features)

    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=text_inputs + social_inputs, outputs=outputs)
    return model

# Prepare inputs
n_features = social_context.shape[1]
node_features_tf = tf.convert_to_tensor(node_features[None, ...])
adjacency_tf = tf.convert_to_tensor(adjacency[None, ...])
labels_tf = tf.convert_to_tensor(labels)

# Create and compile model
model = create_model(n_nodes, n_features)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

model.summary()

# Step 4: Training & Evaluation
history = model.fit(
    [input_ids, attention_mask, node_features_tf, adjacency_tf],
    labels_tf,
    epochs=10,
    batch_size=16,
    validation_split=0.2
)

# Evaluation
def evaluate_fake_news(model, input_data, true_labels):
    predictions = model.predict(input_data)
    pred_binary = (predictions > 0.5).astype(int)
    accuracy = np.mean(pred_binary == true_labels)
    precision = tf.keras.metrics.Precision()(true_labels, pred_binary)
    recall = tf.keras.metrics.Recall()(true_labels, pred_binary)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-6)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    return predictions

predictions = evaluate_fake_news(
    model,
    [input_ids, attention_mask, node_features_tf, adjacency_tf],
    labels
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 1825, 1825, 1, 1
'y' sizes: 1825


In [16]:
import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from spektral.layers import GCNConv
from spektral.utils import normalized_adjacency

# Step 1: Data Collection & Preprocessing
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Filter to rows with valid ratings
rating_map = {'no factual content': 0, 'mostly true': 1}
df = df[df['Rating'].isin(rating_map.keys())]  # 1546 samples after filtering
n_nodes = len(df)

# Extract textual content and social context
textual_content = df['Context Post'].fillna('')
social_context = df[['share_count', 'reaction_count', 'comment_count']].fillna(0)

# Labels
labels = df['Rating'].map(rating_map).values

# 1.2 Text Preprocessing (for BERT)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def preprocess_text(texts):
    texts = texts.str.lower()
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

input_ids, attention_mask = preprocess_text(textual_content)

# 1.3 Graph Construction (for GNN)
adjacency = np.zeros((n_nodes, n_nodes))
node_features = social_context.values

# Create edges based on reaction_count similarity
for i in range(n_nodes):
    for j in range(n_nodes):
        if i != j and abs(node_features[i, 1] - node_features[j, 1]) < 10:
            adjacency[i, j] = 1

# Normalize adjacency matrix
adjacency = normalized_adjacency(adjacency, symmetric=True)

# Step 2: Feature Extraction
# 2.1 Text Branch (BERT + BiLSTM)
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]

def create_text_branch():
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    bert_output = TFBertLayer()([input_ids, attention_mask])
    bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=False))(bert_output)
    dense = layers.Dense(128, activation='relu')(bilstm)

    return [input_ids, attention_mask], dense

# 2.2 Social Branch (GNN with GCN) - Fixed
class GCNLayer(layers.Layer):
    def __init__(self, channels, **kwargs):
        super(GCNLayer, self).__init__(**kwargs)
        self.channels = channels
        self.gcn = GCNConv(channels=channels, activation='relu')

    def build(self, input_shape):
        # Build the GCN layer with input shape
        self.gcn.build([input_shape[0], input_shape[1]])
        self.built = True

    def call(self, inputs):
        node_features, adjacency = inputs
        return self.gcn([node_features, adjacency])

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.channels)

def create_social_branch(n_nodes, n_features):
    node_input = layers.Input(shape=(n_nodes, n_features), dtype=tf.float32)
    adj_input = layers.Input(shape=(n_nodes, n_nodes), dtype=tf.float32)

    gcn1 = GCNLayer(channels=64)([node_input, adj_input])
    gcn1 = layers.Dropout(0.3)(gcn1)
    gcn2 = GCNLayer(channels=32)([gcn1, adj_input])

    graph_embedding = layers.GlobalAvgPool1D()(gcn2)
    dense = layers.Dense(128, activation='relu')(graph_embedding)

    return [node_input, adj_input], dense

# Step 3: Model Architecture
def create_model(n_nodes, n_features):
    text_inputs, text_output = create_text_branch()
    social_inputs, social_output = create_social_branch(n_nodes, n_features)

    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=text_inputs + social_inputs, outputs=outputs)
    return model

# Prepare inputs
n_features = social_context.shape[1]
n_samples = len(labels)  # 1546
node_features_tf = tf.convert_to_tensor(node_features[None, ...])  # (1, n_nodes, n_features)
adjacency_tf = tf.convert_to_tensor(adjacency[None, ...])          # (1, n_nodes, n_nodes)
labels_tf = tf.convert_to_tensor(labels)

# Create and compile model
model = create_model(n_nodes, n_features)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

model.summary()

# Step 4: Training & Evaluation
batch_size = 16

# Tile graph inputs to match total number of samples
node_features_tiled = tf.tile(node_features_tf, [n_samples, 1, 1])  # (1546, n_nodes, n_features)
adjacency_tiled = tf.tile(adjacency_tf, [n_samples, 1, 1])          # (1546, n_nodes, n_nodes)

history = model.fit(
    [input_ids, attention_mask, node_features_tiled, adjacency_tiled],
    labels_tf,
    epochs=10,
    batch_size=batch_size,
    validation_split=0.2
)

# Evaluation
def evaluate_fake_news(model, input_data, true_labels):
    predictions = model.predict(input_data)
    pred_binary = (predictions > 0.5).astype(int)
    accuracy = np.mean(pred_binary == true_labels)
    precision = tf.keras.metrics.Precision()(true_labels, pred_binary)
    recall = tf.keras.metrics.Recall()(true_labels, pred_binary)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-6)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    return predictions

predictions = evaluate_fake_news(
    model,
    [input_ids, attention_mask, node_features_tiled, adjacency_tiled],
    labels
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ResourceExhaustedError: {{function_node __wrapped__Tile_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[1933,1933,1933] and type double on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Tile]

In [18]:

import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from spektral.layers import GCNConv
from spektral.utils import normalized_adjacency

# Step 1: Data Collection & Preprocessing
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Filter to rows with valid ratings
rating_map = {'no factual content': 0, 'mostly true': 1}
df = df[df['Rating'].isin(rating_map.keys())]  # 1933 samples after filtering
n_nodes = len(df)

# Extract textual content and social context
textual_content = df['Context Post'].fillna('')
social_context = df[['share_count', 'reaction_count', 'comment_count']].fillna(0)

# Labels
labels = df['Rating'].map(rating_map).values

# 1.2 Text Preprocessing (for BERT)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def preprocess_text(texts):
    texts = texts.str.lower()
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

input_ids, attention_mask = preprocess_text(textual_content)

# 1.3 Graph Construction (for GNN)
adjacency = np.zeros((n_nodes, n_nodes))
node_features = social_context.values

# Create edges based on reaction_count similarity
for i in range(n_nodes):
    for j in range(n_nodes):
        if i != j and abs(node_features[i, 1] - node_features[j, 1]) < 10:
            adjacency[i, j] = 1

# Normalize adjacency matrix
adjacency = normalized_adjacency(adjacency, symmetric=True)

# Prepare inputs
n_features = social_context.shape[1]
node_features_tf = tf.convert_to_tensor(node_features, dtype=tf.float32)  # (1933, 3)
adjacency_tf = tf.convert_to_tensor(adjacency, dtype=tf.float32)          # (1933, 1933)
labels_tf = tf.convert_to_tensor(labels, dtype=tf.float32)

# Step 2: Feature Extraction
# 2.1 Text Branch (BERT + BiLSTM)
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]

def create_text_branch():
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    bert_output = TFBertLayer()([input_ids, attention_mask])
    bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=False))(bert_output)
    dense = layers.Dense(128, activation='relu')(bilstm)

    return [input_ids, attention_mask], dense

# 2.2 Social Branch (GNN with GCN)
class GCNLayer(layers.Layer):
    def __init__(self, channels, **kwargs):
        super(GCNLayer, self).__init__(**kwargs)
        self.channels = channels
        self.gcn = GCNConv(channels=channels, activation='relu')

    def build(self, input_shape):
        self.gcn.build([input_shape[0], input_shape[1]])
        self.built = True

    def call(self, inputs):
        node_features, adjacency = inputs
        return self.gcn([node_features, adjacency])

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.channels)

def create_social_branch(n_nodes, n_features):
    # Inputs without batch dimension (single graph)
    node_input = layers.Input(shape=(n_nodes, n_features), dtype=tf.float32)
    adj_input = layers.Input(shape=(n_nodes, n_nodes), dtype=tf.float32)

    gcn1 = GCNLayer(channels=64)([node_input, adj_input])
    gcn1 = layers.Dropout(0.3)(gcn1)
    gcn2 = GCNLayer(channels=32)([gcn1, adj_input])

    # Global pooling to get a single embedding per graph
    graph_embedding = layers.GlobalAvgPool1D()(gcn2)  # (batch_size, 32)

    # Expand to match batch size dynamically
    def broadcast_embedding(x):
        batch_size = tf.shape(input_ids)[0]  # Dynamically get batch size from text input
        return tf.tile(tf.expand_dims(x, 0), [batch_size, 1])

    graph_embedding = layers.Lambda(broadcast_embedding)(graph_embedding)
    dense = layers.Dense(128, activation='relu')(graph_embedding)

    return [node_input, adj_input], dense

# Step 3: Model Architecture
def create_model(n_nodes, n_features):
    text_inputs, text_output = create_text_branch()
    social_inputs, social_output = create_social_branch(n_nodes, n_features)

    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=text_inputs + social_inputs, outputs=outputs)
    return model

# Create and compile model
model = create_model(n_nodes, n_features)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

model.summary()

# Step 4: Training & Evaluation
batch_size = 16
n_samples = len(labels)

history = model.fit(
    [input_ids, attention_mask, node_features_tf[None, ...], adjacency_tf[None, ...]],  # Add batch dim
    labels_tf,
    epochs=10,
    batch_size=batch_size,
    validation_split=0.2
)

# Evaluation
def evaluate_fake_news(model, input_data, true_labels):
    predictions = model.predict(input_data)
    pred_binary = (predictions > 0.5).astype(int)
    accuracy = np.mean(pred_binary == true_labels)
    precision = tf.keras.metrics.Precision()(true_labels, pred_binary)
    recall = tf.keras.metrics.Recall()(true_labels, pred_binary)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-6)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    return predictions

predictions = evaluate_fake_news(
    model,
    [input_ids, attention_mask, node_features_tf[None, ...], adjacency_tf[None, ...]],
    labels
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

NotImplementedError: Exception encountered when calling Lambda.call().

[1mWe could not automatically infer the shape of the Lambda's output. Please specify the `output_shape` argument for this Lambda layer.[0m

Arguments received by Lambda.call():
  • args=('<KerasTensor shape=(None, 32), dtype=float32, sparse=False, name=keras_tensor_114>',)
  • kwargs={'mask': 'None'}

In [19]:
# Install required packages in Colab
!pip install transformers spektral tensorflow

import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from spektral.layers import GCNConv
from spektral.utils import normalized_adjacency

# Step 1: Data Collection & Preprocessing
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')

# Filter to rows with valid ratings
rating_map = {'no factual content': 0, 'mostly true': 1}
df = df[df['Rating'].isin(rating_map.keys())]
n_nodes = len(df)  # 1933

# Extract textual content and social context
textual_content = df['Context Post'].fillna('')
social_context = df[['share_count', 'reaction_count', 'comment_count']].fillna(0)

# Labels
labels = df['Rating'].map(rating_map).values

# 1.2 Text Preprocessing (for BERT)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def preprocess_text(texts):
    texts = texts.str.lower()
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

input_ids, attention_mask = preprocess_text(textual_content)

# 1.3 Graph Construction (for GNN)
adjacency = np.zeros((n_nodes, n_nodes))
node_features = social_context.values

# Create edges based on reaction_count similarity
for i in range(n_nodes):
    for j in range(n_nodes):
        if i != j and abs(node_features[i, 1] - node_features[j, 1]) < 10:
            adjacency[i, j] = 1

# Normalize adjacency matrix
adjacency = normalized_adjacency(adjacency, symmetric=True)

# Prepare inputs
n_features = social_context.shape[1]
node_features_tf = tf.convert_to_tensor(node_features, dtype=tf.float32)  # (1933, 3)
adjacency_tf = tf.convert_to_tensor(adjacency, dtype=tf.float32)          # (1933, 1933)
labels_tf = tf.convert_to_tensor(labels, dtype=tf.float32)

# Step 2: Feature Extraction
# 2.1 Text Branch (BERT + BiLSTM)
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]

def create_text_branch():
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    bert_output = TFBertLayer()([input_ids, attention_mask])
    bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=False))(bert_output)
    dense = layers.Dense(128, activation='relu')(bilstm)

    return [input_ids, attention_mask], dense

# 2.2 Social Branch (GNN with GCN)
class GCNLayer(layers.Layer):
    def __init__(self, channels, **kwargs):
        super(GCNLayer, self).__init__(**kwargs)
        self.channels = channels
        self.gcn = GCNConv(channels=channels, activation='relu')

    def build(self, input_shape):
        self.gcn.build([input_shape[0], input_shape[1]])
        self.built = True

    def call(self, inputs):
        node_features, adjacency = inputs
        return self.gcn([node_features, adjacency])

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.channels)

def create_social_branch(n_nodes, n_features, text_input_ids):
    node_input = layers.Input(shape=(n_nodes, n_features), dtype=tf.float32)
    adj_input = layers.Input(shape=(n_nodes, n_nodes), dtype=tf.float32)

    gcn1 = GCNLayer(channels=64)([node_input, adj_input])
    gcn1 = layers.Dropout(0.3)(gcn1)
    gcn2 = GCNLayer(channels=32)([gcn1, adj_input])

    graph_embedding = layers.GlobalAvgPool1D()(gcn2)  # (batch_size, 32)

    # Broadcast embedding to match text batch size
    def broadcast_embedding(x):
        batch_size = tf.shape(text_input_ids)[0]
        return tf.tile(tf.expand_dims(x, 0), [batch_size, 1])

    # Specify output shape for Lambda
    def broadcast_output_shape(input_shape):
        return (None, 32)  # (batch_size, 32), where batch_size is dynamic

    graph_embedding = layers.Lambda(
        broadcast_embedding,
        output_shape=broadcast_output_shape
    )(graph_embedding)

    dense = layers.Dense(128, activation='relu')(graph_embedding)

    return [node_input, adj_input], dense

# Step 3: Model Architecture
def create_model(n_nodes, n_features):
    text_inputs, text_output = create_text_branch()
    social_inputs, social_output = create_social_branch(n_nodes, n_features, text_inputs[0])  # Pass input_ids

    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=text_inputs + social_inputs, outputs=outputs)
    return model

# Create and compile model
model = create_model(n_nodes, n_features)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

model.summary()

# Step 4: Training & Evaluation
batch_size = 16
n_samples = len(labels)

history = model.fit(
    [input_ids, attention_mask, node_features_tf[None, ...], adjacency_tf[None, ...]],
    labels_tf,
    epochs=10,
    batch_size=batch_size,
    validation_split=0.2
)

# Evaluation
def evaluate_fake_news(model, input_data, true_labels):
    predictions = model.predict(input_data)
    pred_binary = (predictions > 0.5).astype(int)
    accuracy = np.mean(pred_binary == true_labels)
    precision = tf.keras.metrics.Precision()(true_labels, pred_binary)
    recall = tf.keras.metrics.Recall()(true_labels, pred_binary)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-6)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    return predictions

predictions = evaluate_fake_news(
    model,
    [input_ids, attention_mask, node_features_tf[None, ...], adjacency_tf[None, ...]],
    labels
)



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 1546, 1546, 1, 1
'y' sizes: 1546


In [20]:
# Install required packages (e.g., in Colab)
!pip install transformers spektral tensorflow

import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf
from spektral.layers import GCNConv
from spektral.utils import normalized_adjacency

# Step 1: Data Preprocessing
# Load and filter data
df = pd.read_csv('/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv', encoding='latin-1')
rating_map = {'no factual content': 0, 'mostly true': 1}
df = df[df['Rating'].isin(rating_map.keys())]
n_nodes = len(df)  # 1546 samples

# Textual content and labels
textual_content = df['Context Post'].fillna('')
labels = df['Rating'].map(rating_map).values

# Social context for graph
social_context = df[['share_count', 'reaction_count', 'comment_count']].fillna(0)

# Text Preprocessing (BERT)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 100

def preprocess_text(texts):
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

input_ids, attention_mask = preprocess_text(textual_content)  # Shapes: (1546, 100), (1546, 100)

# Graph Construction (GNN)
adjacency = np.zeros((n_nodes, n_nodes))
node_features = social_context.values  # Shape: (1546, 3)

# Simple edge creation based on reaction_count similarity
for i in range(n_nodes):
    for j in range(n_nodes):
        if i != j and abs(node_features[i, 1] - node_features[j, 1]) < 10:
            adjacency[i, j] = 1

adjacency = normalized_adjacency(adjacency, symmetric=True)  # Shape: (1546, 1546)
node_features_tf = tf.convert_to_tensor(node_features, dtype=tf.float32)  # (1546, 3)
adjacency_tf = tf.convert_to_tensor(adjacency, dtype=tf.float32)          # (1546, 1546)
labels_tf = tf.convert_to_tensor(labels, dtype=tf.float32)                # (1546,)

# Step 2: Define Model Components
# Text Branch (BERT + BiLSTM)
class TFBertLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(TFBertLayer, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

    def call(self, inputs):
        input_ids, attention_mask = inputs
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]  # Shape: (batch_size, max_len, 768)

def create_text_branch():
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)
    bert_output = TFBertLayer()([input_ids, attention_mask])
    bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=False))(bert_output)
    dense = layers.Dense(128, activation='relu')(bilstm)
    return [input_ids, attention_mask], dense

# Social Branch (GNN with GCN)
def create_social_branch(n_nodes, n_features):
    node_input = layers.Input(shape=(n_nodes, n_features), dtype=tf.float32)  # (batch_size, 1546, 3)
    adj_input = layers.Input(shape=(n_nodes, n_nodes), dtype=tf.float32)      # (batch_size, 1546, 1546)

    gcn1 = GCNConv(channels=64, activation='relu')([node_input, adj_input])
    gcn1 = layers.Dropout(0.3)(gcn1)
    gcn2 = GCNConv(channels=32, activation='relu')([gcn1, adj_input])
    graph_embedding = layers.GlobalAvgPool1D()(gcn2)  # Shape: (batch_size, 32)

    return [node_input, adj_input], graph_embedding

# Step 3: Combine into Full Model
def create_model(n_nodes, n_features):
    text_inputs, text_output = create_text_branch()
    social_inputs, social_output = create_social_branch(n_nodes, n_features)

    # Concatenate text and social outputs
    combined = layers.concatenate([text_output, social_output])
    x = layers.Dense(128, activation='relu')(combined)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=text_inputs + social_inputs, outputs=outputs)
    return model

# Create and compile model
n_features = social_context.shape[1]  # 3
model = create_model(n_nodes, n_features)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Step 4: Prepare Inputs for Training
batch_size = 16
n_samples = len(labels)  # 1546

# Tile graph inputs to match batch size
node_features_tiled = tf.tile(node_features_tf[None, ...], [batch_size, 1, 1])  # (batch_size, 1546, 3)
adjacency_tiled = tf.tile(adjacency_tf[None, ...], [batch_size, 1, 1])          # (batch_size, 1546, 1546)

# Train the model
history = model.fit(
    [input_ids, attention_mask, node_features_tiled[:n_samples], adjacency_tiled[:n_samples]],
    labels_tf,
    epochs=10,
    batch_size=batch_size,
    validation_split=0.2
)s



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Exception encountered when calling GCNConv.call().

[1mCould not automatically infer the output shape / dtype of 'gcn_conv_12' (of type GCNConv). Either the `GCNConv.call()` method is incorrect, or you need to implement the `GCNConv.compute_output_spec() / compute_output_shape()` method. Error encountered:

Tried to convert 'y' to a tensor and failed. Error: None values not supported.[0m

Arguments received by GCNConv.call():
  • args=(['<KerasTensor shape=(None, 1933, 3), dtype=float32, sparse=False, name=keras_tensor_138>', '<KerasTensor shape=(None, 1933, 1933), dtype=float32, sparse=False, name=keras_tensor_139>'],)
  • kwargs={'mask': ['None', 'None']}

In [21]:
import tensorflow as tf
from spektral.layers import GCNConv

class GCNConvWrapper(tf.keras.layers.Layer):
    def __init__(self, channels, **kwargs):
        super(GCNConvWrapper, self).__init__(**kwargs)
        self.channels = channels
        # Initialize the GCNConv layer with the specified number of output channels
        self.gcn = GCNConv(channels=channels, activation='relu')

    def build(self, input_shape):
        # Input_shape is a list: [node_features_shape, adjacency_shape]
        node_shape, adj_shape = input_shape
        # Build the GCNConv layer with the input shapes
        self.gcn.build([node_shape, adj_shape])
        super(GCNConvWrapper, self).build(input_shape)

    def call(self, inputs):
        # Inputs is a list: [node_features, adjacency]
        node_features, adjacency = inputs
        # Pass the inputs to the GCNConv layer
        return self.gcn([node_features, adjacency])

    def compute_output_shape(self, input_shape):
        # Input_shape: [(batch, n_nodes, n_features), (batch, n_nodes, n_nodes)]
        batch_size = input_shape[0][0]  # None (batch size)
        n_nodes = input_shape[0][1]     # 1933 in your case
        # Output shape: (batch_size, n_nodes, channels)
        return (batch_size, n_nodes, self.channels)

In [22]:
from tensorflow.keras import layers, Model

# Define inputs
node_input = layers.Input(shape=(1933, 3), dtype=tf.float32, name="node_input")
adj_input = layers.Input(shape=(1933, 1933), dtype=tf.float32, name="adj_input")

# Apply the GCNConvWrapper
gcn1 = GCNConvWrapper(channels=64)([node_input, adj_input])
gcn1 = layers.Dropout(0.3)(gcn1)
gcn2 = GCNConvWrapper(channels=32)([gcn1, adj_input])

# Pool the graph into a single embedding
graph_embedding = layers.GlobalAvgPool1D()(gcn2)
output = layers.Dense(128, activation='relu')(graph_embedding)

# Build the model
model = Model(inputs=[node_input, adj_input], outputs=output)

# Check the model summary
model.summary()