In [1]:
pip install transformers pandas scikit-learn torch flask


Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, TrainingArguments, Trainer
import torch

# Load dataset
# Fixed seed so the shuffle and the train/validation split are identical on every
# Restart & Run All.
SEED = 42

df_fake = pd.read_csv("data/Fake.csv")
df_real = pd.read_csv("data/True.csv")

# Label convention used throughout: 0 = fake, 1 = real.
df_fake['label'] = 0
df_real['label'] = 1

df = pd.concat([df_fake, df_real]).sample(frac=1, random_state=SEED).reset_index(drop=True)
# A missing title or body would turn the whole concatenation into NaN, which the
# tokenizer cannot consume; treat a missing piece as an empty string instead.
df['text'] = df['title'].fillna("") + " " + df['text'].fillna("")
df = df[['text', 'label']]

# Split dataset
# Stratify on the label so both splits keep the same fake/real ratio.
train_texts, val_texts = train_test_split(
    df,
    test_size=0.1,
    random_state=SEED,
    stratify=df['label'],
)

# Tokenize
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(example):
    """Run the BERT tokenizer over the ``text`` field of ``example``.

    Used as the callback for ``Dataset.map(..., batched=True)``. Every entry is
    padded to the tokenizer's maximum length and truncated when it is longer.

    Returns whatever the tokenizer call returns for ``example['text']``.
    """
    encode_options = {"padding": 'max_length', "truncation": True}
    return tokenizer(example['text'], **encode_options)

def _build_torch_split(frame):
    """Convert one pandas split into a tokenized dataset formatted as torch tensors."""
    split = Dataset.from_pandas(frame).map(tokenize_function, batched=True)
    # Remove unused columns
    split = split.remove_columns(["text", "__index_level_0__"])
    split.set_format("torch")
    return split

train_dataset = _build_torch_split(train_texts)
val_dataset = _build_torch_split(val_texts)

# Model
# Pretrained BERT encoder with a sequence-classification head sized for the two
# labels used in this notebook.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Training
training_args = TrainingArguments(
    output_dir="model/bert_model",
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`
    # keyword; evaluation still runs once per epoch.
    eval_strategy="epoch",
    # Write a checkpoint at the end of every epoch.
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="logs",
    logging_steps=10,
)

# Gather the Trainer inputs in one place, then build the Trainer from them.
trainer_inputs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": val_dataset,
}
trainer = Trainer(**trainer_inputs)

# Fine-tune, then persist the model and the tokenizer to their own directories.
trainer.train()
trainer.save_model("model/bert_model")
tokenizer.save_pretrained("model/tokenizer")


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_tf_utils because of the following error (look up to see its traceback):
Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.

In [5]:
pip install tf-keras

Collecting tf-keras
  Downloading tf_keras-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Downloading tf_keras-2.19.0-py3-none-any.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ------------ --------------------------- 0.5/1.7 MB 2.8 MB/s eta 0:00:01
   ------------------------------ --------- 1.3/1.7 MB 4.0 MB/s eta 0:00:01
   ------------------------------ --------- 1.3/1.7 MB 4.0 MB/s eta 0:00:01
   ---------------------------------------- 1.7/1.7 MB 2.0 MB/s eta 0:00:00
Installing collected packages: tf-keras
Successfully installed tf-keras-2.19.0
Note: you may need to restart the kernel to use updated packages.


In [9]:
!pip uninstall keras -y
!pip install tf-keras


Found existing installation: keras 3.9.0
Uninstalling keras-3.9.0:
  Successfully uninstalled keras-3.9.0
Collecting keras>=3.5.0 (from tensorflow<2.20,>=2.19->tf-keras)
  Downloading keras-3.9.2-py3-none-any.whl.metadata (6.1 kB)
Downloading keras-3.9.2-py3-none-any.whl (1.3 MB)
   ---------------------------------------- 0.0/1.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.3 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.3 MB ? eta -:--:--
   --------------- ------------------------ 0.5/1.3 MB 1.7 MB/s eta 0:00:01
   ---------------------------------------- 1.3/1.3 MB 2.0 MB/s eta 0:00:00
Installing collected packages: keras
Successfully installed keras-3.9.2


In [1]:
# NOTE(review): the recorded output below shows this pin cannot be satisfied in this
# environment (pip only offers tensorflow 2.16.0rc0 through 2.19.0), so the cell fails.
!pip install transformers tensorflow==2.14




ERROR: Could not find a version that satisfies the requirement tensorflow==2.14 (from versions: 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0)
ERROR: No matching distribution found for tensorflow==2.14


In [3]:
!pip uninstall keras tensorflow -y
!pip install torch transformers datasets scikit-learn flask


Found existing installation: keras 3.9.2
Uninstalling keras-3.9.2:
  Successfully uninstalled keras-3.9.2
Found existing installation: tensorflow 2.19.0
Uninstalling tensorflow-2.19.0:
  Successfully uninstalled tensorflow-2.19.0


In [9]:
import os

# Intended to keep transformers away from TensorFlow.
# TODO(review): confirm the installed transformers release actually reads this variable.
os.environ.update({"TRANSFORMERS_NO_TF": "1"})


In [5]:
import os

# Set before the transformers import further down in this cell; presumably the
# flag is only consulted at import time. TODO(review): confirm that the installed
# transformers release honours this variable.
_NO_TF_FLAG = "TRANSFORMERS_NO_TF"
os.environ[_NO_TF_FLAG] = "1"

import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch

# Load the data (adjust path if needed)
# Fixed seed so the shuffle and the train/validation split are identical on every
# Restart & Run All.
SEED = 42

df_fake = pd.read_csv("Fake.csv")
df_real = pd.read_csv("True.csv")

# Label convention used throughout: 0 = fake, 1 = real.
df_fake['label'] = 0
df_real['label'] = 1

df = pd.concat([df_fake, df_real]).sample(frac=1, random_state=SEED).reset_index(drop=True)
# A missing title or body would turn the whole concatenation into NaN, which the
# tokenizer cannot consume; treat a missing piece as an empty string instead.
df['text'] = df['title'].fillna("") + " " + df['text'].fillna("")
df = df[['text', 'label']]

# Split the dataset
# Stratify on the label so both splits keep the same fake/real ratio.
train_texts, val_texts = train_test_split(
    df,
    test_size=0.1,
    random_state=SEED,
    stratify=df['label'],
)

# Tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(example):
    """Run the BERT tokenizer over the ``text`` field of ``example``.

    Used as the callback for ``Dataset.map(..., batched=True)``. Every entry is
    padded to 512 tokens and truncated when it is longer.

    Returns whatever the tokenizer call returns for ``example['text']``.
    """
    encode_options = dict(padding="max_length", truncation=True, max_length=512)
    return tokenizer(example['text'], **encode_options)

# Columns that are no longer needed once the text has been tokenized.
drop_cols = ["text", "__index_level_0__"]

# Convert each pandas split to a Dataset and tokenize it in batches.
train_dataset = Dataset.from_pandas(train_texts)
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = Dataset.from_pandas(val_texts)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Cleanup
train_dataset = train_dataset.remove_columns(drop_cols)
val_dataset = val_dataset.remove_columns(drop_cols)
for split in (train_dataset, val_dataset):
    split.set_format("torch")

# Load model
# Pretrained BERT encoder with a sequence-classification head sized for the two
# labels used in this notebook.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Training setup
training_args = TrainingArguments(
    output_dir="bert_model",
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`
    # keyword; evaluation still runs once per epoch.
    eval_strategy="epoch",
    # Checkpoint on the same schedule as evaluation, alongside
    # `load_best_model_at_end` below.
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    logging_dir="logs",
    logging_steps=10,
    load_best_model_at_end=True,
    disable_tqdm=True
)


# Gather the Trainer inputs in one place, then build the Trainer from them.
trainer_inputs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": val_dataset,
}
trainer = Trainer(**trainer_inputs)

# Train!
trainer.train()

# Save model + tokenizer
model.save_pretrained("bert_model")
tokenizer.save_pretrained("tokenizer")


Map:   0%|          | 0/40408 [00:00<?, ? examples/s]

Map:   0%|          | 0/4490 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.26.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=0.26.0'`

In [None]:
import os

# Going by its name, this flag silences the Hugging Face Hub symlink warning;
# verify against the huggingface_hub environment-variable documentation.
os.environ.update({"HF_HUB_DISABLE_SYMLINKS_WARNING": "1"})
