# BERT Training - Bloom's Taxonomy Classifier

Fine-tune BERT to classify educational questions into the six levels of Bloom's Taxonomy (Remember, Understand, Apply, Analyze, Evaluate, Create).

**Requirements:** A GPU is recommended for faster training; the notebook also runs on CPU, only more slowly.

In [15]:
import os

# IMPORTANT: Disable TensorFlow to avoid dependency conflicts.
# The flags are set in this first cell, before any later cell imports the model code.
os.environ.update({
    'USE_TF': '0',
    'USE_TORCH': '1',
    'TRANSFORMERS_NO_TF': '1',
})

In [16]:
import os
import sys

# Navigate to project root (parent of notebooks folder) so the relative
# 'config/', 'data/' and 'models/' paths used by later cells resolve.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')

# Add src to the import path as an ABSOLUTE path: a relative 'src' entry is
# resolved against the current working directory at import time, so it would
# silently stop working if the working directory changed later in the session.
src_dir = os.path.abspath('src')
if os.path.isdir(src_dir) and src_dir not in sys.path:
    sys.path.insert(0, src_dir)

print(f'Working directory: {os.getcwd()}')

Working directory: c:\Users\MSI\Desktop\FastText


In [17]:
import torch
import pandas as pd

# Report the installed PyTorch build and whether a CUDA device is visible.
print(f'PyTorch version: {torch.__version__}')

cuda_available = torch.cuda.is_available()
print(f'CUDA available: {cuda_available}')

# Name the first GPU when one is present; otherwise warn that the run is CPU-only.
device_message = (
    f'GPU: {torch.cuda.get_device_name(0)}'
    if cuda_available
    else 'Running on CPU (training will be slower)'
)
print(device_message)

PyTorch version: 2.9.1+cpu
CUDA available: False
Running on CPU (training will be slower)


In [19]:
# Project-local modules (presumably resolved from the src/ folder that an earlier
# cell adds to sys.path - verify against the repository layout).
# NOTE(review): the saved output of this cell is a RuntimeError raised while
# importing transformers.models.bert.modeling_bert:
#   "module 'ml_dtypes' has no attribute 'float8_e3m4'"
# This looks like an installed ml_dtypes version mismatch - TODO confirm, fix the
# environment, and re-run so the notebook executes top to bottom without errors.
from preprocessing import load_and_preprocess_data, load_config
from bert_classifier import SemanticBERT, train_bert_model
from evaluate import ModelEvaluator, evaluate_bert_model

print('Modules imported successfully!')

RuntimeError: Failed to import transformers.models.bert.modeling_bert because of the following error (look up to see its traceback):
module 'ml_dtypes' has no attribute 'float8_e3m4'

In [None]:
# Load configuration (the same YAML path is handed to both loader calls)
config_path = 'config/config.yaml'
config = load_config(config_path)

# Load and preprocess data: the raw CSV is read using its 'question' text column
# and 'level' label column; the call yields three splits plus two label mappings.
dataset_parts = load_and_preprocess_data(
    'data/raw/bloom_questions.csv',
    text_column='question',
    label_column='level',
    config_path=config_path,
)
train_df, val_df, test_df, label_to_id, id_to_label = dataset_parts

# Summarise the classes found and the size of each split
print(f'Classes: {list(label_to_id.keys())}')
print(f'Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}')

In [None]:
# Training config - reduce for faster training
# Overrides are written into the loaded config's 'bert' section.
bert_cfg = config['bert']
bert_cfg['epochs'] = 3
bert_cfg['batch_size'] = 8

# Echo the values that training will actually use
print(
    'Training config:',
    f"  Epochs: {bert_cfg['epochs']}",
    f"  Batch size: {bert_cfg['batch_size']}",
    sep='\n',
)

In [None]:
# Train BERT model
print('Starting BERT training...')

# Inputs: training split, validation split, label mapping and the (overridden) config.
# save_dir is presumably where the trained model is written - TODO confirm in bert_classifier.
save_dir = 'models/bert'
model = train_bert_model(train_df, val_df, label_to_id, config, save_dir=save_dir)

print('Training complete!')

In [None]:
# Evaluate on test set
# NOTE(review): the level names are hard-coded here rather than derived from
# label_to_id / id_to_label - confirm this order is what ModelEvaluator expects.
labels = [
    'Remember',
    'Understand',
    'Apply',
    'Analyze',
    'Evaluate',
    'Create',
]
evaluator = ModelEvaluator(labels)

# Score the trained model on the held-out split, then print the evaluator's
# results for the 'Semantic-BERT' entry.
metrics = evaluate_bert_model(model, test_df, evaluator)
evaluator.print_results('Semantic-BERT')

In [None]:
# Plot confusion matrix for the 'Semantic-BERT' results held by the evaluator
model_name = 'Semantic-BERT'
figure_size = (10, 8)
evaluator.plot_confusion_matrix(model_name, figsize=figure_size)

In [None]:
# Test predictions on a handful of hand-written sample questions
test_questions = [
    'What is the capital of France?',
    'Explain how photosynthesis works.',
    'Calculate the area of a circle.',
    'Compare democracy and monarchy.',
    'Evaluate the effectiveness of vaccines.',
    'Design an experiment to test gravity.'
]

# One line per question: predicted level (padded to 12 chars), confidence as a
# whole-number percentage, then the question text.
for question in test_questions:
    prediction = model.predict_single(question)
    level = prediction['level']
    confidence = prediction['confidence']
    print(f"[{level:12}] ({confidence:5.0%}) {question}")