In [15]:
import subprocess

In [16]:
import os
import sys

augmented = False
layer_mapping = "bottom"

# Show current working directory
print(f"Current working directory: {os.getcwd()}")
print()

# Build directory names with conditional suffix
aug_suffix = "_aug" if augmented else ""
temp_output_dir = f"MRPC/models/temp_ts_tinybert_{layer_mapping}{aug_suffix}"
final_output_dir = f"MRPC/models/ts_tinybert_{layer_mapping}{aug_suffix}"

# Build command arguments for Step 1
step1_args = [
    sys.executable, "task_distill.py",  # Use sys.executable to get current Python interpreter
    "--teacher_model", "MRPC/models/teacher_bert_mrpc",
    "--student_model", "MRPC/models/student_tinybert",
    "--data_dir", "MRPC/glue_data/MRPC",
    "--task_name", "MRPC",
    "--output_dir", temp_output_dir,
    "--max_seq_length", "128",
    "--train_batch_size", "32",
    "--learning_rate", "5e-5",
    "--num_train_epochs", "20",
    "--do_lower_case"
]

# Add --aug_train flag if augmented is True
if augmented:
    step1_args.append("--aug_train")

print("=" * 60)
print("STEP 1: Intermediate Layer Distillation")
print("=" * 60)
print(f"Output directory: {temp_output_dir}")
print(f"Command: {' '.join(step1_args)}")
print()

# Run with stderr merged into stdout so all output is visible
result1 = subprocess.run(step1_args, capture_output=True, text=True)
# Print all output
if result1.stdout:
    print("STDOUT:")
    print(result1.stdout)
if result1.stderr:
    print("\nSTDERR:")
    print(result1.stderr)

print(f"\nExit code: {result1.returncode}")

if result1.returncode == 0:
    print("\n" + "=" * 60)
    print("✓ Step 1 completed successfully!")
    print("=" * 60)
    print("STEP 2: Prediction Layer Distillation")
    print("=" * 60)
    print(f"Output directory: {final_output_dir}")
    print()
    
    # Step 2 only runs if Step 1 succeeded
    step2_args = [
        sys.executable, "task_distill.py",
        "--pred_distill",
        "--teacher_model", "MRPC/models/teacher_bert_mrpc",
        "--student_model", temp_output_dir,
        "--data_dir", "MRPC/glue_data/MRPC",
        "--task_name", "MRPC",
        "--output_dir", final_output_dir,
        "--do_lower_case",
        "--learning_rate", "2e-5",
        "--num_train_epochs", "3",
        "--eval_step", "100",
        "--max_seq_length", "128",
        "--train_batch_size", "32"
    ]
    
    # Add --aug_train flag if augmented is True
    if augmented:
        step2_args.append("--aug_train")
    
    print(f"Command: {' '.join(step2_args)}")
    print()
    
    result2 = subprocess.run(step2_args, stderr=subprocess.STDOUT, text=True)
    
    print(f"\nExit code: {result2.returncode}")
    
    if result2.returncode == 0:
        print("\n" + "=" * 60)
        print("✓✓ Both steps completed successfully!")
        print(f"Final model saved to: {final_output_dir}")
        print("=" * 60)
    else:
        print("\n" + "=" * 60)
        print(f"✗ Step 2 failed with exit code: {result2.returncode}")
        print("=" * 60)
else:
    print("\n" + "=" * 60)
    print(f"✗ Step 1 failed with exit code: {result1.returncode}")
    print("Step 2 will not run.")
    print("=" * 60)

Current working directory: c:\Users\povhi\OneDrive\01_Uni\03_TU Wien\MSc Data Science\03_Deep Learning for NLP\project\dlnlp-2025WS-project-group13\TinyBERT

STEP 1: Intermediate Layer Distillation
Output directory: MRPC/models/temp_ts_tinybert_bottom
Command: c:\Users\povhi\anaconda3\envs\dlnlp\python.exe task_distill.py --teacher_model MRPC/models/teacher_bert_mrpc --student_model MRPC/models/student_tinybert --data_dir MRPC/glue_data/MRPC --task_name MRPC --output_dir MRPC/models/temp_ts_tinybert_bottom --max_seq_length 128 --train_batch_size 32 --learning_rate 5e-5 --num_train_epochs 20 --do_lower_case

STDOUT:
01/23 10:06:27 PM The args: Namespace(data_dir='MRPC/glue_data/MRPC', teacher_model='MRPC/models/teacher_bert_mrpc', student_model='MRPC/models/student_tinybert', task_name='MRPC', output_dir='MRPC/models/temp_ts_tinybert_bottom', cache_dir='', max_seq_length=128, do_eval=False, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=5e-05, weight_decay=0.

In [19]:
import sys
import subprocess
layer_mapping = ""
aug_suffix = "aug"
final_output_dir = f"MRPC/models/teacher_bert_mrpc"

eval_args = [
    sys.executable, "task_distill.py",
    "--do_eval",
    "--student_model", final_output_dir,
    "--data_dir", "MRPC/glue_data/MRPC",
    "--task_name", "MRPC",
    "--output_dir", final_output_dir + "/eval_results",
    "--do_lower_case",
    "--eval_batch_size", "32",
    "--max_seq_length", "128"
]

result = subprocess.run(eval_args, capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print(result.stderr)
print(f"Exit code: {result.returncode}")

01/24 01:05:47 PM The args: Namespace(data_dir='MRPC/glue_data/MRPC', teacher_model=None, student_model='MRPC/models/teacher_bert_mrpc', task_name='MRPC', output_dir='MRPC/models/teacher_bert_mrpc/eval_results', cache_dir='', max_seq_length=128, do_eval=True, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=5e-05, weight_decay=0.0001, num_train_epochs=3.0, warmup_proportion=0.1, no_cuda=False, seed=42, gradient_accumulation_steps=1, aug_train=False, eval_step=50, pred_distill=False, data_url='', temperature=1.0, layer_map='uniform')
01/24 01:05:47 PM device: cuda n_gpu: 1
01/24 01:05:47 PM Writing example 0 of 408
01/24 01:05:47 PM *** Example ***
01/24 01:05:47 PM guid: dev-1
01/24 01:05:47 PM tokens: [CLS] he said the foods ##er ##vic ##e pie business doesn ' t fit the company ' s long - term growth strategy . [SEP] " the foods ##er ##vic ##e pie business does not fit our long - term growth strategy . [SEP]
01/24 01:05:47 PM input_ids: 101 2002 2056 1996 944

In [None]:
import torch

if torch.cuda.is_available():
    device = torch.cuda.current_device()
    print(f"GPU Name: {torch.cuda.get_device_name(device)}")
    
    capability = torch.cuda.get_device_capability(device)
    print(f"Compute Capability: {capability}")
    
    major, minor = capability
    
    # Check for INT8 Tensor Core support
    if major >= 7 and minor >= 5:
        print("✓ INT8 Tensor Cores: SUPPORTED")
    elif major >= 7:
        print("⚠ FP16 Tensor Cores only (no INT8)")
    else:
        print("✗ No Tensor Cores")
else:
    print("No CUDA GPU available")

GPU Name: NVIDIA GeForce RTX 3070 Laptop GPU
Compute Capability: (8, 6)
✓ INT8 Tensor Cores: SUPPORTED


: 