In [10]:
import os
import sys
import subprocess
import shutil

# Two-stage TinyBERT task distillation on MRPC followed by an evaluation run.
# Each stage launches task_distill.py in a child process; a stage only runs
# when the previous one exited with code 0.

layer_mapping = "bottom"  # forwarded to task_distill.py as --layer_map

# Show current working directory
print(f"Current working directory: {os.getcwd()}")
print()
pruning = True  # controls both the "_pruned" directory suffix and the --prune flag

# Build directory names with conditional suffix - USE os.path.join()
prune_suffix = "_pruned" if pruning else ""
# Forward --prune only when pruning is enabled, so the flag can never
# disagree with the directory suffix computed above.
prune_args = ["--prune"] if pruning else []
temp_output_dir = os.path.join(os.getcwd(), "MRPC", f"temp_ts_tinybert_{layer_mapping}{prune_suffix}")
final_output_dir = os.path.join(os.getcwd(), "MRPC", f"ts_tinybert_{layer_mapping}{prune_suffix}")

# Clean up existing directories to start fresh
if os.path.exists(temp_output_dir):
    print(f"Removing existing directory: {temp_output_dir}")
    shutil.rmtree(temp_output_dir)
if os.path.exists(final_output_dir):
    print(f"Removing existing directory: {final_output_dir}")
    shutil.rmtree(final_output_dir)

os.makedirs(temp_output_dir, exist_ok=True)
os.makedirs(final_output_dir, exist_ok=True)

# Build command arguments for Step 1
step1_args = [
    sys.executable, "task_distill.py",
    "--teacher_model", "MRPC/ts_teacher_bert",
    "--student_model", "MRPC/student_tinybert",
    "--data_dir", "glue_data/MRPC",
    "--task_name", "MRPC",
    "--output_dir", temp_output_dir,
    "--max_seq_length", "128",
    "--train_batch_size", "32",
    "--learning_rate", "5e-5",
    "--num_train_epochs", "20",
    "--do_lower_case",
    "--layer_map", layer_mapping,
    *prune_args,
]

print("=" * 60)
print("STEP 1: Intermediate Layer Distillation")
print("=" * 60)
print(f"Output directory: {temp_output_dir}")

# Reset before running anything: the evaluation guard at the bottom reads
# result2 even when Step 1 fails, and a value left over from an earlier
# kernel run must not be mistaken for this run's result.
result2 = None

# Capture stdout and stderr separately so both can be shown in the cell output
result1 = subprocess.run(step1_args, capture_output=True, text=True)
# Print all output
if result1.stdout:
    print("STDOUT:")
    print(result1.stdout)
if result1.stderr:
    print("\nSTDERR:")
    print(result1.stderr)

print(f"\nExit code: {result1.returncode}")

if result1.returncode == 0:
    # Step 2 only runs if Step 1 succeeded
    step2_args = [
        sys.executable, "task_distill.py",
        "--pred_distill",
        "--teacher_model", "MRPC/ts_teacher_bert",
        "--student_model", temp_output_dir,
        "--data_dir", "glue_data/MRPC",
        "--task_name", "MRPC",
        "--output_dir", final_output_dir,
        "--do_lower_case",
        "--learning_rate", "2e-5",
        "--num_train_epochs", "3",
        "--eval_step", "100",
        "--max_seq_length", "128",
        "--train_batch_size", "32",
        "--layer_map", layer_mapping,
        *prune_args,
    ]
    # Capture stdout with stderr merged into it and print it explicitly, so the
    # output is part of the cell output instead of depending on where the
    # child process's inherited file descriptors point.
    result2 = subprocess.run(
        step2_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    if result2.stdout:
        print(result2.stdout)

    print(f"\nExit code: {result2.returncode}")

    if result2.returncode == 0:
        print("\n" + "=" * 60)
        print("✓✓ Both steps completed successfully!")
        print(f"Final model saved to: {final_output_dir}")
        print("=" * 60)
    else:
        print("\n" + "=" * 60)
        print(f"✗ Step 2 failed with exit code: {result2.returncode}")
        print("=" * 60)
else:
    print("\n" + "=" * 60)
    print(f"✗ Step 1 failed with exit code: {result1.returncode}")
    print("Step 2 will not run.")
    print("=" * 60)


# Evaluate only when Step 2 actually ran in this execution and succeeded.
if result2 is not None and result2.returncode == 0:
    eval_args = [
        sys.executable, "task_distill.py",
        "--do_eval",
        "--student_model", final_output_dir,
        "--data_dir", "glue_data/MRPC",
        "--task_name", "MRPC",
        "--output_dir", os.path.join(final_output_dir, "eval_results"),
        "--do_lower_case",
        "--eval_batch_size", "32",
        "--max_seq_length", "128"
    ]

    result3 = subprocess.run(eval_args, capture_output=True, text=True)
    print(result3.stdout)
    if result3.stderr:
        print(result3.stderr)
    print(f"Exit code: {result3.returncode}")
else:
    print("no eval")

Current working directory: c:\Users\povhi\OneDrive\01_Uni\03_TU Wien\MSc Data Science\03_Deep Learning for NLP\project\dlnlp-2025WS-project-group13\TinyBERT

STEP 1: Intermediate Layer Distillation
Output directory: c:\Users\povhi\OneDrive\01_Uni\03_TU Wien\MSc Data Science\03_Deep Learning for NLP\project\dlnlp-2025WS-project-group13\TinyBERT\MRPC\temp_ts_tinybert_bottom_pruned
STDOUT:
01/31 08:12:32 PM The args: Namespace(data_dir='glue_data/MRPC', teacher_model='MRPC/ts_teacher_bert', student_model='MRPC/student_tinybert', task_name='MRPC', output_dir='c:\\Users\\povhi\\OneDrive\\01_Uni\\03_TU Wien\\MSc Data Science\\03_Deep Learning for NLP\\project\\dlnlp-2025WS-project-group13\\TinyBERT\\MRPC\\temp_ts_tinybert_bottom_pruned', cache_dir='', max_seq_length=128, do_eval=False, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=5e-05, weight_decay=0.0001, num_train_epochs=20.0, warmup_proportion=0.1, no_cuda=False, seed=42, gradient_accumulation_steps=1, aug_t

In [5]:
# Run task_distill.py in evaluation mode on the student model stored in
# `final_output_dir`; results are written to its "eval_results" subfolder.
# NOTE: this cell relies on `sys`, `subprocess` and `final_output_dir` already
# being defined in the kernel by the distillation cell.
eval_args = [sys.executable, "task_distill.py", "--do_eval"]
eval_args += ["--student_model", final_output_dir]
eval_args += ["--data_dir", "glue_data/MRPC"]
eval_args += ["--task_name", "MRPC"]
eval_args += ["--output_dir", final_output_dir + "/eval_results"]
eval_args += ["--do_lower_case"]
eval_args += ["--eval_batch_size", "32"]
eval_args += ["--max_seq_length", "128"]

# Both streams are piped and decoded as text so they can be echoed below.
result3 = subprocess.run(
    eval_args,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

# stdout is always echoed; stderr only when the child wrote something to it.
print(result3.stdout)
if result3.stderr:
    print(result3.stderr)
print(f"Exit code: {result3.returncode}")

01/29 08:43:06 PM The args: Namespace(data_dir='glue_data/MRPC', teacher_model=None, student_model='MRPC/ts_tinybert_bottom_pruned', task_name='MRPC', output_dir='MRPC/ts_tinybert_bottom_pruned/eval_results', cache_dir='', max_seq_length=128, do_eval=True, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=5e-05, weight_decay=0.0001, num_train_epochs=3.0, warmup_proportion=0.1, no_cuda=False, seed=42, gradient_accumulation_steps=1, aug_train=False, eval_step=50, pred_distill=False, data_url='', temperature=1.0, layer_map='uniform', prune=False, prune_amount=0.2, pruning_steps=6)
01/29 08:43:06 PM device: cuda n_gpu: 1
01/29 08:43:06 PM Writing example 0 of 408
01/29 08:43:06 PM *** Example ***
01/29 08:43:06 PM guid: dev-1
01/29 08:43:06 PM tokens: [CLS] he said the foods ##er ##vic ##e pie business doesn ' t fit the company ' s long - term growth strategy . [SEP] " the foods ##er ##vic ##e pie business does not fit our long - term growth strategy . [SEP]
01/29 