# BBC Text Representations - Classification

**Roll Number:** SE22UARI195

**Tasks:**
1. Load all 9 representations (4 sparse + 5 dense)
2. Train Logistic Regression with C tuning on DEV
3. Evaluate on TEST: Macro-F1 (primary) and Accuracy
4. Generate preds_test.csv for best representation
5. Compare all methods

---

## 1. Setup & Imports

In [1]:
# Core libraries
import pandas as pd
import numpy as np
import pickle
import json
from pathlib import Path
import time

# Scikit-learn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, accuracy_score, classification_report

# Progress bar
from tqdm.notebook import tqdm

# Warnings
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Imports successful!")

‚úÖ Imports successful!


In [2]:
# Configuration: student identifier plus the directories this notebook
# reads from (cache, models) and writes to (outputs).
ROLL = "SE22UARI195"
CACHE_DIR = Path("../cache")
MODELS_DIR = Path("../models")
OUTPUTS_DIR = Path("../outputs")

# Only the outputs directory is created here; it is left alone if present.
OUTPUTS_DIR.mkdir(exist_ok=True)

# Echo the settings so the saved notebook records what it ran with.
for setting_label, setting_value in (
    ("Roll Number", ROLL),
    ("Cache Directory", CACHE_DIR),
    ("Models Directory", MODELS_DIR),
    ("Outputs Directory", OUTPUTS_DIR),
):
    print(f"{setting_label}: {setting_value}")

Roll Number: SE22UARI195
Cache Directory: ../cache
Models Directory: ../models
Outputs Directory: ../outputs


## 2. Load Preprocessed Data & Labels

In [3]:
print("üìÇ Loading preprocessed data...\n")

with open(CACHE_DIR / 'train_processed.pkl', 'rb') as f:
    train_df = pickle.load(f)

with open(CACHE_DIR / 'dev_processed.pkl', 'rb') as f:
    dev_df = pickle.load(f)

with open(CACHE_DIR / 'test_processed.pkl', 'rb') as f:
    test_df = pickle.load(f)

# Extract labels
y_train = train_df['label'].values
y_dev = dev_df['label'].values
y_test = test_df['label'].values

print(f"‚úÖ TRAIN: {len(train_df)} documents, {len(set(y_train))} classes")
print(f"‚úÖ DEV: {len(dev_df)} documents")
print(f"‚úÖ TEST: {len(test_df)} documents")
print(f"\nClasses: {sorted(set(y_train))}")

üìÇ Loading preprocessed data...

‚úÖ TRAIN: 1335 documents, 5 classes
‚úÖ DEV: 445 documents
‚úÖ TEST: 445 documents

Classes: ['business', 'entertainment', 'politics', 'sport', 'tech']


## 3. Load All Representations

In [4]:
print("\nüìä Loading sparse representations...\n")

# Load sparse matrices
from scipy.sparse import load_npz

representations = {}

# One-Hot Encoding
representations['ohe'] = {
    'train': load_npz(MODELS_DIR / 'X_train_ohe.npz'),
    'dev': load_npz(MODELS_DIR / 'X_dev_ohe.npz'),
    'test': load_npz(MODELS_DIR / 'X_test_ohe.npz')
}
print(f"‚úÖ OHE: {representations['ohe']['train'].shape}")

# Bag-of-Words
representations['bow'] = {
    'train': load_npz(MODELS_DIR / 'X_train_bow.npz'),
    'dev': load_npz(MODELS_DIR / 'X_dev_bow.npz'),
    'test': load_npz(MODELS_DIR / 'X_test_bow.npz')
}
print(f"‚úÖ BOW: {representations['bow']['train'].shape}")

# N-grams
representations['ngram'] = {
    'train': load_npz(MODELS_DIR / 'X_train_ngram.npz'),
    'dev': load_npz(MODELS_DIR / 'X_dev_ngram.npz'),
    'test': load_npz(MODELS_DIR / 'X_test_ngram.npz')
}
print(f"‚úÖ N-grams: {representations['ngram']['train'].shape}")

# TF-IDF
representations['tfidf'] = {
    'train': load_npz(MODELS_DIR / 'X_train_tfidf.npz'),
    'dev': load_npz(MODELS_DIR / 'X_dev_tfidf.npz'),
    'test': load_npz(MODELS_DIR / 'X_test_tfidf.npz')
}
print(f"‚úÖ TF-IDF: {representations['tfidf']['train'].shape}")


üìä Loading sparse representations...

‚úÖ OHE: (1335, 2000)
‚úÖ BOW: (1335, 11515)
‚úÖ N-grams: (1335, 18625)
‚úÖ TF-IDF: (1335, 11515)


In [5]:
print("\nüìä Loading dense representations...\n")

# Word2Vec Skip-gram NS
representations['w2v_sg_ns'] = {
    'train': np.load(MODELS_DIR / 'X_train_w2v_sg_ns.npy'),
    'dev': np.load(MODELS_DIR / 'X_dev_w2v_sg_ns.npy'),
    'test': np.load(MODELS_DIR / 'X_test_w2v_sg_ns.npy')
}
print(f"‚úÖ W2V Skip-gram NS: {representations['w2v_sg_ns']['train'].shape}")

# Word2Vec CBOW NS
representations['w2v_cbow_ns'] = {
    'train': np.load(MODELS_DIR / 'X_train_w2v_cbow_ns.npy'),
    'dev': np.load(MODELS_DIR / 'X_dev_w2v_cbow_ns.npy'),
    'test': np.load(MODELS_DIR / 'X_test_w2v_cbow_ns.npy')
}
print(f"‚úÖ W2V CBOW NS: {representations['w2v_cbow_ns']['train'].shape}")

# Word2Vec Skip-gram HS
representations['w2v_sg_hs'] = {
    'train': np.load(MODELS_DIR / 'X_train_w2v_sg_hs.npy'),
    'dev': np.load(MODELS_DIR / 'X_dev_w2v_sg_hs.npy'),
    'test': np.load(MODELS_DIR / 'X_test_w2v_sg_hs.npy')
}
print(f"‚úÖ W2V Skip-gram HS: {representations['w2v_sg_hs']['train'].shape}")

# Word2Vec CBOW HS
representations['w2v_cbow_hs'] = {
    'train': np.load(MODELS_DIR / 'X_train_w2v_cbow_hs.npy'),
    'dev': np.load(MODELS_DIR / 'X_dev_w2v_cbow_hs.npy'),
    'test': np.load(MODELS_DIR / 'X_test_w2v_cbow_hs.npy')
}
print(f"‚úÖ W2V CBOW HS: {representations['w2v_cbow_hs']['train'].shape}")

# GloVe
representations['glove'] = {
    'train': np.load(MODELS_DIR / 'X_train_glove.npy'),
    'dev': np.load(MODELS_DIR / 'X_dev_glove.npy'),
    'test': np.load(MODELS_DIR / 'X_test_glove.npy')
}
print(f"‚úÖ GloVe: {representations['glove']['train'].shape}")


üìä Loading dense representations...

‚úÖ W2V Skip-gram NS: (1335, 100)
‚úÖ W2V CBOW NS: (1335, 100)
‚úÖ W2V Skip-gram HS: (1335, 100)
‚úÖ W2V CBOW HS: (1335, 100)
‚úÖ GloVe: (1335, 100)


## 4. Classification Helper Functions

In [6]:
def train_and_evaluate(X_train, y_train, X_dev, y_dev, X_test, y_test,
                       method_name, C_values=(0.01, 0.1, 1.0, 10.0, 100.0)):
    """
    Train Logistic Regression with hyperparameter tuning on DEV.
    Report best performance on TEST.

    One classifier is fit on TRAIN for every candidate C. The classifier with
    the highest DEV Macro-F1 is kept (on a tie the earliest candidate wins)
    and is the only one scored on TEST.

    Args:
        X_train, X_dev, X_test: feature matrices passed to
            LogisticRegression.fit / .predict.
        y_train, y_dev, y_test: gold labels for the matching split.
        method_name: display name printed in the log and echoed in the result.
        C_values: iterable of inverse-regularisation strengths to try.

    Returns:
        dict with method, best_C, dev_macro_f1, macro_f1, accuracy, predictions
        (macro_f1 / accuracy / predictions refer to TEST).

    Raises:
        ValueError: if C_values is empty.
    """
    # Materialise once so any iterable works and emptiness is caught up front;
    # otherwise an empty grid surfaces later as AttributeError on None.predict.
    candidate_Cs = list(C_values)
    if not candidate_Cs:
        raise ValueError("C_values must contain at least one candidate value")

    print(f"\n{'='*60}")
    print(f"Training: {method_name}")
    print(f"{'='*60}")

    best_C = None
    best_f1_dev = -1
    best_model = None

    # Tune C on DEV set
    print("\nüîç Tuning hyperparameter C on DEV set...")
    for C in candidate_Cs:
        # multi_class is intentionally not passed: it is deprecated in recent
        # scikit-learn, and lbfgs already fits a multinomial model when there
        # are 3+ classes (the case for every caller in this notebook).
        clf = LogisticRegression(
            C=C,
            max_iter=1000,
            random_state=42,
            solver='lbfgs'
        )

        clf.fit(X_train, y_train)
        y_pred_dev = clf.predict(X_dev)

        f1_dev = f1_score(y_dev, y_pred_dev, average='macro')
        acc_dev = accuracy_score(y_dev, y_pred_dev)

        print(f"  C={C:7.2f} -> DEV Macro-F1: {f1_dev:.4f}, Accuracy: {acc_dev:.4f}")

        # Strict '>' keeps the first (smallest-index) candidate on ties.
        if f1_dev > best_f1_dev:
            best_f1_dev = f1_dev
            best_C = C
            best_model = clf

    print(f"\n‚úÖ Best C: {best_C} (DEV Macro-F1: {best_f1_dev:.4f})")

    # Evaluate on TEST set (only the DEV-selected model touches TEST)
    print("\nüìä Evaluating on TEST set...")
    y_pred_test = best_model.predict(X_test)

    f1_test = f1_score(y_test, y_pred_test, average='macro')
    acc_test = accuracy_score(y_test, y_pred_test)

    print("\nüéØ TEST Results:")
    print(f"   Macro-F1: {f1_test:.4f}")
    print(f"   Accuracy: {acc_test:.4f}")

    # Per-class results
    print("\nüìã Classification Report:")
    print(classification_report(y_test, y_pred_test, digits=4))

    return {
        'method': method_name,
        'best_C': best_C,
        # DEV score of the selected model, so callers can compare
        # representations without looking at TEST.
        'dev_macro_f1': float(best_f1_dev),
        'macro_f1': float(f1_test),
        'accuracy': float(acc_test),
        'predictions': y_pred_test
    }

print("‚úÖ Helper functions defined!")

‚úÖ Helper functions defined!


## 5. Train Classifiers on All Representations

In [7]:
# Section banner, then an empty container that the training cells fill with
# one {'macro_f1', 'accuracy'} entry per representation.
print(
    "\n" + "=" * 80,
    "TRAINING CLASSIFIERS ON ALL REPRESENTATIONS",
    "=" * 80,
    sep="\n",
)

results_classification = dict()


TRAINING CLASSIFIERS ON ALL REPRESENTATIONS


### 5.1 Sparse Methods

In [8]:
# One-Hot Encoding
result = train_and_evaluate(
    representations['ohe']['train'], y_train,
    representations['ohe']['dev'], y_dev,
    representations['ohe']['test'], y_test,
    method_name='One-Hot Encoding (OHE)'
)
results_classification['ohe'] = {
    'macro_f1': result['macro_f1'],
    'accuracy': result['accuracy']
}


Training: One-Hot Encoding (OHE)

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9519, Accuracy: 0.9528
  C=   0.10 -> DEV Macro-F1: 0.9610, Accuracy: 0.9618
  C=   1.00 -> DEV Macro-F1: 0.9635, Accuracy: 0.9640
  C=  10.00 -> DEV Macro-F1: 0.9729, Accuracy: 0.9730
  C= 100.00 -> DEV Macro-F1: 0.9656, Accuracy: 0.9663

‚úÖ Best C: 10.0 (DEV Macro-F1: 0.9729)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9654
   Accuracy: 0.9663

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9519    0.9706    0.9612       102
entertainment     0.9867    0.9610    0.9737        77
     politics     0.9759    0.9643    0.9701        84
        sport     0.9714    1.0000    0.9855       102
         tech     0.9487    0.9250    0.9367        80

     accuracy                         0.9663       445
    macro avg     0.9669    0.9642    0.9654       445
 weighted avg     0.9664    0.9663    0.9662    

In [9]:
# Bag-of-Words: tune C on DEV, then score the selected model on TEST.
result = train_and_evaluate(
    X_train=representations['bow']['train'], y_train=y_train,
    X_dev=representations['bow']['dev'], y_dev=y_dev,
    X_test=representations['bow']['test'], y_test=y_test,
    method_name='Bag-of-Words (BOW)',
)
# Store only the two TEST metrics; predictions stay in `result`.
results_classification['bow'] = {
    metric: result[metric] for metric in ('macro_f1', 'accuracy')
}


Training: Bag-of-Words (BOW)

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9612, Accuracy: 0.9618
  C=   0.10 -> DEV Macro-F1: 0.9636, Accuracy: 0.9640
  C=   1.00 -> DEV Macro-F1: 0.9638, Accuracy: 0.9640
  C=  10.00 -> DEV Macro-F1: 0.9611, Accuracy: 0.9618
  C= 100.00 -> DEV Macro-F1: 0.9591, Accuracy: 0.9596

‚úÖ Best C: 1.0 (DEV Macro-F1: 0.9638)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9683
   Accuracy: 0.9685

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9604    0.9510    0.9557       102
entertainment     0.9870    0.9870    0.9870        77
     politics     0.9759    0.9643    0.9701        84
        sport     0.9714    1.0000    0.9855       102
         tech     0.9494    0.9375    0.9434        80

     accuracy                         0.9685       445
    macro avg     0.9688    0.9680    0.9683       445
 weighted avg     0.9685    0.9685    0.9684       44

In [10]:
# N-grams: tune C on DEV, then score the selected model on TEST.
result = train_and_evaluate(
    X_train=representations['ngram']['train'], y_train=y_train,
    X_dev=representations['ngram']['dev'], y_dev=y_dev,
    X_test=representations['ngram']['test'], y_test=y_test,
    method_name='N-grams (1,2)',
)
# Store only the two TEST metrics; predictions stay in `result`.
results_classification['ngram'] = {
    metric: result[metric] for metric in ('macro_f1', 'accuracy')
}


Training: N-grams (1,2)

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9633, Accuracy: 0.9640
  C=   0.10 -> DEV Macro-F1: 0.9636, Accuracy: 0.9640
  C=   1.00 -> DEV Macro-F1: 0.9635, Accuracy: 0.9640
  C=  10.00 -> DEV Macro-F1: 0.9611, Accuracy: 0.9618
  C= 100.00 -> DEV Macro-F1: 0.9611, Accuracy: 0.9618

‚úÖ Best C: 0.1 (DEV Macro-F1: 0.9636)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9639
   Accuracy: 0.9640

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9505    0.9412    0.9458       102
entertainment     0.9870    0.9870    0.9870        77
     politics     0.9643    0.9643    0.9643        84
        sport     0.9714    1.0000    0.9855       102
         tech     0.9487    0.9250    0.9367        80

     accuracy                         0.9640       445
    macro avg     0.9644    0.9635    0.9639       445
 weighted avg     0.9639    0.9640    0.9639       445



In [11]:
# TF-IDF: tune C on DEV, then score the selected model on TEST.
# Its TEST predictions are kept because the submission cell exports them.
result = train_and_evaluate(
    X_train=representations['tfidf']['train'], y_train=y_train,
    X_dev=representations['tfidf']['dev'], y_dev=y_dev,
    X_test=representations['tfidf']['test'], y_test=y_test,
    method_name='TF-IDF',
)
# Store only the two TEST metrics in the shared results table.
results_classification['tfidf'] = {
    metric: result[metric] for metric in ('macro_f1', 'accuracy')
}

# Keep a dedicated reference: `result` is overwritten by the next training cell.
tfidf_predictions = result['predictions']


Training: TF-IDF

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.2862, Accuracy: 0.4742
  C=   0.10 -> DEV Macro-F1: 0.9214, Accuracy: 0.9213
  C=   1.00 -> DEV Macro-F1: 0.9751, Accuracy: 0.9753
  C=  10.00 -> DEV Macro-F1: 0.9686, Accuracy: 0.9685
  C= 100.00 -> DEV Macro-F1: 0.9686, Accuracy: 0.9685

‚úÖ Best C: 1.0 (DEV Macro-F1: 0.9751)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9639
   Accuracy: 0.9640

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9505    0.9412    0.9458       102
entertainment     0.9870    0.9870    0.9870        77
     politics     0.9643    0.9643    0.9643        84
        sport     0.9714    1.0000    0.9855       102
         tech     0.9487    0.9250    0.9367        80

     accuracy                         0.9640       445
    macro avg     0.9644    0.9635    0.9639       445
 weighted avg     0.9639    0.9640    0.9639       445



### 5.2 Dense Methods (TF-IDF Weighted)

In [12]:
# Word2Vec Skip-gram NS: tune C on DEV, then score the selected model on TEST.
result = train_and_evaluate(
    X_train=representations['w2v_sg_ns']['train'], y_train=y_train,
    X_dev=representations['w2v_sg_ns']['dev'], y_dev=y_dev,
    X_test=representations['w2v_sg_ns']['test'], y_test=y_test,
    method_name='Word2Vec Skip-gram NS + TF-IDF',
)
# Stored under 'w2v_ns_tfidf', which differs from the representation key.
results_classification['w2v_ns_tfidf'] = {
    metric: result[metric] for metric in ('macro_f1', 'accuracy')
}


Training: Word2Vec Skip-gram NS + TF-IDF

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9325, Accuracy: 0.9326
  C=   0.10 -> DEV Macro-F1: 0.9390, Accuracy: 0.9393
  C=   1.00 -> DEV Macro-F1: 0.9387, Accuracy: 0.9393
  C=  10.00 -> DEV Macro-F1: 0.9339, Accuracy: 0.9348
  C= 100.00 -> DEV Macro-F1: 0.9306, Accuracy: 0.9326

‚úÖ Best C: 0.1 (DEV Macro-F1: 0.9390)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9306
   Accuracy: 0.9326

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9293    0.9020    0.9154       102
entertainment     0.9367    0.9610    0.9487        77
     politics     0.9070    0.9286    0.9176        84
        sport     0.9623    1.0000    0.9808       102
         tech     0.9200    0.8625    0.8903        80

     accuracy                         0.9326       445
    macro avg     0.9310    0.9308    0.9306       445
 weighted avg     0.9323    0.9326    0.9

In [13]:
# Word2Vec CBOW NS (optional - for comparison)
# The metrics are printed by the helper but deliberately not added to
# results_classification.
result = train_and_evaluate(
    X_train=representations['w2v_cbow_ns']['train'], y_train=y_train,
    X_dev=representations['w2v_cbow_ns']['dev'], y_dev=y_dev,
    X_test=representations['w2v_cbow_ns']['test'], y_test=y_test,
    method_name='Word2Vec CBOW NS + TF-IDF',
)
# Note: Not required in results.json but good for comparison


Training: Word2Vec CBOW NS + TF-IDF

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9016, Accuracy: 0.9011
  C=   0.10 -> DEV Macro-F1: 0.9186, Accuracy: 0.9191
  C=   1.00 -> DEV Macro-F1: 0.9299, Accuracy: 0.9303
  C=  10.00 -> DEV Macro-F1: 0.9322, Accuracy: 0.9326
  C= 100.00 -> DEV Macro-F1: 0.9309, Accuracy: 0.9303

‚úÖ Best C: 10.0 (DEV Macro-F1: 0.9322)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9250
   Accuracy: 0.9258

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9038    0.9216    0.9126       102
entertainment     0.9367    0.9610    0.9487        77
     politics     0.9059    0.9167    0.9112        84
        sport     0.9515    0.9608    0.9561       102
         tech     0.9324    0.8625    0.8961        80

     accuracy                         0.9258       445
    macro avg     0.9261    0.9245    0.9250       445
 weighted avg     0.9260    0.9258    0.9256 

In [14]:
# Word2Vec Skip-gram HS: tune C on DEV, then score the selected model on TEST.
result = train_and_evaluate(
    X_train=representations['w2v_sg_hs']['train'], y_train=y_train,
    X_dev=representations['w2v_sg_hs']['dev'], y_dev=y_dev,
    X_test=representations['w2v_sg_hs']['test'], y_test=y_test,
    method_name='Word2Vec Skip-gram HS + TF-IDF',
)
# Stored under 'w2v_hs_tfidf', which differs from the representation key.
results_classification['w2v_hs_tfidf'] = {
    metric: result[metric] for metric in ('macro_f1', 'accuracy')
}


Training: Word2Vec Skip-gram HS + TF-IDF

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9328, Accuracy: 0.9326
  C=   0.10 -> DEV Macro-F1: 0.9392, Accuracy: 0.9393
  C=   1.00 -> DEV Macro-F1: 0.9362, Accuracy: 0.9371
  C=  10.00 -> DEV Macro-F1: 0.9342, Accuracy: 0.9348
  C= 100.00 -> DEV Macro-F1: 0.9318, Accuracy: 0.9326

‚úÖ Best C: 0.1 (DEV Macro-F1: 0.9392)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9327
   Accuracy: 0.9348

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9388    0.9020    0.9200       102
entertainment     0.9241    0.9481    0.9359        77
     politics     0.9070    0.9286    0.9176        84
        sport     0.9714    1.0000    0.9855       102
         tech     0.9221    0.8875    0.9045        80

     accuracy                         0.9348       445
    macro avg     0.9327    0.9332    0.9327       445
 weighted avg     0.9347    0.9348    0.9

In [15]:
# Word2Vec CBOW HS (optional - for comparison)
# The metrics are printed by the helper but deliberately not added to
# results_classification.
result = train_and_evaluate(
    X_train=representations['w2v_cbow_hs']['train'], y_train=y_train,
    X_dev=representations['w2v_cbow_hs']['dev'], y_dev=y_dev,
    X_test=representations['w2v_cbow_hs']['test'], y_test=y_test,
    method_name='Word2Vec CBOW HS + TF-IDF',
)
# Note: Not required in results.json but good for comparison


Training: Word2Vec CBOW HS + TF-IDF

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9325, Accuracy: 0.9326
  C=   0.10 -> DEV Macro-F1: 0.9347, Accuracy: 0.9348
  C=   1.00 -> DEV Macro-F1: 0.9391, Accuracy: 0.9393
  C=  10.00 -> DEV Macro-F1: 0.9337, Accuracy: 0.9348
  C= 100.00 -> DEV Macro-F1: 0.9249, Accuracy: 0.9258

‚úÖ Best C: 1.0 (DEV Macro-F1: 0.9391)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9289
   Accuracy: 0.9303

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9293    0.9020    0.9154       102
entertainment     0.9136    0.9610    0.9367        77
     politics     0.9157    0.9048    0.9102        84
        sport     0.9615    0.9804    0.9709       102
         tech     0.9231    0.9000    0.9114        80

     accuracy                         0.9303       445
    macro avg     0.9286    0.9296    0.9289       445
 weighted avg     0.9303    0.9303    0.9301  

In [16]:
# GloVe: tune C on DEV, then score the selected model on TEST.
result = train_and_evaluate(
    X_train=representations['glove']['train'], y_train=y_train,
    X_dev=representations['glove']['dev'], y_dev=y_dev,
    X_test=representations['glove']['test'], y_test=y_test,
    method_name='GloVe + TF-IDF',
)
# Store only the two TEST metrics; predictions stay in `result`.
results_classification['glove_tfidf'] = {
    metric: result[metric] for metric in ('macro_f1', 'accuracy')
}


Training: GloVe + TF-IDF

üîç Tuning hyperparameter C on DEV set...
  C=   0.01 -> DEV Macro-F1: 0.9206, Accuracy: 0.9213
  C=   0.10 -> DEV Macro-F1: 0.9371, Accuracy: 0.9371
  C=   1.00 -> DEV Macro-F1: 0.9390, Accuracy: 0.9393
  C=  10.00 -> DEV Macro-F1: 0.9433, Accuracy: 0.9438
  C= 100.00 -> DEV Macro-F1: 0.9386, Accuracy: 0.9393

‚úÖ Best C: 10.0 (DEV Macro-F1: 0.9433)

üìä Evaluating on TEST set...

üéØ TEST Results:
   Macro-F1: 0.9267
   Accuracy: 0.9281

üìã Classification Report:
               precision    recall  f1-score   support

     business     0.9355    0.8529    0.8923       102
entertainment     0.9487    0.9610    0.9548        77
     politics     0.8889    0.9524    0.9195        84
        sport     0.9623    1.0000    0.9808       102
         tech     0.8974    0.8750    0.8861        80

     accuracy                         0.9281       445
    macro avg     0.9266    0.9283    0.9267       445
 weighted avg     0.9283    0.9281    0.9274       445



## 6. Summary Comparison

In [17]:
# Summary table of the TEST metrics collected in results_classification.
rule = "=" * 80
print("\n" + rule)
print("CLASSIFICATION RESULTS SUMMARY")
print(rule)

print(f"\n{'Method':<30} {'Macro-F1':<12} {'Accuracy':<12}")
print("-" * 80)

# Rank by Macro-F1, highest first.
# NOTE(review): these are TEST scores, so the "best method" below is chosen on
# TEST rather than on DEV - confirm this is the intended selection criterion.
sorted_results = sorted(
    results_classification.items(),
    key=lambda entry: entry[1]['macro_f1'],
    reverse=True,
)

for method, metrics in sorted_results:
    print(f"{method:<30} {metrics['macro_f1']:<12.4f} {metrics['accuracy']:<12.4f}")

print("\n" + rule)

# The top-ranked entry is reported as the best method.
best_method, best_metrics = sorted_results[0]
best_f1 = best_metrics['macro_f1']
print(f"\nüèÜ Best Method: {best_method} (Macro-F1: {best_f1:.4f})")
print(rule)


CLASSIFICATION RESULTS SUMMARY

Method                         Macro-F1     Accuracy    
--------------------------------------------------------------------------------
bow                            0.9683       0.9685      
ohe                            0.9654       0.9663      
ngram                          0.9639       0.9640      
tfidf                          0.9639       0.9640      
w2v_hs_tfidf                   0.9327       0.9348      
w2v_ns_tfidf                   0.9306       0.9326      
glove_tfidf                    0.9267       0.9281      


üèÜ Best Method: bow (Macro-F1: 0.9683)


## 7. Generate preds_test.csv

In [18]:
print("\nüíæ Generating preds_test.csv...")

# Create predictions DataFrame
preds_df = pd.DataFrame({
    'id': test_df['id'].values,
    'pred': tfidf_predictions
})

# Save to outputs
preds_df.to_csv(OUTPUTS_DIR / 'preds_test.csv', index=False)

print(f"‚úÖ Saved: {OUTPUTS_DIR / 'preds_test.csv'}")
print(f"   Shape: {preds_df.shape}")
print(f"\nFirst 5 predictions:")
print(preds_df.head())


üíæ Generating preds_test.csv...
‚úÖ Saved: ../outputs/preds_test.csv
   Shape: (445, 2)

First 5 predictions:
          id           pred
0  bbc_00006       politics
1  bbc_00018       business
2  bbc_00022          sport
3  bbc_00024           tech
4  bbc_00029  entertainment


## 8. Save Classification Results

In [19]:
# Persist the per-representation TEST metrics as indented JSON in the cache
# directory.
results_path = CACHE_DIR / 'classification_results.json'
with open(results_path, 'w') as results_file:
    json.dump(results_classification, results_file, indent=2)

print("\nüíæ Classification results saved to cache/classification_results.json")


üíæ Classification results saved to cache/classification_results.json


In [20]:
print("\nüéâ Notebook 04: Classification - COMPLETE!")
print("\nNext steps:")
print("  1. Run notebook 05: Retrieval")
print("  2. Generate deterministic queries")
print("  3. Calculate MAP@5, Recall@10, Negation Top-1%")
print("  4. Merge all notebooks into final submission")


üéâ Notebook 04: Classification - COMPLETE!

Next steps:
  1. Run notebook 05: Retrieval
  2. Generate deterministic queries
  3. Calculate MAP@5, Recall@10, Negation Top-1%
  4. Merge all notebooks into final submission
