In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Read the DoLa token-probability table used to train the random forest.
# NOTE(review): a disabled variant of this cell concatenated a second read of
# the same CSV onto the frame; only the single file below is actually loaded.
df = pd.read_csv('/home/hxxzhang/DoLa/token_probabilities_dola.csv')

# Feature selection: keep every column whose name contains 'token_id' or
# 'prob' (whatever top-k columns the CSV provides, not only the top-1 ones).
features = [name for name in df.columns if any(key in name for key in ('token_id', 'prob'))]
X, y = df[features], df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows so later cells can use or persist it,
# but keep the *unscaled* frame as the training input: X_train_scaled is
# deliberately just an alias of X_train.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = X_train

In [None]:
import os

import joblib

# Persist the fitted StandardScaler alongside the other model artifacts.
# joblib.dump does not create missing parent directories, so make sure
# ./model exists first instead of failing with FileNotFoundError.
os.makedirs('./model', exist_ok=True)
joblib.dump(scaler, './model/scaler.pkl')

# Random forest

In [2]:
# The model is trained and evaluated on the raw (unscaled) feature frames.
# The previous scaler.transform(X_test) call was dead work: its result was
# overwritten by X_test before ever being used, so it has been dropped.
X_train_scaled = X_train
X_test_scaled = X_test

# Fixed random_state keeps the forest reproducible across runs.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_scaled, y_train)

y_pred = clf.predict(X_test_scaled)

# NOTE(review): the recorded run of this cell shows heavy class imbalance
# (support 1087 vs 16854), so read the per-class recall in the report rather
# than relying on accuracy alone.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9424223844824703
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.06      0.10      1087
           1       0.94      1.00      0.97     16854

    accuracy                           0.94     17941
   macro avg       0.93      0.53      0.54     17941
weighted avg       0.94      0.94      0.92     17941



In [3]:
import os

import joblib

# Persist the trained random forest. joblib.dump does not create missing
# parent directories, so ensure ./model exists before writing.
os.makedirs('./model', exist_ok=True)
# Kept as the last expression so the cell still displays the written path(s).
joblib.dump(clf, './model/random_forest_model2.pkl')

['./model/random_forest_model2.pkl']

In [4]:
# Evaluation on recent
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the "recent" dataset and select feature columns with the same rule
# that was used for training (names containing 'token_id' or 'prob').
df = pd.read_csv('/home/hxxzhang/DoLa/token_probabilities_recent.csv')
features = [col for col in df.columns if 'token_id' in col or 'prob' in col]
X = df[features]
y = df['label']

# Load the persisted random forest. It was trained on unscaled features, so
# no scaler is applied here.
# NOTE(review): if scaling is ever re-enabled, load the saved scaler and call
# scaler.transform(X) - not fit_transform - so evaluation data never refits it.
loaded_model = joblib.load('/home/hxxzhang/DoLa/model/random_forest_model2.pkl')

y_pred = loaded_model.predict(X)
print("Accuracy:", accuracy_score(y, y_pred))
# zero_division=0 reports the same 0.00 scores for a class that is never
# predicted, but without emitting an undefined-metric warning per average.
print("Classification Report:\n", classification_report(y, y_pred, zero_division=0))

Accuracy: 0.9807956754706245
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00      2835
           1       0.98      1.00      0.99    144788

    accuracy                           0.98    147623
   macro avg       0.49      0.50      0.50    147623
weighted avg       0.96      0.98      0.97    147623



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# The two CSV files overlap; the overlap should be removed.
## No, it cannot be removed, but the data is imbalanced.

# Transformer model

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, classification_report

class DoLaDataset(Dataset):
    """Per-layer top-k token features (and optional labels) read from DoLa CSV dumps.

    Each sample is a ``[num_layers, feature_dim]`` float tensor. The layers are
    ``premature_16, premature_18, ..., premature_30`` followed by ``mature``.
    For every layer the features are, for k = 1..5 in order, the top-k token id
    divided by 32000 (assumed vocabulary size) and the top-k probability.

    Args:
        csv_path: Which data to load.
            * ``'train'`` - concatenates the two built-in training CSVs.
            * a path to an existing CSV file - loads that file.
            * anything else - loads the built-in "recent" CSV (legacy
              behaviour, e.g. ``DoLaDataset('2')``).

    Raises:
        ValueError: if a layer has none of the expected ``*_top{k}_token_id`` /
            ``*_top{k}_prob`` column pairs, or if layers end up with different
            numbers of feature columns.
    """

    # Built-in CSV locations behind the legacy split selectors.
    TRAIN_CSVS = (
        '/home/hxxzhang/DoLa/token_probabilities.csv',
        '/home/hxxzhang/DoLa/token_probabilities_dola.csv',
    )
    EVAL_CSV = '/home/hxxzhang/DoLa/token_probabilities_recent.csv'
    TOP_K = 5
    # Token ids are divided by this value (assumes the max token id is 32000).
    VOCAB_SIZE = 32000.0

    def __init__(self, csv_path):
        # Local import: the surrounding import cell does not bring in os.
        import os

        if csv_path == 'train':
            df = pd.concat([pd.read_csv(path) for path in self.TRAIN_CSVS], axis=0)
        elif isinstance(csv_path, (str, os.PathLike)) and os.path.isfile(csv_path):
            # The argument really is a CSV path: honour it.
            df = pd.read_csv(csv_path)
        else:
            # Legacy fallback: any other selector means the "recent" set.
            df = pd.read_csv(self.EVAL_CSV)

        # Define layer names in order
        premature_layers = [f'premature_{i}' for i in range(16, 32, 2)]
        self.layer_names = premature_layers + ['mature']
        self.num_layers = len(self.layer_names)

        layer_features = []
        for layer_name in self.layer_names:
            layer_data = []
            # Collect (token id, probability) pairs for top-1 .. top-K.
            for k in range(1, self.TOP_K + 1):
                token_id_col = f'{layer_name}_top{k}_token_id'
                prob_col = f'{layer_name}_top{k}_prob'

                # A pair is used only when both of its columns are present.
                if token_id_col in df.columns and prob_col in df.columns:
                    layer_data.append(df[token_id_col].values / self.VOCAB_SIZE)
                    layer_data.append(df[prob_col].values)

            if not layer_data:
                raise ValueError(
                    f"No '{layer_name}_top<k>_token_id' / '{layer_name}_top<k>_prob' "
                    f"column pairs found in the CSV"
                )

            # [num_samples, feature_dim] for this layer
            layer_features.append(np.stack(layer_data, axis=1))

        # Every layer must contribute the same number of columns to be stackable.
        widths = {layer_name: block.shape[1]
                  for layer_name, block in zip(self.layer_names, layer_features)}
        if len(set(widths.values())) != 1:
            raise ValueError(f"Layers have inconsistent feature counts: {widths}")

        # Shape: [num_samples, num_layers, feature_dim]
        stacked = np.stack(layer_features, axis=1)
        self.features = torch.as_tensor(stacked, dtype=torch.float32)

        # Labels are optional so the dataset can also wrap unlabeled CSVs.
        if 'label' in df.columns:
            self.labels = torch.as_tensor(df['label'].values, dtype=torch.long)
        else:
            self.labels = None

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(features, label)`` when labels exist, otherwise just ``features``."""
        if self.labels is not None:
            return self.features[idx], self.labels[idx]
        return self.features[idx]

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a ``[batch, seq, d_model]`` input.

    Even feature indices hold ``sin(pos * freq)`` and odd indices hold
    ``cos(pos * freq)``. The table is precomputed for ``max_seq_length``
    positions and stored as the non-trainable buffer ``pe`` with shape
    ``[1, max_seq_length, d_model]``.

    Args:
        d_model: Size of the last (feature) dimension; may be odd or even.
        max_seq_length: Number of positions to precompute. Inputs with a longer
            sequence dimension are not supported.
    """

    def __init__(self, d_model, max_seq_length=100):
        super().__init__()
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        angles = position * div_term  # [max_seq_length, ceil(d_model / 2)]

        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd index than even index, so trim
        # the angle table to match instead of failing with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

class DoLaTransformer(nn.Module):
    """Binary classifier over a sequence of per-layer feature vectors.

    Pipeline: linear projection -> positional encoding -> transformer encoder
    -> learned per-layer scaling -> multi-head self-attention -> mean pooling
    over layers -> MLP head ending in a sigmoid.

    Args:
        feature_dim: Size of each layer's input feature vector.
        num_layers: Length of the layer sequence; one learnable depth weight
            is created per position.
        nhead: Attention heads for both the encoder and the final attention.
        num_encoder_layers: Number of stacked encoder layers.
        dim_feedforward: Used as the model width (d_model); the encoder's
            internal feed-forward width is twice this value.
        dropout: Dropout probability used throughout.
    """

    def __init__(self, feature_dim, num_layers, nhead=4, num_encoder_layers=2, dim_feedforward=256, dropout=0.1):
        super().__init__()

        # dim_feedforward doubles as the embedding width everywhere below.
        width = dim_feedforward

        self.feature_projection = nn.Linear(feature_dim, width)
        self.pos_encoder = PositionalEncoding(width)

        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=width,
                nhead=nhead,
                dim_feedforward=width * 2,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=num_encoder_layers,
        )

        self.layer_attention = nn.MultiheadAttention(
            embed_dim=width,
            num_heads=nhead,
            dropout=dropout,
            batch_first=True,
        )

        # Progressive depth weighting: initial values rise linearly from 1 to 2.
        self.depth_weights = nn.Parameter(torch.linspace(1.0, 2.0, num_layers))

        head = [
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(width, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Map ``[batch_size, num_layers, feature_dim]`` inputs to ``[batch_size]`` probabilities."""
        # Embed each layer's features and tag them with their position.
        embedded = self.pos_encoder(self.feature_projection(x))

        # Contextualise across the layer sequence.
        encoded = self.transformer_encoder(embedded)

        # Scale every layer position by its softmax-normalised depth weight.
        depth_scale = torch.softmax(self.depth_weights, dim=0)[None, :, None]
        weighted = encoded * depth_scale

        # Self-attention across layers (attention weights are discarded).
        attended, _ = self.layer_attention(weighted, weighted, weighted)

        # Average over the layer dimension, then classify.
        pooled = attended.mean(dim=1)
        return self.classifier(pooled).squeeze(-1)

def train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4, device='cuda'):
    """Train ``model`` with BCE loss and return it with the best-validation weights loaded.

    Each epoch runs one pass over ``train_loader`` (AdamW, gradient norm
    clipped to 1.0), then scores ``val_loader`` by ROC AUC. The learning rate
    is halved after 5 epochs without validation-AUC improvement, and training
    stops early once it drops below 1e-6.

    Args:
        model: Module whose forward returns per-sample probabilities in [0, 1].
        train_loader: Iterable of ``(features, labels)`` batches; must support ``len()``.
        val_loader: Iterable of ``(features, labels)`` batches with CPU labels.
        num_epochs: Maximum number of epochs.
        learning_rate: Initial AdamW learning rate.
        device: Device the batches are moved to (the model must already be there).

    Returns:
        The same ``model`` instance, restored to the weights of the epoch with
        the highest validation AUC (left as-is if no epoch produced a snapshot).
    """
    # Local import: the surrounding import cell does not bring in copy.
    import copy

    criterion = nn.BCELoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, factor=0.5)

    best_val_auc = 0
    best_model_state = None

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        train_preds = []
        train_labels = []

        for batch_features, batch_labels in train_loader:
            batch_features = batch_features.to(device)
            # BCELoss needs float targets.
            batch_labels = batch_labels.float().to(device)

            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()
            train_preds.extend(outputs.detach().cpu().numpy())
            train_labels.extend(batch_labels.cpu().numpy())

        train_auc = roc_auc_score(train_labels, train_preds)

        # Validation
        model.eval()
        val_preds = []
        val_labels = []

        with torch.no_grad():
            for batch_features, batch_labels in val_loader:
                batch_features = batch_features.to(device)
                outputs = model(batch_features)
                val_preds.extend(outputs.cpu().numpy())
                val_labels.extend(batch_labels.numpy())

        val_auc = roc_auc_score(val_labels, val_preds)

        # Update learning rate
        scheduler.step(val_auc)

        # Save best model
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            # state_dict() holds references to the live parameter tensors, and a
            # shallow .copy() keeps those references. Deep-copy so that later
            # optimizer steps cannot overwrite the snapshot.
            best_model_state = copy.deepcopy(model.state_dict())

        print(f'Epoch {epoch + 1}/{num_epochs}:')
        print(f'Train Loss: {train_loss/len(train_loader):.4f}, Train AUC: {train_auc:.4f}')
        print(f'Val AUC: {val_auc:.4f}, Best Val AUC: {best_val_auc:.4f}')

        # Early stopping check
        if optimizer.param_groups[0]['lr'] < 1e-6:
            print('Learning rate too small. Stopping training.')
            break

    # Load best model (nothing to restore if no epoch ever beat the initial AUC,
    # e.g. when num_epochs is 0).
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    return model

def main():
    """Train the DoLa transformer classifier and print a validation report.

    Builds the training and validation datasets, trains ``DoLaTransformer``
    with ``train_model``, then thresholds the predicted probabilities at 0.5
    and prints a classification report.

    NOTE(review): the same validation set drives model selection inside
    ``train_model`` and the final report here, so the report is an optimistic
    estimate rather than a true held-out score.
    """
    # Hyperparameters
    SEED = 42
    BATCH_SIZE = 32
    NUM_EPOCHS = 20
    LEARNING_RATE = 1e-4
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Seed weight init, dropout and DataLoader shuffling so runs are repeatable
    # (some CUDA kernels may still be non-deterministic).
    torch.manual_seed(SEED)

    # Load datasets. Any selector other than 'train' picks the "recent" CSV.
    train_dataset = DoLaDataset('train')
    val_dataset = DoLaDataset('2')

    # Take the per-layer feature width from the data instead of hard-coding it
    # (10 when all 5 token-id / probability pairs are present).
    FEATURE_DIM = train_dataset.features.shape[-1]

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # Initialize model
    model = DoLaTransformer(
        feature_dim=FEATURE_DIM,
        num_layers=train_dataset.num_layers,
        nhead=4,
        num_encoder_layers=2,
        dim_feedforward=256,
        dropout=0.1
    ).to(DEVICE)

    # Train model
    model = train_model(
        model,
        train_loader,
        val_loader,
        num_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        device=DEVICE
    )

    # Final evaluation
    model.eval()
    val_preds = []
    val_labels = []

    with torch.no_grad():
        for batch_features, batch_labels in val_loader:
            batch_features = batch_features.to(DEVICE)
            outputs = model(batch_features)
            # Threshold probabilities at 0.5 to get hard class predictions.
            val_preds.extend((outputs > 0.5).cpu().numpy())
            val_labels.extend(batch_labels.numpy())

    print("\nFinal Evaluation:")
    # zero_division=0 keeps the scores for a never-predicted class at 0 without
    # emitting undefined-metric warnings.
    print(classification_report(val_labels, val_preds, zero_division=0))

if __name__ == "__main__":
    main()