# TTML Multi-Task Learning Examples

This notebook demonstrates advanced usage of the TTML model in multi-task learning and transfer learning scenarios. We'll cover:

1. Multi-Task Learning
   - Combining classification and regression tasks
   - Joint feature learning
   - Task-specific performance analysis

2. Transfer Learning
   - Pre-training on large datasets
   - Fine-tuning for specific tasks
   - Performance comparison

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Import TTML modules
from tabular_transformer.models import TabularTransformer
from tabular_transformer.models.task_heads import (
    ClassificationHead,
    RegressionHead,
    MultiTaskHead
)
from tabular_transformer.training import Trainer
from tabular_transformer.inference import predict
from tabular_transformer.explainability import global_explanations
from tabular_transformer.utils.config import TransformerConfig
from tabular_transformer.data.dataset import TabularDataset

# Import data utilities
from data_utils import download_adult_dataset, download_wine_quality_dataset

## Part 1: Multi-Task Learning

We'll use the Wine Quality dataset to simultaneously predict quality scores (regression) and wine type (classification).

In [None]:
# Download both red and white wine datasets
wine_red = download_wine_quality_dataset(save_csv=False, variant='red')
wine_white = download_wine_quality_dataset(save_csv=False, variant='white')

# Add wine type indicator (this becomes the classification target)
wine_red['wine_type'] = 0  # Red wine
wine_white['wine_type'] = 1  # White wine


def _standardize_quality_column(frame):
    """Return `frame` with its 'class' or 'Class' column renamed to 'quality'.

    'class' takes precedence over 'Class'; a frame containing neither is
    returned unchanged. Only one column is ever renamed, so a frame that
    somehow carries both spellings never ends up with two 'quality' columns.
    """
    for candidate in ('class', 'Class'):
        if candidate in frame.columns:
            return frame.rename(columns={candidate: 'quality'})
    return frame


# Standardize column names - the datasets have inconsistent naming
# (the red wine dataset uses 'class' while white wine uses 'Class').
# This is done exactly once; the cell previously repeated this step verbatim.
wine_red = _standardize_quality_column(wine_red)
wine_white = _standardize_quality_column(wine_white)

# Print column names to verify both frames now share the same schema
print("Red wine columns (after standardization):", wine_red.columns.tolist())
print("White wine columns (after standardization):", wine_white.columns.tolist())

# Combine datasets: stack red on top of white and renumber the rows
wine_df = pd.concat([wine_red, wine_white], axis=0).reset_index(drop=True)
print("Combined dataset shape:", wine_df.shape)
print("\nWine type distribution:")
print(wine_df['wine_type'].value_counts(normalize=True))

In [None]:
# Names of the two prediction targets; they must not be fed in as features.
quality_column = 'quality'
type_column = 'wine_type'

# Feature lists by dtype, with both target columns filtered out.
numeric_features = [
    column
    for column in wine_df.select_dtypes(include=['int64', 'float64']).columns
    if column not in (quality_column, type_column)
]
categorical_features = [
    column
    for column in wine_df.select_dtypes(include=['object']).columns
    if column not in (quality_column, type_column)
]

# Build the train/test datasets: one target group per task, 20% of the rows
# held out, fixed random_state so the split is repeatable. The third element
# of the returned tuple is not used in this notebook.
train_dataset, test_dataset, _ = TabularDataset.from_dataframe(
    dataframe=wine_df,
    numeric_columns=numeric_features,
    categorical_columns=categorical_features,
    target_columns={'quality': [quality_column], 'wine_type': [type_column]},
    validation_split=0.2,
    random_state=42,
)

In [None]:
# Width of the encoder output. The config and every head's input_dim must
# agree on this value, so it is defined once here instead of repeating 128.
EMBED_DIM = 128

# Get feature dimensions from preprocessor
feature_dims = train_dataset.preprocessor.get_feature_dimensions()
numeric_dim = feature_dims['numeric_dim']
categorical_dims = feature_dims['categorical_dims']
categorical_embedding_dims = feature_dims['categorical_embedding_dims']

# Model configuration
config = TransformerConfig(
    embed_dim=EMBED_DIM,
    num_heads=8,
    num_layers=4,
    dropout=0.2,
    variational=False
)

# Initialize transformer encoder (shared by both tasks)
encoder = TabularTransformer(
    numeric_dim=numeric_dim,
    categorical_dims=categorical_dims,
    categorical_embedding_dims=categorical_embedding_dims,
    config=config
)

# Create task-specific heads
quality_head = RegressionHead(
    input_dim=EMBED_DIM,
    output_dim=1  # Single target value
)

type_head = ClassificationHead(
    input_dim=EMBED_DIM,
    num_classes=2  # Binary classification for wine type
)

# Combine heads into multi-task head.
# Each keyword is passed exactly once: the previous version repeated `name`
# and `input_dim`, which Python rejects with
# "SyntaxError: keyword argument repeated" before the cell can run.
# The dict keys match the target group names used when building the dataset.
multi_task_head = MultiTaskHead(
    name="multi_task",
    input_dim=EMBED_DIM,
    heads={
        'quality': quality_head,
        'wine_type': type_head
    },
    task_weights={
        'quality': 1.0,
        'wine_type': 1.0
    }
)

In [None]:
# Batch the datasets: shuffle only the training split so evaluation order
# stays fixed.
train_loader = train_dataset.create_dataloader(shuffle=True, batch_size=64)
test_loader = test_dataset.create_dataloader(shuffle=False, batch_size=64)

# Pair the shared encoder with the multi-task head. Optimizer and device are
# left as None; per the original notes the Trainer then creates its own
# optimizer and uses CUDA if available.
trainer = Trainer(
    encoder=encoder,
    task_head=multi_task_head,
    device=None,
    optimizer=None,
)

# Run training: at most 25 epochs, validated on the held-out loader, with an
# early-stopping patience of 3.
history = trainer.train(
    num_epochs=25,
    early_stopping_patience=3,
    train_loader=train_loader,
    val_loader=test_loader,
)

In [None]:
# Make predictions on the held-out split
predictions = trainer.predict(test_loader)

# Get predictions for each task.
# `.detach().cpu()` before `.numpy()`: the trainer may run on CUDA, and
# Tensor.numpy() raises for tensors that live on the GPU or require grad.
# Both calls are no-ops for plain CPU tensors.
quality_pred = predictions['quality']['predictions'].detach().cpu().numpy()
type_pred = (
    torch.argmax(predictions['wine_type']['probabilities'], dim=1)
    .detach()
    .cpu()
    .numpy()
)

# Get true values (keys match the target groups the dataset was built with)
y_quality_test = test_dataset.targets['quality']
y_type_test = test_dataset.targets['wine_type']

# Evaluate quality predictions
mse = mean_squared_error(y_quality_test, quality_pred)
r2 = r2_score(y_quality_test, quality_pred)

print("Quality Prediction Results:")
print(f"RMSE: {np.sqrt(mse):.4f}")
print(f"R² Score: {r2:.4f}")

# Evaluate type predictions
accuracy = accuracy_score(y_type_test, type_pred)
print("\nWine Type Classification Results:")
print(f"Accuracy: {accuracy:.4f}")

# One figure, two panels; explicit axes avoid relying on pyplot's implicit
# "current axes" state.
fig, (ax_quality, ax_type) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: predicted vs. actual quality, with the y = x reference line
ax_quality.scatter(y_quality_test, quality_pred, alpha=0.5)
quality_range = [y_quality_test.min(), y_quality_test.max()]
ax_quality.plot(quality_range, quality_range, 'r--', lw=2)
ax_quality.set_xlabel('Actual Quality')
ax_quality.set_ylabel('Predicted Quality')
ax_quality.set_title('Quality Predictions')

# Right panel: row-normalized confusion matrix for wine type
cm = pd.crosstab(y_type_test, type_pred, normalize='index')
sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues', ax=ax_type)
ax_type.set_title('Wine Type Predictions')
ax_type.set_xlabel('Predicted Type')
ax_type.set_ylabel('True Type')

fig.tight_layout()
plt.show()

## Part 2: Transfer Learning

Now we'll demonstrate transfer learning by pre-training on the Adult Income dataset and fine-tuning on a subset of the Wine dataset.

In [None]:
# Fetch the Adult dataset in memory (no CSV is written) and summarize it:
# first its shape, then the dtype of every column.
adult_df = download_adult_dataset(save_csv=False)
print("Adult dataset shape:", adult_df.shape)
print("\nFeature types:", adult_df.dtypes, sep="\n")

In [None]:
# The label to predict; it is excluded from both feature lists below.
target_column = 'class'

# Feature lists by dtype, skipping the target column wherever it appears.
numeric_features_adult = [
    column
    for column in adult_df.select_dtypes(include=['int64', 'float64']).columns
    if column != target_column
]
categorical_features_adult = [
    column
    for column in adult_df.select_dtypes(include=['object']).columns
    if column != target_column
]

# Build the train/test datasets with a single target group named 'main',
# holding out 20% of the rows with a fixed random_state. The third element
# of the returned tuple is not used in this notebook.
train_dataset_adult, test_dataset_adult, _ = TabularDataset.from_dataframe(
    dataframe=adult_df,
    numeric_columns=numeric_features_adult,
    categorical_columns=categorical_features_adult,
    target_columns={'main': [target_column]},
    validation_split=0.2,
    random_state=42,
)

In [None]:
# Ask the fitted preprocessor for the input sizes the encoder needs, and
# unpack the three entries used below.
feature_dims_adult = train_dataset_adult.preprocessor.get_feature_dimensions()
numeric_dim_adult, categorical_dims_adult, categorical_embedding_dims_adult = (
    feature_dims_adult[key]
    for key in ('numeric_dim', 'categorical_dims', 'categorical_embedding_dims')
)

# Model configuration (rebinds `config` with the same settings used in Part 1)
config = TransformerConfig(
    embed_dim=128,
    num_heads=8,
    num_layers=4,
    dropout=0.2,
    variational=False,
)

# Encoder to be pre-trained, sized for the Adult features
pretrain_encoder = TabularTransformer(
    config=config,
    numeric_dim=numeric_dim_adult,
    categorical_dims=categorical_dims_adult,
    categorical_embedding_dims=categorical_embedding_dims_adult,
)

# Two-class head for the pre-training task (income); its input_dim is the
# same 128 passed as embed_dim above.
pretrain_head = ClassificationHead(num_classes=2, input_dim=128)

In [None]:
# Batch the Adult splits: shuffle only the training data.
train_loader_adult = train_dataset_adult.create_dataloader(shuffle=True, batch_size=64)
test_loader_adult = test_dataset_adult.create_dataloader(shuffle=False, batch_size=64)

# Trainer for the pre-training stage. Optimizer and device are left as None;
# per the original notes the Trainer then creates its own optimizer and uses
# CUDA if available.
pretrain_trainer = Trainer(
    encoder=pretrain_encoder,
    task_head=pretrain_head,
    device=None,
    optimizer=None,
)

# Pre-train: at most 20 epochs, validated on the Adult hold-out, with an
# early-stopping patience of 3.
pretrain_history = pretrain_trainer.train(
    num_epochs=20,
    early_stopping_patience=3,
    train_loader=train_loader_adult,
    val_loader=test_loader_adult,
)

In [None]:
# Create small dataset for fine-tuning (10% of the wine training-set size).
# A seeded generator makes the subsample, and therefore the transfer-vs-scratch
# comparison, reproducible under Restart & Run All; the unseeded global
# np.random state gave a different subset on every run.
finetune_rng = np.random.default_rng(42)
n_samples = len(train_dataset) // 10
indices = finetune_rng.choice(len(train_dataset), size=n_samples, replace=False)

# NOTE(review): `indices` are positions in [0, len(train_dataset)) but are
# applied to `wine_df`, which also contains the held-out rows. If
# TabularDataset.from_dataframe shuffles before splitting, some of these rows
# may also be in `test_dataset` (evaluation leakage), and because wine_df is
# red-then-white the sample skews toward red wines. Confirm how the split maps
# back to wine_df rows and sample from the training rows only.
small_dataset = TabularDataset(
    dataframe=wine_df.iloc[indices],
    numeric_columns=numeric_features,
    categorical_columns=categorical_features,
    target_columns={'main': [quality_column]},
    preprocessor=train_dataset.preprocessor  # Use same preprocessor
)

# Initialize new regression head for fine-tuning
finetune_head = RegressionHead(
    input_dim=128,  # Should match config.embed_dim
    output_dim=1  # Single target value
)

# Freeze encoder layers so only the new head receives gradient updates.
# NOTE(review): pretrain_encoder was built with the Adult feature dimensions
# (numeric_dim_adult / categorical_dims_adult) but is fed Wine features here;
# verify TabularTransformer accepts inputs of a different schema.
for param in pretrain_encoder.parameters():
    param.requires_grad = False

# Create data loaders
train_loader_small = small_dataset.create_dataloader(batch_size=32, shuffle=True)

# Initialize trainer
finetune_trainer = Trainer(
    encoder=pretrain_encoder,
    task_head=finetune_head,
    optimizer=None,  # Will be created by trainer
    device=None  # Will use CUDA if available
)

# Fine-tune the model.
# NOTE(review): `test_loader` carries the 'quality'/'wine_type' target groups
# while `small_dataset` uses 'main'; confirm the Trainer can validate a
# single-task head against that loader.
finetune_history = finetune_trainer.train(
    train_loader=train_loader_small,
    val_loader=test_loader,
    num_epochs=10,
    early_stopping_patience=3
)

In [None]:
# Baseline for the comparison: a freshly initialized encoder sized for the
# wine features, trained on the same small loader as the fine-tuned model.
scratch_encoder = TabularTransformer(
    config=config,
    numeric_dim=numeric_dim,
    categorical_dims=categorical_dims,
    categorical_embedding_dims=categorical_embedding_dims,
)

# Single-output regression head; input_dim is the same 128 used as embed_dim.
scratch_head = RegressionHead(output_dim=1, input_dim=128)

# Optimizer and device are left as None; per the original notes the Trainer
# then creates its own optimizer and uses CUDA if available.
scratch_trainer = Trainer(
    encoder=scratch_encoder,
    task_head=scratch_head,
    device=None,
    optimizer=None,
)

# Same budget as the fine-tuning run: at most 10 epochs, patience of 3.
scratch_history = scratch_trainer.train(
    num_epochs=10,
    early_stopping_patience=3,
    train_loader=train_loader_small,
    val_loader=test_loader,
)

In [None]:
# Make predictions with both models on the same held-out wine split
transfer_predictions = finetune_trainer.predict(test_loader)
scratch_predictions = scratch_trainer.predict(test_loader)

# Get predictions.
# `.detach().cpu()` before `.numpy()`: the trainers may run on CUDA, and
# Tensor.numpy() raises for tensors that live on the GPU or require grad.
# NOTE(review): the 'main' key assumes a single-head trainer labels its
# output 'main' - confirm against Trainer.predict.
transfer_pred = transfer_predictions['main']['predictions'].detach().cpu().numpy()
scratch_pred = scratch_predictions['main']['predictions'].detach().cpu().numpy()

# Get true values. `test_dataset` was built with the target groups 'quality'
# and 'wine_type' only, so the quality labels live under 'quality'; the
# previous lookup of 'main' referenced a group this dataset was never given.
y_test = test_dataset.targets['quality']

# Compare results
transfer_mse = mean_squared_error(y_test, transfer_pred)
transfer_r2 = r2_score(y_test, transfer_pred)

scratch_mse = mean_squared_error(y_test, scratch_pred)
scratch_r2 = r2_score(y_test, scratch_pred)

print("Transfer Learning Results:")
print(f"RMSE: {np.sqrt(transfer_mse):.4f}")
print(f"R² Score: {transfer_r2:.4f}")

print("\nTraining from Scratch Results:")
print(f"RMSE: {np.sqrt(scratch_mse):.4f}")
print(f"R² Score: {scratch_r2:.4f}")

# Plot comparison: one panel per model, each with the y = x reference line.
# Explicit axes avoid relying on pyplot's implicit "current axes" state.
fig, (ax_transfer, ax_scratch) = plt.subplots(1, 2, figsize=(15, 5))
quality_range = [y_test.min(), y_test.max()]

ax_transfer.scatter(y_test, transfer_pred, alpha=0.5, label='Transfer Learning')
ax_transfer.plot(quality_range, quality_range, 'r--', lw=2)
ax_transfer.set_xlabel('Actual Quality')
ax_transfer.set_ylabel('Predicted Quality')
ax_transfer.set_title('Transfer Learning Predictions')

ax_scratch.scatter(y_test, scratch_pred, alpha=0.5, label='From Scratch')
ax_scratch.plot(quality_range, quality_range, 'r--', lw=2)
ax_scratch.set_xlabel('Actual Quality')
ax_scratch.set_ylabel('Predicted Quality')
ax_scratch.set_title('Training from Scratch Predictions')

fig.tight_layout()
plt.show()

## Conclusion

This notebook demonstrated advanced capabilities of the TTML model:

1. Multi-Task Learning
   - Successfully combined quality regression and type classification
   - Achieved good performance on both tasks
   - Demonstrated shared feature learning

2. Transfer Learning
   - Effectively transferred knowledge from Adult dataset
   - Improved performance on small Wine dataset
   - Showed benefits over training from scratch

These techniques show how the TTML model can be used effectively in scenarios with limited data or multiple related tasks.