In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import lib

# Import all libraries
import pandas as pd
import numpy as np
from pathlib import Path
import gc
import torch
from fastai.vision.all import *
from sklearn.model_selection import StratifiedGroupKFold

print("All libraries imported successfully!")

All libraries imported successfully!


In [3]:
# Configuration class
class CFG:
    """Namespace of notebook-wide constants (paths, model, schedule, loader, seed)."""

    # --- Dataset locations: every CSV is resolved relative to BASE_PATH ---
    BASE_PATH = Path('./data')
    TRAIN_FEATURES_PATH = BASE_PATH / 'train_features.csv'
    TRAIN_LABELS_PATH = BASE_PATH / 'train_labels.csv'
    TEST_FEATURES_PATH = BASE_PATH / 'test_features.csv'

    # --- Backbone (timm identifier); the earlier choice was 'convnext_large_in22k' ---
    MODEL_ARCHITECTURE = 'timm/eva02_large_patch14_448.mim_m38m_ft_in22k_in1k'

    # --- Input resolution and training schedule ---
    IMAGE_SIZE = 448      # square side length in pixels
    BATCH_SIZE = 16       # used on the CUDA path
    N_FOLDS = 5           # number of cross-validation folds
    EPOCHS = 15           # epochs per fold

    # --- DataLoader tuning (only passed on the CUDA path) ---
    NUM_WORKERS = 12
    PIN_MEMORY = True
    PREFETCH_FACTOR = 4

    # --- Competition settings ---
    TARGET_COL = 'label'  # column holding the class target
    SEED = 42             # used for set_seed and the fold splitter
    BASE_LR = 1e-3        # fallback learning rate

In [4]:
# Reload the per-fold artifacts that the training cell dumps to this file
# (keys: 'val_oof_logits', 'all_test_preds', 'all_oof_preds', 'fold_scores',
# 'raw_test_logits').
# NOTE: pickle.load can execute arbitrary code; only open files this notebook wrote.
stats_path = Path("fast_ai_k_fold_data.pkl")
with stats_path.open("rb") as f:
    stats = pickle.load(f)


In [5]:
# Echo the active configuration so the run log is self-describing.
print(
    "RTX 5090 Configuration:",
    f"   Model: {CFG.MODEL_ARCHITECTURE}",
    f"   Resolution: {CFG.IMAGE_SIZE}x{CFG.IMAGE_SIZE}",
    f"   Batch Size: {CFG.BATCH_SIZE}",
    f"   Training Epochs: {CFG.EPOCHS}",
    sep="\n",
)

# CUDA setup: enable cuDNN autotuning and TF32, then run a tiny matmul as a smoke test.
if not torch.cuda.is_available():
    print("CUDA unavailable, using CPU mode")
else:
    print(f"Detected: {torch.cuda.get_device_name(0)}")
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cuda.matmul.allow_tf32 = True
    print("RTX 5090 optimizations enabled")
    try:
        probe = torch.randn(100, 100).cuda()
        product = torch.mm(probe, probe)
        print("CUDA test passed!")
        del probe, product
        torch.cuda.empty_cache()
    except Exception as err:
        # Report and continue: a failed smoke test does not abort the notebook.
        print(f"CUDA test failed: {err}")
        print("Please check PyTorch CUDA installation!")

# Seed everything for reproducibility.
set_seed(CFG.SEED, reproducible=True)

# Data augmentation transforms
def get_transforms():
    """Return the augmentation list built by ``aug_transforms``.

    The output size follows ``CFG.IMAGE_SIZE``; the remaining settings are
    fixed here and forwarded unchanged as keyword arguments.
    """
    augmentation_kwargs = {
        'size': CFG.IMAGE_SIZE,
        'min_scale': 0.7,
        'max_rotate': 20,
        'max_lighting': 0.4,
        'max_warp': 0.25,
        'p_affine': 0.9,
        'p_lighting': 0.9,
    }
    return aug_transforms(**augmentation_kwargs)

# Data preparation
print("\nPreparing data...")

# Fail fast, before any CSV is parsed, if one of the input files is missing.
required_files = [CFG.TRAIN_FEATURES_PATH, CFG.TRAIN_LABELS_PATH, CFG.TEST_FEATURES_PATH]
for file_path in required_files:
    if not file_path.exists():
        print(f"File not found: {file_path}")
        # The CSVs are resolved under CFG.BASE_PATH, not the working directory,
        # so point the user at the directory that is actually searched.
        print(f"Please ensure the following files are in {CFG.BASE_PATH}:")
        for expected_file in required_files:
            print(f"  - {expected_file.name}")
        raise FileNotFoundError(f"Missing required file: {file_path}")

train_features_df = pd.read_csv(CFG.TRAIN_FEATURES_PATH)
train_labels_df = pd.read_csv(CFG.TRAIN_LABELS_PATH)
test_features_df = pd.read_csv(CFG.TEST_FEATURES_PATH)

# Convert the one-hot label columns into an integer class id (the argmax position).
# Excluding 'id' by name rather than by position keeps this correct even if the
# column order of the CSV changes.
one_hot_labels = train_labels_df.drop(columns='id')
train_labels_df['label'] = one_hot_labels.to_numpy().argmax(axis=1)

# Attach the class id to every training row.
df = train_features_df.merge(train_labels_df[['id', 'label']], on='id')

# Image paths in the CSVs are relative to CFG.BASE_PATH.
df['image_path'] = df['filepath'].apply(lambda x: CFG.BASE_PATH / x)
test_features_df['image_path'] = test_features_df['filepath'].apply(lambda x: CFG.BASE_PATH / x)

print("Data loaded successfully!")
print(f"   Training images: {len(df)}")
print(f"   Test images: {len(test_features_df)}")
print(f"   Number of classes: {df['label'].nunique()}")

# Check class distribution
print("\nClass distribution:")
print(df['label'].value_counts())

# Cross-validation setup: stratified on the label, grouped by 'site' so that a
# site never appears in both the training and validation part of a fold.
print("\nSetting up StratifiedGroupKFold...")
df['fold'] = -1
splitter = StratifiedGroupKFold(n_splits=CFG.N_FOLDS, shuffle=True, random_state=CFG.SEED)

# split() yields positional row numbers, so write through .iloc; label-based
# .loc would only be correct while df carries a default 0..n-1 index.
fold_col_pos = df.columns.get_loc('fold')
for fold, (train_idx, val_idx) in enumerate(splitter.split(df, df['label'], groups=df['site'])):
    df.iloc[val_idx, fold_col_pos] = fold

# Every row must have landed in exactly one validation fold.
assert (df['fold'] >= 0).all(), "some rows were not assigned to a validation fold"

print("Fold distribution:")
print(df.fold.value_counts())

# Training loop
print(f"\nStarting RTX 5090 Training - {CFG.N_FOLDS} Fold Cross Validation")

# Accumulators filled once per fold by the training/inference loop.
val_oof_logits = []
all_test_preds = []
all_oof_preds = []
fold_scores = []
raw_test_logits = []


# Class vocabulary; set from the first fold's DataLoaders.
vocab = None

RTX 5090 Configuration:
   Model: timm/eva02_large_patch14_448.mim_m38m_ft_in22k_in1k
   Resolution: 448x448
   Batch Size: 16
   Training Epochs: 15
Detected: NVIDIA GeForce RTX 3090
RTX 5090 optimizations enabled
CUDA test passed!

Preparing data...
Data loaded successfully!
   Training images: 16488
   Test images: 4464
   Number of classes: 8

Class distribution:
label
6    2492
0    2474
3    2423
5    2254
2    2213
7    2013
1    1641
4     978
Name: count, dtype: int64

Setting up StratifiedGroupKFold...
Fold distribution:
fold
1    4525
3    3771
2    3244
4    2728
0    2220
Name: count, dtype: int64

Starting RTX 5090 Training - 5 Fold Cross Validation


In [6]:
# Placeholder prediction column; this cell only initialises it to 0.
df['predict'] = 0

# Collect each fold's held-out labels (as pandas Series), in fold order.
stats['val_oof_labels'] = []
separator = '=' * 50

for fold in range(CFG.N_FOLDS):
    print(f"\n{separator}")
    print(f"Fold {fold} - RTX 5090 Training")
    print(separator)

    fold_data = df.loc[df['fold'] == fold]

    stats['val_oof_labels'].append(fold_data['label'])


Fold 0 - RTX 5090 Training

Fold 1 - RTX 5090 Training

Fold 2 - RTX 5090 Training

Fold 3 - RTX 5090 Training

Fold 4 - RTX 5090 Training


In [7]:
# Stack the cached per-fold validation logits into one tensor.
all_oof_logits = torch.cat(stats['val_oof_logits'])

# Labels are concatenated in the same fold order.
# NOTE(review): assumes the row order within each fold matches the order in
# which the cached logits were produced; confirm against the training cell.
oof_label_tensors = [torch.tensor(fold_labels.values) for fold_labels in stats['val_oof_labels']]
all_oof_labels = torch.cat(oof_label_tensors)

In [8]:
# Training rows regrouped fold by fold (fold 0 first); row order inside a fold is kept.
per_fold_frames = [df.loc[df['fold'] == fold_id] for fold_id in range(CFG.N_FOLDS)]
train_data_folded = pd.concat(per_fold_frames)

train_data_folded

Unnamed: 0,id,filepath,site,label,image_path,fold,predict
5,ZJ000005,train_features/ZJ000005.jpg,S0019,4,data/train_features/ZJ000005.jpg,0,0
11,ZJ000011,train_features/ZJ000011.jpg,S0014,5,data/train_features/ZJ000011.jpg,0,0
16,ZJ000016,train_features/ZJ000016.jpg,S0105,5,data/train_features/ZJ000016.jpg,0,0
20,ZJ000020,train_features/ZJ000020.jpg,S0105,5,data/train_features/ZJ000020.jpg,0,0
21,ZJ000021,train_features/ZJ000021.jpg,S0068,6,data/train_features/ZJ000021.jpg,0,0
...,...,...,...,...,...,...,...
16463,ZJ016463,train_features/ZJ016463.jpg,S0159,4,data/train_features/ZJ016463.jpg,4,0
16471,ZJ016471,train_features/ZJ016471.jpg,S0062,1,data/train_features/ZJ016471.jpg,4,0
16478,ZJ016478,train_features/ZJ016478.jpg,S0062,7,data/train_features/ZJ016478.jpg,4,0
16479,ZJ016479,train_features/ZJ016479.jpg,S0083,1,data/train_features/ZJ016479.jpg,4,0


In [9]:
import temperature_scaling as ts

# Fit one temperature on the pooled out-of-fold logits and labels.
# Presumably the helper returns (temperature, loss at that temperature); verify in
# temperature_scaling.fit_temperature_lbfgs.
oof_temperature_fit = ts.fit_temperature_lbfgs(all_oof_logits, all_oof_labels)
t_optim, optim_loss = oof_temperature_fit

t_optim, optim_loss

Consider using tensor.detach() first. (Triggered internally at /pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)
  loss = float(closure())


(1.3095703523498126, 0.6898698816628085)

In [10]:
# Dividing by 1 leaves the logits unchanged, so these are uncalibrated probabilities.
all_oof_preds = F.softmax(all_oof_logits / 1, dim=1)

# Top-1 probability and top-1 class for every out-of-fold sample.
all_oof_max_prob, _ = all_oof_preds.max(dim=1)
all_oof_class = all_oof_preds.argmax(dim=1)

# Boolean mask: does the top-1 class equal the label?
all_oof_correct = all_oof_class.eq(all_oof_labels)

all_oof_max_prob

tensor([0.9947, 0.3376, 0.9100,  ..., 0.8889, 1.0000, 0.9292])

In [11]:
# Pick one global threshold on the pooled out-of-fold predictions.
# Presumably returns (tau, precision, coverage); confirm in lib.choose_tau_from_val.
lib.choose_tau_from_val(
    all_oof_logits,
    all_oof_labels,
    target_prec=0.95,
    min_coverage=0.44,
)

Has something with both precision and coverage: 


(0.9682453870773315, 0.9888352751731873, 0.4400169849395752)

In [22]:
# Per-class variant: one minimum-coverage value per class (8 entries), scored by 'margin'.
per_class_min_coverage = [0.5, 0.75, 0.04, 0.88, 0.9, 0.78, 0.67, 0.4]
lib.choose_tau_per_class_from_val(
    all_oof_logits,
    all_oof_labels,
    target_prec=0.95,
    min_coverage=per_class_min_coverage,
    measure='margin',
)

(tensor([0.9223, 0.6173, 0.9298, 0.3379, 0.3293, 0.6965, 0.7603, 0.8500]),
 tensor([0.9667, 0.9684, 0.9365, 0.9627, 0.9601, 0.9657, 0.9619, 0.9663]),
 tensor([0.5002, 0.7502, 0.0400, 0.8803, 0.9007, 0.7803, 0.6700, 0.4002]),
 tensor([2883, 1393, 3150, 2256,  806, 1830, 2391, 1779]),
 {'p': tensor([[5.0197e-03, 1.6039e-05, 7.9644e-05,  ..., 1.9084e-05, 1.4033e-04,
           2.7018e-05],
          [2.0657e-01, 1.4820e-01, 3.3759e-01,  ..., 5.9625e-02, 6.6019e-02,
           2.2551e-02],
          [1.4036e-02, 3.4728e-04, 7.1978e-02,  ..., 9.1003e-01, 2.0680e-03,
           8.6264e-06],
          ...,
          [8.8891e-01, 2.8935e-03, 1.8825e-04,  ..., 1.9553e-05, 7.7637e-05,
           1.0700e-01],
          [4.7240e-07, 1.0000e+00, 3.9504e-08,  ..., 6.6545e-08, 1.4908e-07,
           3.4523e-08],
          [3.9732e-03, 9.3420e-04, 9.2915e-01,  ..., 2.1221e-02, 8.6399e-04,
           2.2131e-02]]),
  'pred': tensor([4, 2, 5,  ..., 0, 1, 2]),
  'score': tensor([0.9897, 0.1310, 0.8380,  

In [44]:
# Per-fold validation metrics from the cached run.
cached_fold_scores = stats['fold_scores']
val_accs = [entry['val_acc'] for entry in cached_fold_scores]
val_loss = [entry['val_loss'] for entry in cached_fold_scores]

# Softmax over folds: higher accuracy, or lower loss, gives a larger weight.
# The factors 5 and 4 control how sharply the weights concentrate.
weights_acc = torch.softmax(torch.tensor(val_accs) * 5, dim=0)
weights_loss = torch.softmax(1 / torch.tensor(val_loss) * 4, dim=0)

for metric_name, fold_weights in (("acc", weights_acc), ("loss", weights_loss)):
    print(f"Fold weights by {metric_name}: {[f'{w:.3f}' for w in fold_weights.tolist()]}")


Fold weights by acc: ['0.238', '0.219', '0.162', '0.185', '0.196']
Fold weights by loss: ['0.294', '0.436', '0.050', '0.125', '0.094']


In [45]:
# Loss-weighted sum of the per-fold raw test logits.
# (Alternatives tried earlier: weights_acc, or a uniform 1/5 per fold.)
averaged_test_logits = 0
for w, test_logit in zip(weights_loss, stats['raw_test_logits']):
    averaged_test_logits = averaged_test_logits + w * test_logit

In [46]:
# Apply the temperature fitted on the out-of-fold logits, then turn logits into probabilities.
scaled_test_logits = averaged_test_logits / t_optim
averaged_test_probs = F.softmax(scaled_test_logits, dim=1)

In [47]:
# Create the submission file from averaged_test_probs, i.e. the loss-weighted
# fold logits rescaled by the single temperature fitted on out-of-fold data.
print("\nCreating submission file (loss-weighted logits, shared temperature)...")

import data

species_columns = list(data.species_labels)
test_probs = averaged_test_probs.numpy()

# One row per test image and one column per species; fail with a clear message
# instead of a pandas broadcasting error if the shapes disagree.
assert test_probs.shape == (len(test_features_df), len(species_columns)), (
    f"probability matrix {test_probs.shape} does not match "
    f"{len(test_features_df)} test rows x {len(species_columns)} species"
)

submission_df = pd.DataFrame(columns=['id'] + species_columns)
submission_df['id'] = test_features_df['id']
submission_df[species_columns] = test_probs

submission_df.to_csv('submission_fast_ai_k_fold_common_temp.csv', index=False)



Creating submission file by accuracy...


In [5]:


    # Create fold splitter function
    def get_splitter(fold_num):
        """Build a DataBlock splitter that holds out one cross-validation fold.

        Args:
            fold_num: value of the ``fold`` column that marks the validation rows.

        Returns:
            A callable that takes the source DataFrame and returns
            ``(train_positions, valid_positions)`` as lists of integer row positions.
        """
        def _inner(o):
            # Return row positions rather than index labels: the two only
            # coincide when the frame has a default 0..n-1 index.
            held_out_flags = (o['fold'] == fold_num).tolist()
            train_positions = [pos for pos, held_out in enumerate(held_out_flags) if not held_out]
            valid_positions = [pos for pos, held_out in enumerate(held_out_flags) if held_out]
            return train_positions, valid_positions
        return _inner

    # DataBlock configuration: images come from the 'image_path' column, targets
    # from CFG.TARGET_COL, and the current fold is held out for validation.
    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_x=ColReader('image_path'),
        get_y=ColReader(CFG.TARGET_COL),
        splitter=get_splitter(fold),
        item_tfms=Resize(CFG.IMAGE_SIZE, method=ResizeMethod.Pad, pad_mode=PadMode.Zeros),
        batch_tfms=[*get_transforms(), Normalize.from_stats(*imagenet_stats)]
    )

    print(f"Creating DataLoaders (batch size: {CFG.BATCH_SIZE})...")

    # Resolve the device once so data loading and checkpoint loading agree.
    use_cuda = torch.cuda.is_available()

    # Create DataLoaders; the tuned loader settings are only used on CUDA.
    if use_cuda:
        dls = dblock.dataloaders(
            df,
            bs=CFG.BATCH_SIZE,
            num_workers=CFG.NUM_WORKERS,
            pin_memory=CFG.PIN_MEMORY,
            prefetch_factor=CFG.PREFETCH_FACTOR
        )
    else:
        dls = dblock.dataloaders(
            df,
            bs=16,
            num_workers=4
        )

    # Store vocabulary from first fold
    if vocab is None:
        vocab = dls.vocab
        print(f"Class vocabulary: {list(vocab)}")

    print(f"Creating {CFG.MODEL_ARCHITECTURE} model...")

    # Setup callbacks for training
    cbs = [
        EarlyStoppingCallback(monitor='valid_loss', patience=3),
        SaveModelCallback(monitor='valid_loss', fname=f'best_model_fold_{fold}')
    ]

    # Create learner with mixed precision
    learn = vision_learner(
        dls,
        CFG.MODEL_ARCHITECTURE,
        metrics=[error_rate, accuracy],
        cbs=cbs
    ).to_fp16()

    # Load this fold's saved checkpoint onto whichever device is actually
    # present; a hard-coded 'cuda' would fail on CPU-only machines even though
    # the DataLoaders above have a CPU path.
    learn.load(f'best_model_fold_{fold}', device='cuda' if use_cuda else 'cpu')

    # Training is disabled in this run (weights come from the checkpoint above).
    # Re-enable the block below to retrain.
    # print(f"Starting training for {CFG.EPOCHS} epochs...")
    #
    # # Find optimal learning rate
    # try:
    #     lr_min, lr_steep = learn.lr_find()
    #     print(f"Suggested learning rate: {lr_steep:.2e}")
    #     final_lr = lr_steep
    # except Exception as e:
    #     print("Using default learning rate", e)
    #     final_lr = CFG.BASE_LR
    #
    # # Train the model
    # learn.fit_one_cycle(CFG.EPOCHS, lr_max=final_lr)

    # Raw validation logits for this fold: the validation items are run through
    # a test-style DataLoader, and act is the identity so no softmax is applied.
    ts_df = learn.dls.valid_ds.items
    ts_dl = learn.dls.test_dl(ts_df)
    logits, _ = learn.get_preds(dl=ts_dl, act=lambda x: x)

    val_oof_logits.append(logits)

    import temperature_scaling as ts

    targets = torch.tensor(ts_df['label'].to_numpy())
    # Cross-entropy of the unscaled logits, reported next to the optimised loss.
    ce = F.cross_entropy(logits, targets)

    # Fit a per-fold temperature on this fold's validation logits.
    t_optim, optim_loss = ts.fit_temperature_lbfgs(logits, targets)

    # Record validation scores
    # val_results = learn.validate()
    val_loss = optim_loss
    # val_loss = float(val_results[0])
    # val_acc = float(val_results[2])  # accuracy is the 2nd metric

    val_acc = F.softmax(logits / t_optim, dim=1).argmax(dim=1).eq(targets).float().mean().item()

    fold_scores.append({'fold': fold, 'val_loss': val_loss, 'val_acc': val_acc, 't_optim': t_optim})
    print(f"Fold {fold} validation results: Loss={ce:.4f}, Optimized loss: {val_loss:.4f}, Optim temperature={t_optim:.4f}, Acc={val_acc:.4f}")

    # Generate test predictions (raw logits first, probabilities second)
    print("Generating predictions...")
    test_dl = dls.test_dl(test_features_df)
    preds, _ = learn.get_preds(dl=test_dl, act=lambda x: x)

    raw_test_logits.append(preds)

    # Calibrate with this fold's own temperature before storing probabilities.
    preds = F.softmax(preds / t_optim, dim=1)

    all_test_preds.append(preds)

    # Get out-of-fold predictions
    # val_dl = dls.valid
    # oof_preds, _ = learn.get_preds(dl=val_dl)
    # all_oof_preds.append(oof_preds)

    # Memory cleanup: drop the learner and every DataLoader built for this fold
    # before the next fold allocates its own.
    print("Memory cleanup...")
    del learn, dls, test_dl, ts_dl
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Persist the per-fold artifacts so later cells can reload them without re-running inference.
fold_artifacts = {
    'val_oof_logits': val_oof_logits,
    'all_test_preds': all_test_preds,
    'all_oof_preds': all_oof_preds,
    'fold_scores': fold_scores,
    'raw_test_logits': raw_test_logits,
}
with open("fast_ai_k_fold_data.pkl", "wb") as f:
    pickle.dump(fold_artifacts, f)

banner = '=' * 50
print(f"\n{banner}")
print("RTX 5090 Training Complete!")
print(banner)

# Display fold results
print("\nCross-validation results:")
for score in fold_scores:
    print(f"Fold {score['fold']}: Loss={score['val_loss']:.4f}, Acc={score['val_acc']:.4f}")

# Per-fold metrics, reused for both the averages and the ensemble weights.
val_accs = [s['val_acc'] for s in fold_scores]
val_loss = [s['val_loss'] for s in fold_scores]

avg_loss = np.mean(val_loss)
avg_acc = np.mean(val_accs)
print(f"\nAverage performance: Loss={avg_loss:.4f}, Acc={avg_acc:.4f}")

# Ensemble strategy - folds are weighted by validation performance
print("\nExecuting ensemble strategy...")

# Accuracy-based weights: softmax over (accuracy * 5).
weights_acc = torch.softmax(torch.tensor(val_accs) * 5, dim=0)
print(f"Fold weights by acc: {[f'{w:.3f}' for w in weights_acc.tolist()]}")

# Weighted sum of the per-fold calibrated test probabilities.
ensemble_preds = sum(w * pred for w, pred in zip(weights_acc, all_test_preds))

# Loss-based weights: softmax over (5 / loss), so a lower loss weighs more.
weights_loss = torch.softmax(1 / torch.tensor(val_loss) * 5, dim=0)
print(f"Fold weights by loss: {[f'{w:.3f}' for w in weights_loss.tolist()]}")

ensemble_preds_loss = sum(w * pred for w, pred in zip(weights_loss, all_test_preds))

# Create one submission file per weighting scheme.
# NOTE(review): the column headers come from `vocab`, which looks like it holds
# the integer class ids produced by the argmax label encoding rather than
# species names; confirm this matches the expected submission format.
submission_outputs = {
    'accuracy': ('submission_fast_ai_k_fold_accuracy.csv', ensemble_preds),
    'loss': ('submission_fast_ai_k_fold_loss.csv', ensemble_preds_loss),
}

for criterion, (csv_name, fold_ensemble) in submission_outputs.items():
    print(f"\nCreating submission file by {criterion}...")

    submission_df = pd.DataFrame(columns=['id'] + list(vocab))
    submission_df['id'] = test_features_df['id']
    submission_df[list(vocab)] = fold_ensemble.numpy()

    # Save submission
    submission_df.to_csv(csv_name, index=False)


print(f"\n{'='*50}")
print("RTX 5090 Submission File Created!")
print(f"{'='*50}")
print("=== Configuration Summary ===")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
print(f"Model: {CFG.MODEL_ARCHITECTURE}")
print(f"Resolution: {CFG.IMAGE_SIZE}x{CFG.IMAGE_SIZE}")
print(f"Batch Size: {CFG.BATCH_SIZE}")
print(f"Total Training Epochs: {CFG.N_FOLDS * CFG.EPOCHS}")
print(f"Average Validation Accuracy: {avg_acc:.4f}")
print(f"Mixed Precision Training: {'Yes' if torch.cuda.is_available() else 'No'}")
print("")
# Report the files that were actually written above.
written_files = ', '.join(csv_name for csv_name, _ in submission_outputs.values())
print(f"Submission files: {written_files}")
print("Ready to dominate the competition!")


Fold 0 - RTX 5090 Training


KeyboardInterrupt: 