In [1]:
import os
import gc
import cv2
import math
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

from torch.cuda.amp import autocast
from ptflops import get_model_complexity_info

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2


from sklearn.metrics import f1_score,roc_auc_score,precision_score,recall_score
from transformers import get_cosine_schedule_with_warmup

import timm


# Utils
import joblib
from tqdm.notebook import tqdm
from collections import defaultdict

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Folder the split files (train.txt / val.txt / test.txt) are read from; image paths
# listed in those files are resolved relative to it.
data_dir='/home/fateplsf/hw/multi_class/hw1/images'
# Homework root; model checkpoints are written to <hw_dir>/model.
hw_dir="/home/fateplsf/hw/multi_class/hw1"

# Make sure the checkpoint folder exists before any training cell tries to save into it.
os.makedirs(hw_dir+"/model",exist_ok=True)

In [3]:
# Run-wide settings read by the training helpers.
CONFIG = dict(
    # Number of batches whose gradients are accumulated before each optimizer step.
    n_accumulate=1,
    # First CUDA device when one is available, otherwise the CPU.
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
)

In [4]:
def set_seed(seed=42):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module (used by the custom channel-selection
    augmentation), NumPy and PyTorch (CPU and all CUDA devices), and switches
    cuDNN to deterministic mode so repeated runs follow the same code paths.

    Args:
        seed: integer seed shared by all generators.
    '''
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Reproducibility requires deterministic kernels and no auto-tuner:
    # benchmark mode may pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Only affects Python processes started after this point; the hash seed of
    # the already-running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed()

In [5]:
# Each split file is whitespace-separated with two columns and no header row:
# the image path (relative to data_dir) and the integer class label.
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
train_df = pd.read_csv(f'{data_dir}/train.txt', sep=r'\s+', header=None, names=['path', 'label'])
valid_df = pd.read_csv(f'{data_dir}/val.txt', sep=r'\s+', header=None, names=['path', 'label'])
test_df = pd.read_csv(f'{data_dir}/test.txt', sep=r'\s+', header=None, names=['path', 'label'])

In [6]:
# Sanity check: list the distinct class ids in the training split and count them.
print(train_df["label"].unique())
print(train_df["label"].nunique())

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
50


In [7]:
# Same check for the validation split, to compare its class ids with the training split.
print(valid_df["label"].unique())
print(valid_df["label"].nunique())

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
50


In [8]:
class ImageDataset(Dataset):
    """Dataset that loads the images listed in a split DataFrame.

    Args:
        df: DataFrame with a "path" column (image path relative to the
            module-level ``data_dir``) and a "label" column (class id).
        augment: optional callable invoked as ``augment(image=img)`` whose
            result is indexed with ``["image"]`` (an albumentations pipeline
            in this notebook); ``None`` returns the scaled array unchanged.
    """

    def __init__(self, df, augment=None):
        self.length = len(df)
        self.df = df
        self.augment = augment

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        """Return ``{'image': ..., 'label': LongTensor}`` for row ``index``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        d = self.df.iloc[index]
        img_path = data_dir + "/" + d["path"]
        img = cv2.imread(img_path)

        # cv2.imread signals a missing/unreadable file by returning None instead of
        # raising; fail here with the offending path rather than on `.astype` below.
        if img is None:
            raise FileNotFoundError(f"Image not found at: {img_path}")

        # Scale pixel values to float32 in [0, 1]; channel order stays as loaded by cv2.
        img = img.astype(np.float32)/255
        label = d["label"]

        if self.augment is not None:
            img = self.augment(image=img)["image"]

        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long)
        }

In [9]:
# Albumentations pipelines keyed by split. Both resize to 384x384 and finish with
# ToTensorV2; A.Normalize() is not applied (it is left out of both pipelines).
data_transforms = dict(
    train=A.Compose(
        [
            A.Resize(384, 384),
            # Random geometric augmentations.
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.5),
            # Random colour augmentations.
            A.HueSaturationValue(
                hue_shift_limit=0.1,
                sat_shift_limit=0.1,
                val_shift_limit=0.1,
                p=0.5,
            ),
            A.RandomBrightnessContrast(
                brightness_limit=(-0.1, 0.1),
                contrast_limit=(-0.1, 0.1),
                p=0.5,
            ),
            ToTensorV2(),
        ],
        p=1.,
    ),
    # Evaluation: deterministic resize + tensor conversion only.
    valid=A.Compose(
        [
            A.Resize(384, 384),
            ToTensorV2(),
        ],
        p=1.,
    ),
)

In [10]:
class ImageDataset_c(Dataset):
    """Image dataset that keeps only a chosen subset of colour channels.

    Args:
        df: DataFrame with "path" (relative to ``data_dir``) and "label" columns.
        augment: optional callable invoked as ``augment(image=img)["image"]``.
        channel_select: list of channel indices to keep, e.g. [0], [1, 2],
            [0, 1, 2]; ``None`` keeps all three.
            0 = Blue, 1 = Green, 2 = Red (cv2 loads images as BGR).
    """

    def __init__(self, df, augment=None, channel_select=None):
        self.df = df
        self.length = len(df)
        self.augment = augment
        # Fall back to every channel when no subset is requested.
        self.channel_select = [0, 1, 2] if channel_select is None else channel_select

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        row = self.df.iloc[index]
        img_path = data_dir + "/" + row["path"]

        raw = cv2.imread(img_path)
        if raw is None:
            raise FileNotFoundError(f"Image not found at: {img_path}")

        # Scale to [0, 1], then index with the list so the result keeps
        # the shape (H, W, C_selected).
        picked = (raw.astype(np.float32) / 255.0)[:, :, self.channel_select]

        if self.augment is None:
            image = picked
        else:
            image = self.augment(image=picked)["image"]

        label = torch.tensor(row["label"], dtype=torch.long)
        return {'image': image, 'label': label}


In [11]:
# Smoke test: take the first training sample (a fresh dataset is built for each line,
# so the train augmentation pipeline runs twice) and show the image shape and the label.
print(next(iter(ImageDataset(train_df,data_transforms["train"])))["image"].shape)
print(next(iter(ImageDataset(train_df,data_transforms["train"])))["label"])

torch.Size([3, 384, 384])
tensor(0)


In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.nn.functional as F

class ChannelAdaptiveBlock(nn.Module):
    """Map an input with up to ``max_in_channels`` channels to ``out_channels``.

    Inputs with fewer channels are zero-padded up to ``max_in_channels``; a
    channel-attention gate (global average pool followed by a two-layer MLP
    and a sigmoid) re-weights the channels, and a 1x1 convolution projects
    them to ``out_channels`` (typically 3).

    Args:
        max_in_channels: largest channel count the block accepts.
        out_channels: number of channels produced.
        reduction: divisor for the attention MLP's hidden width.
    """

    def __init__(self, max_in_channels=3, out_channels=3, reduction=4):
        super().__init__()
        self.max_in_channels = max_in_channels
        self.out_channels = out_channels

        # Keep at least one hidden unit so the Linear layers never get width 0.
        hidden_dim = max(1, max_in_channels // reduction)

        # Channel attention: global average pool + two-layer MLP.
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.channel_attn = nn.Sequential(
            nn.Flatten(),  # [B, C, 1, 1] -> [B, C]
            nn.Linear(max_in_channels, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, max_in_channels),
            nn.Sigmoid()  # per-channel weight in [0, 1]
        )

        # 1x1 projection: max_in_channels -> out_channels.
        self.proj = nn.Conv2d(max_in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Return a ``[B, out_channels, H, W]`` tensor for a ``[B, C, H, W]`` input.

        Raises:
            ValueError: if ``C`` exceeds ``max_in_channels``.
        """
        B, C, H, W = x.shape

        # More channels than the block was built for cannot be padded away;
        # report it here instead of failing inside the attention MLP.
        if C > self.max_in_channels:
            raise ValueError(
                f"Expected at most {self.max_in_channels} input channels, got {C}"
            )

        # Zero-pad missing channels up to max_in_channels.
        if C < self.max_in_channels:
            pad = torch.zeros((B, self.max_in_channels - C, H, W),
                              device=x.device, dtype=x.dtype)
            x = torch.cat([x, pad], dim=1)

        # Channel attention computed from each channel's spatial mean.
        attn = self.global_pool(x)  # [B, max_in_channels, 1, 1]
        attn = self.channel_attn(attn)  # [B, max_in_channels]
        attn = attn.view(B, self.max_in_channels, 1, 1)  # broadcastable over H, W

        # Re-weight the channels, then project to the fixed output width.
        x = x * attn
        x = self.proj(x)  # [B, out_channels, H, W]

        return x


In [13]:
class ChannelAdaptiveResNet(nn.Module):
    """A timm backbone preceded by a ChannelAdaptiveBlock.

    The adapter turns inputs with up to ``max_in_channels`` channels into a
    3-channel tensor, which the (non-pretrained) backbone classifies into
    ``out_classes`` classes.
    """

    def __init__(self, base_model='resnet18', max_in_channels=3, out_classes=50):
        super().__init__()
        backbone_channels = 3  # adapter output width == backbone input width
        self.channel_adapter = ChannelAdaptiveBlock(
            max_in_channels=max_in_channels,
            out_channels=backbone_channels,
        )
        self.backbone = timm.create_model(
            base_model,
            pretrained=False,
            in_chans=backbone_channels,
            num_classes=out_classes,
        )

    def forward(self, x):
        # Adapt to the fixed 3-channel layout, then classify.
        return self.backbone(self.channel_adapter(x))

In [14]:
# Baseline: plain timm ResNet-18, randomly initialised, 3 input channels, 50 classes.
model = timm.create_model('resnet18', pretrained=False,in_chans=3, num_classes=50)
# Channel-adaptive variant; its adapter is sized for up to 6 input channels.
model1 = ChannelAdaptiveResNet(max_in_channels=6, out_classes=50)


In [15]:
# Complexity of the baseline for a 3x384x384 input.
# NOTE(review): the value printed under the "FLOPs" label is the `macs` string returned
# by ptflops (the recorded output reads "GMac"), i.e. multiply-accumulates.
macs, params = get_model_complexity_info(model, (3, 384, 384), as_strings=True, print_per_layer_stat=False)
print(f"FLOPs: {macs}, Parameters: {params}")

FLOPs: 5.36 GMac, Parameters: 11.2 M


In [16]:
# Same measurement for the channel-adaptive model. The probe input has 3 channels while
# model1's adapter was built with max_in_channels=6, so the adapter zero-pads it.
# NOTE(review): as above, the "FLOPs" label prints the `macs` string from ptflops.
macs, params = get_model_complexity_info(model1, (3, 384, 384), as_strings=True, print_per_layer_stat=False)
print(f"FLOPs: {macs}, Parameters: {params}")

FLOPs: 5.37 GMac, Parameters: 11.2 M


In [17]:
# Shared cross-entropy module used by both training and evaluation.
loss = nn.CrossEntropyLoss()


def criterion(outputs, labels):
    """Return the cross-entropy between class logits and integer labels."""
    value = loss(outputs, labels)
    return value

In [18]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Train ``model`` for one pass over ``dataloader`` with mixed precision.

    Relies on the notebook-level ``CONFIG['n_accumulate']``, ``scaler`` (a
    GradScaler) and ``criterion``.

    Args:
        model: network to train (put into train mode here).
        optimizer: optimizer stepped every ``n_accumulate`` batches.
        scheduler: optional LR scheduler, stepped after each optimizer step.
        dataloader: yields dicts with 'image' and 'label' tensors.
        device: device the batches are moved to.
        epoch: epoch number, shown in the progress bar only.

    Returns:
        Sample-weighted mean training loss over the epoch (0.0 if the
        dataloader yields no batches).
    """
    model.train()

    accum_steps = CONFIG['n_accumulate']
    num_batches = len(dataloader)

    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0.0  # stays defined even when the loader is empty

    # Start from clean gradients so nothing left over from an earlier run is applied.
    optimizer.zero_grad()

    bar = tqdm(enumerate(dataloader), total=num_batches)
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        batch_size = images.size(0)

        with amp.autocast(enabled = True):
            outputs = model(images)
            batch_loss = criterion(outputs, labels)

        # Only the backward pass uses the loss divided by the accumulation factor;
        # the reported loss below stays on the per-sample scale.
        scaler.scale(batch_loss / accum_steps).backward()

        # Step after every full accumulation group, and also on the last batch so a
        # trailing partial group is applied instead of leaking into the next epoch.
        if (step + 1) % accum_steps == 0 or (step + 1) == num_batches:
            scaler.unscale_(optimizer)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()

        running_loss += (batch_loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])
    gc.collect()

    return epoch_loss

In [19]:
@torch.inference_mode()
def valid_one_epoch(model, dataloader, device, epoch, optimizer=None):
    """Evaluate ``model`` on ``dataloader`` and compute loss and macro OvR AUC.

    Args:
        model: network to evaluate (put into eval mode here).
        dataloader: yields dicts with 'image' and 'label' tensors.
        device: device the batches are moved to.
        epoch: epoch number, shown in the progress bar only.
        optimizer: optional; when given, its current learning rate is shown in
            the progress bar. The function no longer reads a notebook-global
            ``optimizer``, so it also works when none exists.

    Returns:
        ``(epoch_loss, auc, true_y, pred_y)`` where ``epoch_loss`` is the
        sample-weighted mean loss, ``true_y`` the concatenated labels and
        ``pred_y`` the concatenated softmax probabilities (NumPy arrays).
    """
    model.eval()

    dataset_size = 0
    running_loss = 0.0

    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    true_y = []
    pred_y = []

    for step, data in bar:

        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        batch_size = images.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        true_y.append(labels.cpu().numpy())

        # Class probabilities are what roc_auc_score expects for multi-class input.
        tmp_pred = torch.softmax(outputs, dim=1)
        pred_y.append(tmp_pred.cpu().numpy())

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        postfix = dict(Epoch=epoch, Valid_Loss=epoch_loss)
        if optimizer is not None:
            postfix['LR'] = optimizer.param_groups[0]['lr']
        bar.set_postfix(**postfix)

    true_y = np.concatenate(true_y)
    pred_y = np.concatenate(pred_y)

    gc.collect()

    auc = roc_auc_score(true_y, pred_y, multi_class='ovr', average='macro')

    return epoch_loss,auc,true_y,pred_y

In [20]:
@torch.inference_mode()
def get_score(model, dataloader, device):
    """Compute classification metrics for ``model`` over ``dataloader``.

    Args:
        model: network to evaluate (put into eval mode here).
        dataloader: yields dicts with 'image' and 'label' tensors.
        device: device the batches are moved to.

    Returns:
        ``(auc, f1, precision, recall)``: macro one-vs-rest ROC AUC computed
        from softmax probabilities, and macro F1 / precision / recall computed
        from the arg-max predictions.
    """
    model.eval()

    true_y = []
    pred_y = []

    # No loss is computed here: none of the returned metrics uses it.
    for data in dataloader:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        outputs = model(images)

        true_y.append(labels.cpu().numpy())
        pred_y.append(torch.softmax(outputs, dim=1).cpu().numpy())

    true_y = np.concatenate(true_y)
    pred_y = np.concatenate(pred_y)

    gc.collect()

    auc = roc_auc_score(true_y, pred_y, multi_class='ovr', average='macro')
    pred_labels = np.argmax(pred_y, axis=1)
    # zero_division=0 spells out the value used for classes that are never predicted
    # (or never present): the same 0 the default produces, without the warning.
    f1 = f1_score(true_y, pred_labels, average='macro', zero_division=0)
    precision = precision_score(true_y, pred_labels, average='macro', zero_division=0)
    recall = recall_score(true_y, pred_labels, average='macro', zero_division=0)

    return auc,f1,precision,recall

In [21]:
def run_training(model, optimizer, scheduler,train_loader,valid_loader,device,num_epochs):
    """Train for ``num_epochs`` epochs, checkpointing on validation AUC.

    After every epoch the model is validated; whenever the AUC is at least as
    high as the best seen so far, the weights are saved to
    ``<hw_dir>/model/job_<job>_model.bin`` (``hw_dir`` and ``job`` are
    notebook-level globals) and that epoch's labels/probabilities are kept.

    Returns:
        ``(history, best_true_y, best_pred_y)`` where ``history`` maps
        'Train Loss', 'Valid Loss' and 'Valid Auc' to per-epoch lists.
    """
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))

    best_true_y, best_pred_y = [], []
    best_epoch_auc = 0
    start = time.time()

    history = defaultdict(list)
    gc.collect()
    for epoch in range(1, num_epochs + 1):
        gc.collect()
        train_epoch_loss = train_one_epoch(
            model, optimizer, scheduler, train_loader, device, epoch=epoch
        )
        val_epoch_loss, auc, true_y, pred_y = valid_one_epoch(
            model, valid_loader, device, epoch=epoch
        )
        print(val_epoch_loss,auc)

        epoch_record = (
            ('Train Loss', train_epoch_loss),
            ('Valid Loss', val_epoch_loss),
            ('Valid Auc', auc),
        )
        for key, value in epoch_record:
            history[key].append(value)

        # Nothing to save unless this epoch matches or beats the best AUC so far.
        if not (auc >= best_epoch_auc):
            continue

        print(f"Validation Auc Improved ({best_epoch_auc} ---> {auc})")
        best_epoch_auc = auc

        checkpoint_path = hw_dir+'/model/'+f"job_{job}_model"+".bin"
        torch.save(model.state_dict(), checkpoint_path)
        best_true_y, best_pred_y = true_y, pred_y

    time_elapsed = time.time() - start
    hours, remainder = divmod(time_elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(hours, minutes, seconds))

    return history,best_true_y,best_pred_y

# ResNet

In [22]:
# `job` is read by run_training to name the checkpoint file (job_<job>_model.bin).
job=1
is_amp = True
# Notebook-level gradient scaler; train_one_epoch uses it as a global.
scaler = amp.GradScaler(enabled = is_amp)

# Move the baseline ResNet to the configured device.
model = model.to(CONFIG['device'])

In [23]:
# Training data uses the random augmentation pipeline; validation only resizes.
train_dataset = ImageDataset(train_df, augment=data_transforms["train"])
valid_dataset = ImageDataset(valid_df, augment=data_transforms["valid"])

# Training batches are shuffled and the last incomplete batch is dropped.
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
    num_workers=8,
    pin_memory=True,
)
# Validation keeps the file order and every sample.
valid_loader = DataLoader(
    valid_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

In [24]:

# Baseline run. The cosine schedule is sized as one step per training batch for every
# epoch, with the first tenth of the steps used as warm-up.
num_epochs = 16
optimizer = optim.AdamW(model.parameters(), lr=1.5e-3)
num_train_steps = len(train_loader) * num_epochs
num_warmup_steps = num_train_steps // 10
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_train_steps,
)

history, true_y, pred_y = run_training(
    model, optimizer, scheduler, train_loader, valid_loader,
    device=CONFIG['device'], num_epochs=num_epochs,
)

[INFO] Using GPU: NVIDIA GeForce RTX 3090



  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.73364487224155 0.7356311413454272
Validation Auc Improved (0 ---> 0.7356311413454272)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.388416595458984 0.8279969765684051
Validation Auc Improved (0.7356311413454272 ---> 0.8279969765684051)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

2.7596916929880777 0.895721844293273
Validation Auc Improved (0.8279969765684051 ---> 0.895721844293273)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

2.2165877066718207 0.9334794658604182
Validation Auc Improved (0.895721844293273 ---> 0.9334794658604182)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.9887660980224608 0.9472008062484254
Validation Auc Improved (0.9334794658604182 ---> 0.9472008062484254)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.7822319724824693 0.958745275888133
Validation Auc Improved (0.9472008062484254 ---> 0.958745275888133)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.769543577035268 0.9584126984126984


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.6325892996788025 0.9645099521289997
Validation Auc Improved (0.958745275888133 ---> 0.9645099521289997)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.6835241182645162 0.962605190224238


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.3408222119013469 0.9754144620811289
Validation Auc Improved (0.9645099521289997 ---> 0.9754144620811289)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.356457002957662 0.9761300075585789
Validation Auc Improved (0.9754144620811289 ---> 0.9761300075585789)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.2019986679818895 0.9797984378936759
Validation Auc Improved (0.9761300075585789 ---> 0.9797984378936759)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.1645827865600586 0.9808667170571931
Validation Auc Improved (0.9797984378936759 ---> 0.9808667170571931)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.1129164099693298 0.9822927689594357
Validation Auc Improved (0.9808667170571931 ---> 0.9822927689594357)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.116493329472012 0.9822877299067776


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.1096354375945197 0.9824640967498113
Validation Auc Improved (0.9822927689594357 ---> 0.9824640967498113)
Training complete in 0h 22m 13s


In [25]:
# Keep the baseline run's history and best-epoch labels/probabilities under model_* names;
# `history`, `true_y` and `pred_y` are reassigned by the later training cells.
model_history,model_true_y,model_pred_y=history,true_y,pred_y
model_history

defaultdict(list,
            {'Train Loss': [3.517613368478381,
              2.9555108769219895,
              2.501563476647443,
              2.1902940830721063,
              1.95050737225575,
              1.753825263697126,
              1.5863705808334505,
              1.4476291283904783,
              1.3253653633449725,
              1.2140998227393578,
              1.1257413776538632,
              1.0478987715504913,
              0.9845765412819047,
              0.935653054279837,
              0.9019161751154463,
              0.884608553126756],
             'Valid Loss': [3.73364487224155,
              3.388416595458984,
              2.7596916929880777,
              2.2165877066718207,
              1.9887660980224608,
              1.7822319724824693,
              1.769543577035268,
              1.6325892996788025,
              1.6835241182645162,
              1.3408222119013469,
              1.356457002957662,
              1.2019986679818895,
             

In [26]:
# Restore the best baseline checkpoint written by run_training (job=1). The path is built
# from hw_dir exactly as run_training builds it, so the two cannot drift apart, and
# map_location places the weights on the configured device.
job_1_ckpt = hw_dir + '/model/' + "job_1_model" + ".bin"
model.load_state_dict(torch.load(job_1_ckpt, map_location=CONFIG['device']))

<All keys matched successfully>

In [27]:
# Test split, evaluated with the deterministic (resize-only) pipeline in file order.
test_dataset = ImageDataset(test_df, augment=data_transforms["valid"])
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

In [28]:
# Metrics of the reloaded baseline on the validation split.
auc, f1, precision, recall = get_score(model, valid_loader, device=CONFIG['device'])
print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

AUC = 0.9825, F1 = 0.6454, Precision = 0.6639, Recall = 0.6511


In [29]:
# Metrics of the reloaded baseline on the test split.
auc, f1, precision, recall = get_score(model, test_loader, device=CONFIG['device'])
print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

AUC = 0.9865, F1 = 0.7162, Precision = 0.7351, Recall = 0.7222


# ChannelAdaptiveResNet

In [30]:
# Second run: checkpoints from run_training now go to job_2_model.bin.
job=2
is_amp = True
# Fresh gradient scaler for this run (train_one_epoch reads it as a global).
scaler = amp.GradScaler(enabled = is_amp)

# Move the channel-adaptive model to the configured device.
model1 = model1.to(CONFIG['device'])

In [31]:

# Channel-adaptive run with the same optimizer, schedule and loaders as the baseline:
# one schedule step per training batch, first tenth of the steps as warm-up.
num_epochs = 16
optimizer = optim.AdamW(model1.parameters(), lr=1.5e-3)
num_train_steps = len(train_loader) * num_epochs
num_warmup_steps = num_train_steps // 10
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_train_steps,
)

history, true_y, pred_y = run_training(
    model1, optimizer, scheduler, train_loader, valid_loader,
    device=CONFIG['device'], num_epochs=num_epochs,
)

[INFO] Using GPU: NVIDIA GeForce RTX 3090



  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

4.019109725952148 0.7056890904509951
Validation Auc Improved (0 ---> 0.7056890904509951)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.3978395652770996 0.8330108339632148
Validation Auc Improved (0.7056890904509951 ---> 0.8330108339632148)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

4.58435894648234 0.7613504661123709


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

5.715073432922363 0.6964625850340136


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

10.978410311804877 0.5436533131771227


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

2.9253992043601142 0.8930713025951121
Validation Auc Improved (0.8330108339632148 ---> 0.8930713025951121)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

2.404698593351576 0.9289191232048375
Validation Auc Improved (0.8930713025951121 ---> 0.9289191232048375)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.19305229610867 0.8808264046359287


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.684644089804755 0.8133282942806752


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.4530734475453695 0.8854421768707483


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.3550343669785394 0.9755908289241624
Validation Auc Improved (0.9289191232048375 ---> 0.9755908289241624)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.6459865066740247 0.9663945578231292


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.3383305252922906 0.9790677752582515
Validation Auc Improved (0.9755908289241624 ---> 0.9790677752582515)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.0949731421470643 0.9840513983371127
Validation Auc Improved (0.9790677752582515 ---> 0.9840513983371127)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.057738229698605 0.9846359284454522
Validation Auc Improved (0.9840513983371127 ---> 0.9846359284454522)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.04475076701906 0.9851095993953136
Validation Auc Improved (0.9846359284454522 ---> 0.9851095993953136)
Training complete in 0h 25m 14s


In [32]:
# Restore the best channel-adaptive checkpoint written by run_training (job=2). The path
# is built from hw_dir exactly as run_training builds it, and map_location places the
# weights on the configured device.
job_2_ckpt = hw_dir + '/model/' + "job_2_model" + ".bin"
model1.load_state_dict(torch.load(job_2_ckpt, map_location=CONFIG['device']))

<All keys matched successfully>

In [33]:
# Metrics of the reloaded channel-adaptive model on the validation split.
auc, f1, precision, recall = get_score(model1, valid_loader, device=CONFIG['device'])
print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

AUC = 0.9851, F1 = 0.6519, Precision = 0.6633, Recall = 0.6578


In [34]:
# Metrics of the reloaded channel-adaptive model on the test split.
auc, f1, precision, recall = get_score(model1, test_loader, device=CONFIG['device'])
print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

AUC = 0.9859, F1 = 0.7228, Precision = 0.7353, Recall = 0.7289


In [35]:
def _test_channel_subset(channels):
    """Build a test-split dataset restricted to the given BGR channel indices."""
    return ImageDataset_c(
        test_df,
        augment=data_transforms["valid"],
        channel_select=channels,
    )


# Channel indices follow cv2's BGR order: 0 = blue, 1 = green, 2 = red.
ds_all = _test_channel_subset([0, 1, 2])

# Single-channel variants.
ds_b = _test_channel_subset([0])
ds_g = _test_channel_subset([1])
ds_r = _test_channel_subset([2])

# Two-channel variants.
ds_bg = _test_channel_subset([0,1])
ds_rg = _test_channel_subset([1,2])
ds_br = _test_channel_subset([0,2])

In [36]:
# Parallel lists: c_names[i] is the label printed for dataset c_list[i].
c_list = [
    ds_all,
    ds_b, ds_g, ds_r,
    ds_bg, ds_rg, ds_br,
]
c_names = [
    "channel_all",
    "channel_b", "channel_g", "channel_r",
    "channel_bg", "channel_rg", "channel_br",
]

In [37]:
# Score model1 on every channel subset of the test split.
for i, (ds_name, ds) in enumerate(zip(c_names, c_list)):
    test_loader_ = DataLoader(
        ds,
        batch_size=64,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
    )
    auc, f1, precision, recall = get_score(model1, test_loader_, device=CONFIG['device'])
    print(f"{ds_name}:")
    print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

channel_all:
AUC = 0.9859, F1 = 0.7228, Precision = 0.7353, Recall = 0.7289
channel_b:
AUC = 0.5362, F1 = 0.0047, Precision = 0.0050, Recall = 0.0267
channel_g:
AUC = 0.5385, F1 = 0.0017, Precision = 0.0009, Recall = 0.0200
channel_r:
AUC = 0.5359, F1 = 0.0017, Precision = 0.0009, Recall = 0.0222
channel_bg:
AUC = 0.6083, F1 = 0.0068, Precision = 0.0059, Recall = 0.0267
channel_rg:
AUC = 0.5838, F1 = 0.0008, Precision = 0.0004, Recall = 0.0200
channel_br:
AUC = 0.6073, F1 = 0.0116, Precision = 0.0109, Recall = 0.0311


# ChannelAdaptiveResNet + Data augmentation using random channels

In [38]:
class RandomChannelSelect(A.ImageOnlyTransform):
    """Randomly keep a subset of the three colour channels.

    A random number of channels between ``min_channels`` and ``max_channels``
    is kept. The kept channels are moved to the front (in ascending index
    order) and the remaining slots are filled with zeros, so the output always
    has 3 channels.

    Args:
        min_channels: smallest number of channels to keep (>= 1).
        max_channels: largest number of channels to keep (<= 3).
        always_apply: when truthy, the transform is applied with probability 1.
        p: probability of applying the transform.

    Raises:
        ValueError: if not ``1 <= min_channels <= max_channels <= 3``.
    """

    def __init__(self, min_channels=1, max_channels=3, always_apply=False, p=0.5):
        # Pass only `p`, by keyword: the positional order of the base-class
        # arguments is not the same in every albumentations release, and
        # "always apply" is expressed equivalently as p = 1.
        super().__init__(p=1.0 if always_apply else p)
        # Reject settings that could never work: 0 kept channels made the old
        # padding loop spin forever, and more than 3 cannot be sampled.
        if not 1 <= min_channels <= max_channels <= 3:
            raise ValueError(
                "Expected 1 <= min_channels <= max_channels <= 3, got "
                f"min_channels={min_channels}, max_channels={max_channels}"
            )
        self.min_channels = min_channels
        self.max_channels = max_channels

    def apply(self, img, **params):
        # How many channels to keep, then which ones (0=B, 1=G, 2=R).
        num_channels_to_keep = random.randint(self.min_channels, self.max_channels)
        selected_channels = sorted(random.sample([0, 1, 2], num_channels_to_keep))

        # Kept channels go first; the rest of the 3-channel output stays zero.
        out = np.zeros(img.shape[:2] + (3,), dtype=img.dtype)
        out[:, :, :num_channels_to_keep] = img[:, :, selected_channels]
        return out

    def get_transform_init_args_names(self):
        # Constructor arguments albumentations should record for this transform.
        return ("min_channels", "max_channels")

In [39]:
# Pipelines for the random-channel experiment: the training pipeline is the earlier one
# plus RandomChannelSelect just before tensor conversion; validation is resize-only.
data_transforms_c = dict(
    train=A.Compose(
        [
            A.Resize(384, 384),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(
                shift_limit=0.1,
                scale_limit=0.1,
                rotate_limit=30,
                p=0.5,
            ),
            A.HueSaturationValue(
                hue_shift_limit=0.1,
                sat_shift_limit=0.1,
                val_shift_limit=0.1,
                p=0.5,
            ),
            A.RandomBrightnessContrast(
                brightness_limit=(-0.1, 0.1),
                contrast_limit=(-0.1, 0.1),
                p=0.5,
            ),
            # Half of the samples get a random subset of their channels kept.
            RandomChannelSelect(min_channels=1, max_channels=3, p=0.5),
            ToTensorV2(),
        ],
        p=1.,
    ),
    valid=A.Compose(
        [
            A.Resize(384, 384),
            ToTensorV2(),
        ],
        p=1.,
    ),
)

In [40]:
# Third run: checkpoints from run_training now go to job_3_model.bin.
job=3
is_amp = True
# Fresh gradient scaler for this run (train_one_epoch reads it as a global).
scaler = amp.GradScaler(enabled = is_amp)
# New, untrained channel-adaptive model with the same configuration as model1.
model2 = ChannelAdaptiveResNet(max_in_channels=6, out_classes=50)
model2 = model2.to(CONFIG['device'])

In [41]:
# Training data for the random-channel experiment: same split, channel-dropping pipeline.
train_dataset_c = ImageDataset(train_df, augment=data_transforms_c["train"])

train_loader_c = DataLoader(
    train_dataset_c,
    batch_size=128,
    shuffle=True,
    drop_last=True,
    num_workers=8,
    pin_memory=True,
)

In [42]:

# Random-channel run. The schedule length is derived from train_loader_c, the loader
# that is actually iterated below, so the cosine schedule stays aligned with training
# even if this loader's batch size or dataset ever differs from train_loader.
num_epochs = 16
optimizer = optim.AdamW(model2.parameters(), lr=1.5e-3)
num_train_steps = int(len(train_loader_c) * num_epochs)
num_warmup_steps = int(num_train_steps / 10)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_train_steps)

history,true_y,pred_y= run_training(model2, optimizer, scheduler,train_loader_c,valid_loader,device=CONFIG['device'],num_epochs=num_epochs)

[INFO] Using GPU: NVIDIA GeForce RTX 3090



  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.8549001110924617 0.7223230032753842
Validation Auc Improved (0 ---> 0.7223230032753842)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.3115657477908664 0.8285109599395314
Validation Auc Improved (0.7223230032753842 ---> 0.8285109599395314)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.0364368062549167 0.8636381960191484
Validation Auc Improved (0.8285109599395314 ---> 0.8636381960191484)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.3799837504492865 0.8434366339128243


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

2.9356593730714584 0.8918820861678004
Validation Auc Improved (0.8636381960191484 ---> 0.8918820861678004)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

3.209692390229967 0.8744570420760897


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.8367080550723607 0.9563063744016127
Validation Auc Improved (0.8918820861678004 ---> 0.9563063744016127)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.7951275142033896 0.9606298815822626
Validation Auc Improved (0.9563063744016127 ---> 0.9606298815822626)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.8551243204540677 0.9582161753590325


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.7256792947981092 0.9662030738221213
Validation Auc Improved (0.9606298815822626 ---> 0.9662030738221213)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.5728326278262668 0.9706273620559336
Validation Auc Improved (0.9662030738221213 ---> 0.9706273620559336)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.6066005169020758 0.9677702191987906


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.3123121031125387 0.9778886369362558
Validation Auc Improved (0.9706273620559336 ---> 0.9778886369362558)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.179629242155287 0.982721088435374
Validation Auc Improved (0.9778886369362558 ---> 0.982721088435374)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.1005727503034803 0.9839606953892668
Validation Auc Improved (0.982721088435374 ---> 0.9839606953892668)


  0%|          | 0/494 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1.1022373655107287 0.9837641723356011
Training complete in 0h 25m 9s


In [43]:
# Restore the best random-channel checkpoint written by run_training (job=3). The path is
# built from hw_dir exactly as run_training builds it, and map_location places the
# weights on the configured device.
job_3_ckpt = hw_dir + '/model/' + "job_3_model" + ".bin"
model2.load_state_dict(torch.load(job_3_ckpt, map_location=CONFIG['device']))

<All keys matched successfully>

In [44]:
# Metrics of the reloaded random-channel model on the (full-channel) validation split.
auc, f1, precision, recall = get_score(model2, valid_loader, device=CONFIG['device'])
print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

AUC = 0.9840, F1 = 0.6218, Precision = 0.6411, Recall = 0.6267


In [45]:
# Score model2 on every channel subset of the test split.
for i, (ds_name, ds) in enumerate(zip(c_names, c_list)):
    test_loader_ = DataLoader(
        ds,
        batch_size=64,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
    )
    auc, f1, precision, recall = get_score(model2, test_loader_, device=CONFIG['device'])
    print(f"{ds_name}:")
    print(f"AUC = {auc:.4f}, F1 = {f1:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}")

channel_all:
AUC = 0.9836, F1 = 0.7029, Precision = 0.7169, Recall = 0.7067
channel_b:
AUC = 0.9725, F1 = 0.5998, Precision = 0.6161, Recall = 0.6089
channel_g:
AUC = 0.9761, F1 = 0.6216, Precision = 0.6353, Recall = 0.6289
channel_r:
AUC = 0.9708, F1 = 0.5655, Precision = 0.5824, Recall = 0.5756
channel_bg:
AUC = 0.9801, F1 = 0.6608, Precision = 0.6795, Recall = 0.6644
channel_rg:
AUC = 0.9790, F1 = 0.6776, Precision = 0.6899, Recall = 0.6867
channel_br:
AUC = 0.9801, F1 = 0.6730, Precision = 0.6985, Recall = 0.6778
