## import

In [5]:
import os
import numpy as np
from sklearn.model_selection import StratifiedKFold, KFold
import torch
import torchmetrics
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dsets
from torchvision.datasets import ImageFolder

from torch.utils.data import DataLoader
from torchvision import transforms, utils, models
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint
import numpy as np
import wandb
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

## 設定の読み込み(Hydra)

In [6]:

from hydra import compose, initialize_config_dir

# Compose the Hydra config named "config" from an absolute config directory
# and echo it so the run's settings are recorded in the cell output.
with initialize_config_dir(config_dir="/home/hrdathaw/mygdev/src/0429/exp2-xray-gen/config"):
    cfg = compose(config_name="config")
    print(cfg)

# The experiment name from the config doubles as the name of the output folder.
fold_name = cfg.model.name
f_path = f"/mnt/d/experiments/7422526/dev/0429/{fold_name}"
os.makedirs(f_path, exist_ok=True)

{'model': {'name': 'exp1-oxford-test'}, 'train': {'path': '/home/hrdathaw/mygdev/data/chest_xray/train'}, 'test': {'path': '/home/hrdathaw/mygdev/data/chest_xray/test'}, 'params': {'batch_size': 32, 'num_class': 4, 'epochs': 40, 'm_name': 'vgg16', 'optim_name': 'SGD', 'seed': 42}, 'optim_params': {'lr': 0.01, 'momentum': 0.9}}


## データセット・データローダー

In [7]:
# Full training set, loaded WITHOUT a transform: the per-fold MySubset wrappers
# apply the train/valid transform to each item instead.
full = ImageFolder(cfg.train.path)
# Class index of every sample, in dataset order.
y = full.targets

In [8]:
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps; both pipelines end with these."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]


# 'train' uses a random resized crop as its only augmentation; 'valid' is a
# deterministic resize. Both produce 224x224 inputs.
# Augmentations that were tried and are currently disabled for 'train':
# CenterCrop(224), RandomHorizontalFlip(p=0.1), RandomVerticalFlip(p=0.1),
# RandomAutocontrast(p=0.1).
data_transforms = dict(
    train=transforms.Compose(
        [transforms.RandomResizedCrop(224), *_tensor_and_normalize()]
    ),
    valid=transforms.Compose(
        [transforms.Resize((224, 224)), *_tensor_and_normalize()]
    ),
)

In [9]:
# Held-out test split; it uses the deterministic 'valid' pipeline and is never shuffled.
test_set = ImageFolder(cfg.test.path, transform=data_transforms["valid"])
test_loader = DataLoader(
    dataset=test_set,
    batch_size=cfg.params.batch_size,
    shuffle=False,
    pin_memory=True,
)

In [10]:
len(test_set)

624

In [11]:
class MySubset(torch.utils.data.Dataset):
    """View of ``dataset`` restricted to ``indices`` with an optional per-item transform.

    Args:
        dataset: Indexable source whose items are ``(img, label)`` pairs.
        indices: Positions in ``dataset`` that make up this subset, in order.
        transform: Optional callable applied to ``img`` only; skipped when falsy.
    """

    def __init__(self, dataset, indices, transform=None):
        self.dataset, self.indices, self.transform = dataset, indices, transform

    def __len__(self):
        """Number of items in the subset (not in the underlying dataset)."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(img, label)`` for the ``idx``-th subset entry."""
        # Map the subset-local position to a position in the wrapped dataset.
        source_position = self.indices[idx]
        img, label = self.dataset[source_position]
        if not self.transform:
            return img, label
        return self.transform(img), label

In [12]:
# Accumulator for confusion matrices summed across folds.
# It is sized from the configured class count so it matches the
# (num_class x num_class) output of the model's ConfusionMatrix metric; the
# previous fixed 2x2 shape could not be added to it when num_class != 2.
# Falls back to CPU instead of crashing when no CUDA device is present.
sum_matrix = torch.zeros(
    cfg.params.num_class,
    cfg.params.num_class,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

In [13]:
class mymodel(pl.LightningModule):
    """Image classifier that fine-tunes only the final layer of a pretrained backbone.

    Every parameter of the torchvision backbone is frozen and the last
    classification layer is replaced by a fresh ``nn.Linear`` with
    ``num_class`` outputs, so that layer is the only trainable part.

    Args:
        batch_size: Stored on the module; not used by the steps themselves.
        num_class: Number of output classes of the replaced head.
        optim_hparams: Keyword arguments forwarded to the optimizer
            constructor (e.g. ``lr``, ``momentum``). ``None`` means "no extra
            arguments".
        m_name: Backbone name, ``"vgg16"`` or ``"resnet18"``.
        optim_name: Optimizer name, ``"SGD"`` or ``"Adam"``.
        d_train, d_val, d_test: Accepted (and captured by
            ``save_hyperparameters``) but otherwise unused.

    Raises:
        AssertionError: If ``m_name`` (at construction) or ``optim_name``
            (in ``configure_optimizers``) is not one of the supported names.
    """

    def __init__(
        self,
        batch_size=cfg.params.batch_size,
        num_class=cfg.params.num_class,
        optim_hparams: dict = None,
        m_name=cfg.params.m_name,
        optim_name=cfg.params.optim_name,
        d_train=None,
        d_val=None,
        d_test=None,
    ):
        super().__init__()
        # Capture all constructor arguments into self.hparams.
        self.save_hyperparameters()
        self.batch_size = batch_size
        self.num_class = num_class
        self.m_name = m_name
        self.optim_name = optim_name
        self.conf_matrix = torchmetrics.ConfusionMatrix(num_class)

        if self.m_name == "vgg16":
            self.model = models.vgg16(pretrained=True)
            # Freeze the whole pretrained network ...
            for param in self.model.parameters():
                param.requires_grad = False
            # ... then swap in a new (trainable) final classifier layer.
            num_feat = self.model.classifier[6].in_features
            self.model.classifier[6] = nn.Linear(num_feat, self.hparams.num_class)
            print(self.model)

        elif self.m_name == 'resnet18':
            self.model = models.resnet18(pretrained=True)
            for param in self.model.parameters():
                param.requires_grad = False
            num_feat = self.model.fc.in_features
            self.model.fc = nn.Linear(num_feat, self.hparams.num_class)

        else:
            assert False, f'不明なmodelです: "{self.m_name}"'

        # Loss used by training_step.
        self.loss_module = nn.CrossEntropyLoss()

    @staticmethod
    def _sample_weighted_mean(outputs, key):
        """Average the per-batch scalar ``key`` weighted by each batch's size.

        A plain mean of per-batch means over-weights a smaller final batch;
        weighting by the number of targets gives the true per-sample mean.
        """
        values = torch.stack([step[key] for step in outputs]).float()
        counts = torch.tensor(
            [len(step['targets']) for step in outputs],
            dtype=values.dtype,
            device=values.device,
        )
        return (values * counts).sum() / counts.sum()

    def forward(self, x):
        """Return the backbone's raw class scores (logits) for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute cross-entropy loss and accuracy for one training batch."""
        imgs, labels = batch
        preds = self.model(imgs)
        loss = self.loss_module(preds, labels)
        acc = (preds.argmax(dim=-1) == labels).float().mean()

        self.log("train_acc", acc, on_step=False, on_epoch=True)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log loss/accuracy for one validation batch and return them with the raw outputs."""
        x, y = batch
        out = self.forward(x)
        val_loss = F.cross_entropy(out, y)
        out_label = torch.argmax(out, dim=1)
        acc = torch.sum(y == out_label) * 1.0 / len(y)
        self.log('val_loss', val_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {'val_loss': val_loss, 'val_acc': acc, 'out': out, 'targets': y}

    def validation_epoch_end(self, outputs):
        """Log the epoch-level validation loss/accuracy as ``avg_loss`` / ``avg_acc``.

        The averages are weighted by batch size so they are per-sample means
        even when the last batch is smaller than the others.
        """
        avg_loss = self._sample_weighted_mean(outputs, 'val_loss')
        avg_acc = self._sample_weighted_mean(outputs, 'val_acc')
        self.log('avg_loss', avg_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('avg_acc', avg_acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {'avg_val_loss': avg_loss, 'val_acc': avg_acc}

    def test_step(self, batch, batch_idx):
        """Log loss/accuracy for one test batch and return them with the raw outputs."""
        x, y = batch
        out = self.forward(x)
        test_loss = F.cross_entropy(out, y)
        out_label = torch.argmax(out, dim=1)
        acc = torch.sum(y == out_label) * 1.0 / len(y)
        self.log('test_loss', test_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {'test_loss': test_loss, 'test_acc': acc, 'preds': out, 'targets': y}

    def test_epoch_end(self, outputs) -> dict:
        """Gather all test predictions/targets and the per-sample mean loss/accuracy.

        NOTE: confusion-matrix plotting and W&B image logging used to live
        here and is currently disabled; ``self.conf_matrix`` is therefore not
        updated by this method.
        """
        preds = torch.cat([x["preds"] for x in outputs])
        targets = torch.cat([x["targets"] for x in outputs])
        avg_loss = self._sample_weighted_mean(outputs, 'test_loss')
        avg_acc = self._sample_weighted_mean(outputs, 'test_acc')

        return {'avg_test_loss': avg_loss, 'test_acc': avg_acc, 'preds': preds, 'targets': targets}

    def configure_optimizers(self):
        """Build the optimizer selected by ``optim_name`` with ``optim_hparams`` as kwargs."""
        # optim_hparams defaults to None; unpacking None with ** raises a
        # TypeError, so treat it as "no extra keyword arguments".
        optim_kwargs = self.hparams.optim_hparams
        if optim_kwargs is None:
            optim_kwargs = {}

        if self.optim_name == "SGD":
            optimizer = torch.optim.SGD(self.parameters(), **optim_kwargs)
        elif self.optim_name == "Adam":
            optimizer = torch.optim.Adam(self.parameters(), **optim_kwargs)
        else:
            assert False, f'不明なOptimizerです: "{self.hparams.optim_name}"'
        return optimizer

In [14]:
from pytorch_lightning.callbacks import EarlyStopping

# Fix the global random seed from the config before any model or data loader is built.
# Kept as the cell's last expression so the seed value is echoed in the output.
pl.seed_everything(seed=cfg.params.seed)

Global seed set to 42


42

In [15]:
kf = KFold(n_splits=5, shuffle=True, random_state=2022)

# Running means over folds: each fold contributes metric / n_splits.
cv = 0.0   # validation accuracy ("avg_acc")
cvt = 0.0  # test accuracy ("test_acc")

for fold, (train_idx, val_idx) in enumerate(kf.split(full)):
    net = mymodel(optim_hparams=cfg.optim_params)
    name = f"{fold_name}: fold-{fold}"

    # Build the per-fold checkpoint directory with os.path.join. The previous
    # hard-coded "\" is not a separator on POSIX paths such as f_path, so it
    # produced a sibling directory literally named "<exp>\foldN".
    save_path = os.path.join(f_path, f"fold{fold}")
    os.makedirs(save_path, exist_ok=True)
    ckpt = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        dirpath=save_path,
        filename="{epoch}--{val_loss:.3f}",
    )

    wandb_logger = WandbLogger(
        # `project` is optional in the config; fall back to W&B's default
        # project instead of failing on a missing key.
        project=cfg.model.get("project"),
        # Tag the run with the backbone that is actually configured.
        tags=[cfg.params.m_name],
        name=name,
    )
    early = EarlyStopping(monitor="val_loss", patience=5)

    # Same underlying images, different transform per split.
    d_train = MySubset(full, train_idx, data_transforms['train'])
    d_val = MySubset(full, val_idx, data_transforms['valid'])
    train_loader = DataLoader(d_train, cfg.params.batch_size, shuffle=True, pin_memory=True)
    val_loader = DataLoader(d_val, cfg.params.batch_size, shuffle=False, pin_memory=True)

    trainer = pl.Trainer(gpus=1, max_epochs=cfg.params.epochs, logger=wandb_logger, callbacks=[early, ckpt])
    trainer.fit(model=net, train_dataloaders=train_loader, val_dataloaders=val_loader)
    # NOTE(review): this reads "avg_acc" as left in callback_metrics after fit,
    # which appears to be the last validation epoch rather than the best
    # checkpoint used for testing below - confirm this is intended.
    cv += trainer.callback_metrics["avg_acc"].mean().item() / kf.n_splits

    # Evaluate the best (lowest val_loss) checkpoint on the held-out test set.
    trainer.test(net, ckpt_path="best", dataloaders=test_loader)
    test_acc = trainer.callback_metrics["test_acc"].item()
    cvt += test_acc / kf.n_splits
    del net

    # Close this fold's W&B run so the next fold starts a fresh one.
    wandb.finish()
    print(f"fold:{fold},test_acc:{test_acc}, cv_test: {cvt}, cv: {cv}")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

[34m[1mwandb[0m: Currently logged in as: [33mexpfasts[0m (use `wandb login --relogin` to force relogin)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params
-------------------------------------------------
0 | conf_matrix | ConfusionMatrix  | 0     
1 | model       | VGG              | 134 M 
2 | loss_module | CrossEntropyLoss | 0     
-------------------------------------------------
16.4 K    Trainable params
134 M     Non-trainable params
134 M     Total params
537.108   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 9:  52%|█████▏    | 85/164 [06:23<05:56,  4.51s/it, loss=0.866, v_num=c7ox, val_loss=0.544, val_acc=0.870, avg_loss=0.543, avg_acc=0.870, train_loss=0.857]   