In [1]:
import os
import sys
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import PIL
import cv2
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import v2
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
"""
#label_csv is like this:
#
# image_id, probabilty_Si, probabilty_SWCNT

"""
class kanaiDataset(Dataset):
    """In-memory image dataset pairing ``.tiff`` files with two-value labels.

    The label CSV is read without a header row; lines starting with ``#`` are
    treated as comments and the delimiter is auto-detected. Column 0 holds the
    image id (the file name without the ``.tiff`` extension) and columns 1 and
    2 hold the two label values returned for that image.

    Rows whose image file does not exist are reported with ``print`` and
    skipped, so ``len(dataset)`` can be smaller than the number of CSV rows.

    Args:
        image_data_path: Directory containing the ``<image_id>.tiff`` files.
        label_csv_path: Path to the label CSV (decoded as cp932).
        transform: Optional callable applied to the image in ``__getitem__``.
    """

    def __init__(self, image_data_path, label_csv_path, transform=None):
        self.transform = transform
        self.label_df = pd.read_csv(label_csv_path, comment='#', header=None, sep=None, engine='python', encoding='cp932')

        self.images = []
        self.labels = []
        for i in range(len(self.label_df)):
            image_id = str(self.label_df.iloc[i, 0])
            image_path = os.path.join(image_data_path, image_id + '.tiff')
            if not os.path.exists(image_path):
                print(f"Image {image_id}.tiff not found in {image_data_path}")
                continue
            # PIL.Image.open is lazy and keeps the file open until the pixels
            # are read. Copy the pixel data into memory and close the file
            # immediately so the dataset does not hold one open descriptor per
            # image (and does not share open files with DataLoader workers).
            with PIL.Image.open(image_path) as image_file:
                image = image_file.copy()
            # Build the label as float32 explicitly: values coming from pandas
            # are float64 by default, which would not match float32 logits
            # when the loss is computed.
            label = torch.tensor(
                [self.label_df.iloc[i, 1], self.label_df.iloc[i, 2]],
                dtype=torch.float32,
            )
            self.images.append(image)
            self.labels.append(label)

    def __len__(self):
        """Return the number of samples whose image file was found."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

In [4]:
transform = v2.Compose([
    v2.ToImage(),  # Convert to a torch Image; the result is ordered channel/height/width.
    # Convert to float32 so the input dtype matches the model weights, which
    # are float32 (the training loop does not use autocast, so float16 inputs
    # would fail with a dtype mismatch in the first convolution).
    # scale=True rescales the values into the 0-1 range.
    v2.ToDtype(torch.float32, scale=True),
    # For PL measurement data the value of each pixel (each CCD element) is
    # itself meaningful information, so scale=False may also be worth trying.
])

In [5]:
# Build the timm backbone with pretrained weights; in_chans=1 makes the stem
# convolution accept single-channel images.
model = timm.create_model(
    model_name='tf_efficientnetv2_b2.in1k',
    pretrained=True,
    in_chans=1,
)
print(model)

EfficientNet(
  (conv_stem): Conv2dSame(1, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): ConvBnAct(
        (conv): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (drop_path): Identity()
      )
      (1): ConvBnAct(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (drop_path):

In [6]:
# Replace the classification head with dropout followed by a 2-output linear
# layer (one logit per label column).
# The input width is taken from model.num_features (1408 for this backbone)
# instead of being hard-coded, so the head stays correct if the backbone is
# swapped and the cell can be re-run after the head has been replaced.
model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(0.2),
    torch.nn.Linear(model.num_features, 2)
)
print(model)

EfficientNet(
  (conv_stem): Conv2dSame(1, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): ConvBnAct(
        (conv): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (drop_path): Identity()
      )
      (1): ConvBnAct(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (drop_path):

In [7]:
# Optimisation hyperparameters: learning rate for the optimizer and the
# number of passes over the training data.
lr, epochs = 1e-4, 10

In [8]:
# Adam over every model parameter, using the learning rate `lr`.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [9]:
# Root directory of the PL dataset. It defaults to the original location and
# can be overridden with the PL_DATA_ROOT environment variable, so the
# notebook also runs on machines where the data lives elsewhere.
data_root = os.environ.get('PL_DATA_ROOT', '/home/kanai/kanai/PL/PL_2023_10_02')

train_dataset = kanaiDataset(
    image_data_path=os.path.join(data_root, 'train/'),
    label_csv_path=os.path.join(data_root, 'train.csv'),
    transform=transform
)
validation_dataset = kanaiDataset(
    image_data_path=os.path.join(data_root, 'validation/'),
    label_csv_path=os.path.join(data_root, 'validation.csv'),
    transform=transform
)

# Shuffle only the training data; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
validation_dataloader = DataLoader(validation_dataset, batch_size=32, shuffle=False, num_workers=4)

FileNotFoundError: [Errno 2] No such file or directory: '/home/kanai/kanai/PL/PL_2023_10_02/train.csv'

In [None]:
#train
# Each epoch: optimise on the training set, evaluate on the validation set,
# then write a checkpoint of the model weights.
model = model.to(device)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    # Re-enter training mode every epoch, because the validation phase below
    # switches the model to eval mode.
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_dataloader):
        # Cast to float32 so inputs and targets match the dtype of the model
        # weights and of the logits used in the loss.
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.float32)

        optimizer.zero_grad()

        outputs = model(images)
        loss = torch.nn.functional.binary_cross_entropy_with_logits(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # Print every 10 batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}")
            running_loss = 0.0

    # Validation phase
    print("Validation phase")
    # eval() disables dropout and makes BatchNorm use its running statistics,
    # so validation neither adds noise to the predictions nor updates the
    # normalisation statistics.
    model.eval()
    valid_loss = 0.0
    with torch.no_grad():
        for i, (images, labels) in enumerate(validation_dataloader):
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.float32)

            outputs = model(images)
            loss = torch.nn.functional.binary_cross_entropy_with_logits(outputs, labels)
            valid_loss += loss.item()
            print(f"[{epoch + 1}, {i + 1}] validation loss: {loss.item():.3f}")

    # Average over batches; guard against an empty validation loader, which
    # would otherwise raise ZeroDivisionError.
    num_validation_batches = len(validation_dataloader)
    if num_validation_batches:
        print(f"Validation loss: {valid_loss / num_validation_batches:.3f}")
    else:
        print("Validation loss: n/a (validation dataloader is empty)")

    # Save the model after each epoch
    torch.save(model.state_dict(), f"model_epoch_{epoch + 1}.pth")
    print(f"Model saved as model_epoch_{epoch + 1}.pth")

In [None]:
# The checkpoint was written with torch.save(model.state_dict(), ...), so the
# file holds a state dict rather than a model object. Load the weights into
# the existing model instead of rebinding `model` to the loaded dict.
# map_location lets the checkpoint load even when the current device differs
# from the one it was saved on.
model.load_state_dict(torch.load("model_epoch_10.pth", map_location=device))