# DataLoaderの実装

In [1]:
# Dateset関数の実装

from pathlib import Path
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms as transforms
#from torchvision.io import read_image
import matplotlib.pyplot as plt

def _get_image_paths(image_dir):
    IMG_EXTENSIONS = [".jpg", "jpeg", ".png"]
    image_dir = Path(image_dir)
    image_paths = [
        p for p in image_dir.iterdir() if p.suffix in IMG_EXTENSIONS
    ]
    return image_paths

class Dataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_paths = _get_image_paths(image_dir)
        self.transform = transform

    def __getitem__(self, index):
        path = self.image_paths[index]
        image = Image.open(path)
        #image = read_image(path)
        #image = image.to(torch.float32)
        if transform is not None:
            image = self.transform(image)

        return image

    def __len__(self):
        return len(self.image_paths)

In [2]:
# DataLoaderの設定

from torch.utils.data import DataLoader

#DATA_DIR = "F:/ROHAN4600マルチモーダルデータベース/autoencoder_data"
DATA_DIR = "F:/ROHAN4600マルチモーダルデータベース/ROHAN4600_image/ROHAN4600_3001-3500/ROHAN4600_3226"
transform = transforms.ToTensor()

dataset = Dataset(DATA_DIR, transform)
batch_size = 64
data_loader = DataLoader(dataset, batch_size = batch_size, shuffle=True)

# モデルの実装

In [3]:
# オートエンコーダモデルの構築
import torch
import torch.nn as nn

class CNN_AutoEncoder(nn.Module):
    def __init__(self):
        super(CNN_AutoEncoder, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 4, 7, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(4, 8, 7, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(8, 16, 7, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 8, 5, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(8*16*16, 512),
            nn.ReLU()
        )

        self.decoder_input_layer = nn.Linear(512, 8*16*16)
        self.relu = nn.ReLU()

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(8, 16, 4, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 8, 6, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(8, 4, 6, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(4, 3, 6, stride=2),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoder_output = self.encoder(x)  #.unsqueeze(0)
        decoder_input = self.relu(self.decoder_input_layer(encoder_output))
        decoder_input = decoder_input.view(-1, 8, 16, 16)
        decoder_output = self.decoder(decoder_input)
        return decoder_output

# 学習

In [4]:
# 学習モデル
net = CNN_AutoEncoder()
net.cuda()  # GPU対応
print(net)

CNN_AutoEncoder(
  (encoder): Sequential(
    (0): Conv2d(3, 4, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(4, 8, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(8, 16, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(16, 8, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Flatten(start_dim=1, end_dim=-1)
    (13): Linear(in_features=2048, out_features=512, bias=True)
    (14): ReLU()
  )
  (decoder_input_layer): Linear(in_features=512, out_features=2048, bias=True)
  (relu): ReLU()
  (decoder): Sequential(
    (0): Conv

In [44]:
from torch import optim

# 平均二乗誤差
loss_fnc = nn.MSELoss()

# 最適化アルゴリズム
optimizer = optim.Adam(net.parameters())

# 損失のログ
record_loss_train = []

# エポック数
epoch = 100

for i in range(epoch):
    net.train()
    loss_train = 0
    for j, x in enumerate(data_loader):
        x = x.cuda()  # GPU対応
        y = net(x)
        loss = loss_fnc(y, x)
        loss_train += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    loss_train /= j+1
    record_loss_train.append(loss_train)

    if i%1 == 0:
        print("Epoch:", i, "Loss_Train:", loss_train)

Epoch: 0 Loss_Train: 0.020768777801034352
Epoch: 1 Loss_Train: 0.00849809063753734
Epoch: 2 Loss_Train: 0.007289024680438969
Epoch: 3 Loss_Train: 0.00673032071079231
Epoch: 4 Loss_Train: 0.006403729962443726
Epoch: 5 Loss_Train: 0.006155197035210828
Epoch: 6 Loss_Train: 0.0059724771689313155
Epoch: 7 Loss_Train: 0.00581315324590024
Epoch: 8 Loss_Train: 0.005717492781372534
Epoch: 9 Loss_Train: 0.005594520763649294
Epoch: 10 Loss_Train: 0.005502074865055167
Epoch: 11 Loss_Train: 0.005437889768897245
Epoch: 12 Loss_Train: 0.005374288807312648
Epoch: 13 Loss_Train: 0.0053056266223494375
Epoch: 14 Loss_Train: 0.005248401734408819
Epoch: 15 Loss_Train: 0.00518934414592675
Epoch: 16 Loss_Train: 0.0051381935740614105
Epoch: 17 Loss_Train: 0.005102963070385158
Epoch: 18 Loss_Train: 0.005068965527203141
Epoch: 19 Loss_Train: 0.005018047984534254
Epoch: 20 Loss_Train: 0.004987132633363621
Epoch: 21 Loss_Train: 0.00495679071237747
Epoch: 22 Loss_Train: 0.0049192322870819935
Epoch: 23 Loss_Train: 

In [None]:
data_loader = DataLoader(dataset, batch_size = 1, shuffle=True)
dataiter = iter(data_loader)
image = next(dataiter)

#plt.imshow(image[0].permute(1, 2, 0))
#print(image)

net.eval()
x = image.cuda()
y = net(x)

#print(y)
y = y.squeeze().permute(1, 2, 0)
y = y.cpu().detach().numpy() 
plt.imshow(y)

In [46]:
MODEL_DIR = "F:/model/CNN_Autoencoder/model3"
#MODEL_DIR = "C:/Users/atsushi/Documents/model/CNN_Autoencoder"
torch.save(net.state_dict(), MODEL_DIR)

In [16]:
net.state_dict()

OrderedDict([('encoder.0.weight',
              tensor([[[[-0.0698,  0.0147, -0.0142, -0.0305,  0.0721,  0.0170, -0.0014],
                        [-0.0657,  0.0342,  0.0508,  0.0349, -0.0064, -0.0082,  0.0353],
                        [ 0.0564,  0.0211,  0.0262,  0.0449,  0.0322,  0.0776, -0.0068],
                        [ 0.0212,  0.0700, -0.0577,  0.0334,  0.0149,  0.0203,  0.0464],
                        [-0.0415, -0.0266,  0.0666,  0.0460,  0.0206, -0.0078,  0.0269],
                        [ 0.0666,  0.0597, -0.0175, -0.0129,  0.0168,  0.0078,  0.0154],
                        [ 0.0402,  0.0451, -0.0602, -0.0305,  0.0596,  0.0191, -0.0720]],
              
                       [[ 0.0594, -0.0791, -0.0571,  0.0385, -0.0087,  0.0746, -0.0421],
                        [ 0.0458, -0.0009, -0.0518,  0.0478,  0.0089,  0.0659, -0.0011],
                        [ 0.0510,  0.0297, -0.0052,  0.0674, -0.0380, -0.0147, -0.0632],
                        [ 0.0358, -0.0181, -0.0524,  0.0672,

In [48]:
#モデルのロード

In [20]:
MODEL_DIR = "F:/model/CNN_Autoencoder/model2"
net2 = CNN_AutoEncoder()
net2.load_state_dict(torch.load(MODEL_DIR))

<All keys matched successfully>

In [21]:
net2.cuda()

CNN_AutoEncoder(
  (encoder): Sequential(
    (0): Conv2d(3, 4, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(4, 8, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(8, 16, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(16, 8, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Flatten(start_dim=1, end_dim=-1)
    (13): Linear(in_features=2048, out_features=512, bias=True)
    (14): ReLU()
  )
  (decoder_input_layer): Linear(in_features=512, out_features=2048, bias=True)
  (relu): ReLU()
  (decoder): Sequential(
    (0): Conv

In [None]:
data_loader = DataLoader(dataset, batch_size = 1, shuffle=True)
dataiter = iter(data_loader)
image = next(dataiter)

#plt.imshow(image[0].permute(1, 2, 0))
#print(image)

net2.eval()
x = image.cuda()
y = net2(x)

#print(y)
y = y.squeeze().permute(1, 2, 0)
y = y.cpu().detach().numpy() 
plt.imshow(y)

特徴量抽出のテスト

In [35]:
z = x

In [36]:
net3 = net2.encoder
net3.cuda()

Sequential(
  (0): Conv2d(3, 4, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(4, 8, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(8, 16, kernel_size=(7, 7), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (9): Conv2d(16, 8, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): Flatten(start_dim=1, end_dim=-1)
  (13): Linear(in_features=2048, out_features=512, bias=True)
  (14): ReLU()
)

In [39]:
net3.eval()
u = net3(z)
print(u)

tensor([[ 7.6017, 12.5838, 41.3027, 17.5866,  0.0000, 18.0359, 12.5808,  0.0000,
         14.0608,  0.0000,  0.0000,  0.0000,  0.0000,  2.2026,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000, 18.5866,
          0.0000,  0.0000,  0.0000, 29.7940,  0.0000, 16.5320,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  7.9653,  0.8599,  0.0000,  9.9595,  0.0000,
          0.0000,  0.0000,  4.6233,  1.3650,  0.0000,  2.4833,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000, 24.4335,  0.0000, 34.1650,  5.9277,
          0.0000,  0.0000,  0.0000, 23.8629,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000, 27.6442,  0.0000,  0.0000,  3.7913,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000, 22.5629, 19.0485,  0.0000,  0.0000,  0.0000,
         20.7438,  0.0000,  

特徴量抽出

In [90]:
import glob
import os
import numpy as np

IMG_DIR = "F:/ROHAN4600マルチモーダルデータベース/ROHAN4600_image/ROHAN4600_4001-4600/*"  # 可変
#OUTPUT_PATH = "F:/ROHAN4600マルチモーダルデータベース/img_data/"

folderlist = glob.glob(IMG_DIR)
print(len(folderlist))

600


In [88]:
from PIL import Image
from torchvision import transforms as transforms

transform = transforms.ToTensor()

i = 0
all = len(folderlist)

for folder in folderlist:
    i += 1
    foldername = os.path.basename(folder)
    savedir = OUTPUT_PATH + foldername + ".npy"
    #print(filename)
    #print(savedir)
    
    folder = folder + "/*"
    filelist = glob.glob(folder)
    #print(len(filelist))
    
    flag = 0
    img_data = None
    for filedir in filelist:
        image = Image.open(filedir)
        image = transform(image)
        image = image.cuda()
        x = net3(image.unsqueeze(0))
        
        if flag == 0:
            img_data = x
            flag = 1
            
        else:
            img_data = torch.cat((img_data, x), dim=0)
            
    img_data = img_data.to('cpu').detach().numpy()
    #print(img_data)
    
    np.save(savedir, img_data)
        
    if i%10 == 0:
        print(i, "/", all)

10 / 600
20 / 600
30 / 600
40 / 600
50 / 600
60 / 600
70 / 600
80 / 600
90 / 600
100 / 600
110 / 600
120 / 600
130 / 600
140 / 600
150 / 600
160 / 600
170 / 600
180 / 600
190 / 600
200 / 600
210 / 600
220 / 600
230 / 600
240 / 600
250 / 600
260 / 600
270 / 600
280 / 600
290 / 600
300 / 600
310 / 600
320 / 600
330 / 600
340 / 600
350 / 600
360 / 600
370 / 600
380 / 600
390 / 600
400 / 600
410 / 600
420 / 600
430 / 600
440 / 600
450 / 600
460 / 600
470 / 600
480 / 600
490 / 600
500 / 600
510 / 600
520 / 600
530 / 600
540 / 600
550 / 600
560 / 600
570 / 600
580 / 600
590 / 600
600 / 600


# データセット作成

In [38]:
#IMG_DIR = "F:/ROHAN4600マルチモーダルデータベース/ROHAN4600_image/ROHAN4600_4001-4600/*"  # 可変
#OUTPUT_PATH = "F:/ROHAN4600マルチモーダルデータベース/autoencoder_data/"

folderlist = glob.glob(IMG_DIR)
print(len(folderlist))

600


In [37]:
from PIL import Image
import random

i = 0
all = len(folderlist)
number = 20000

for folder in folderlist:
    i += 1
    folder = folder + "/*"
    filelist = glob.glob(folder)
    #print(len(filelist))
    
    random_number = random.sample(range(len(filelist)), 5)
    
    for img_number in random_number:
        image = Image.open(filelist[img_number])
        #print(filelist[img_number])
        
        save_dir = OUTPUT_PATH + str(number) + ".png"
        number += 1
        #print(save_dir)
        image.save(save_dir)
        
    if i%10 == 0:
        print(i, "/", all)

10 / 600
20 / 600
30 / 600
40 / 600
50 / 600
60 / 600
70 / 600
80 / 600
90 / 600
100 / 600
110 / 600
120 / 600
130 / 600
140 / 600
150 / 600
160 / 600
170 / 600
180 / 600
190 / 600
200 / 600
210 / 600
220 / 600
230 / 600
240 / 600
250 / 600
260 / 600
270 / 600
280 / 600
290 / 600
300 / 600
310 / 600
320 / 600
330 / 600
340 / 600
350 / 600
360 / 600
370 / 600
380 / 600
390 / 600
400 / 600
410 / 600
420 / 600
430 / 600
440 / 600
450 / 600
460 / 600
470 / 600
480 / 600
490 / 600
500 / 600
510 / 600
520 / 600
530 / 600
540 / 600
550 / 600
560 / 600
570 / 600
580 / 600
590 / 600
600 / 600
