# データローダのテスト 
画像とスカラー値を同時にあつかえるローダを作る

In [8]:
import os 
import numpy as np
import pandas as pd
from PIL import Image
import random
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'Meiryo'

import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## サンプルの作成
ファイルパス、スカラ値を4つ以上もつcsvを作る

In [48]:
import glob
g = glob.glob( r'./o_dataset/sample/*.png')
df = pd.DataFrame( g ).reset_index()
df.columns = ['id','fn']

class_order = [-2,-1,0,1,2]
p = np.random.choice(class_order, len(df))
rng = np.random.default_rng()
df['class'] = p
p = rng.integers(-2,3,len(df))
df['point'] = p
p = rng.integers(0,2,size=len(df))
df['isFirst'] = p.astype(bool)
o = rng.uniform(0.0,120.0,size=len(df))
df['dsc'] = o**0.5
df.head(3)

Unnamed: 0,id,fn,class,point,isFirst,dsc
0,0,./o_dataset/sample\geme21end1.png,-2,-2,True,3.10443
1,1,./o_dataset/sample\geme21end2.png,-2,-2,False,3.256055
2,2,./o_dataset/sample\geme21end3.png,2,1,False,7.521918


In [49]:
df.to_csv('d_dataset.csv',index=None)

## DataLoaderに転送する仕組み

In [60]:
csv_path = 'd_dataset.csv'

csv_df = pd.read_csv(csv_path,index_col=0)
csv_df['class'] = csv_df['class'].astype(np.float16)
csv_df['point'] = csv_df['point'].astype(np.float16)
csv_df['isFirst'] = csv_df['isFirst'].astype(np.float16)
csv_df['dsc'] = csv_df['dsc'].astype(np.float16)
display(csv_df.dtypes)
display(csv_df.head(5))

fn          object
class      float16
point      float16
isFirst    float16
dsc        float16
dtype: object

Unnamed: 0_level_0,fn,class,point,isFirst,dsc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,./o_dataset/sample\geme21end1.png,-2.0,-2.0,1.0,3.103516
1,./o_dataset/sample\geme21end2.png,-2.0,-2.0,0.0,3.255859
2,./o_dataset/sample\geme21end3.png,2.0,1.0,0.0,7.523438
3,./o_dataset/sample\geme21end4.png,-2.0,2.0,1.0,9.195312
4,./o_dataset/sample\geme21end5.png,0.0,-2.0,1.0,6.675781


## class が目的、fn,point,isFirst,dsc が説明変数
説明変数を標準化する

In [97]:
# 標準化
from sklearn.preprocessing import StandardScaler
stdsc = StandardScaler()
##学習時の標準化したパラメータは、評価、本番時におなじ重みで標準化する処理が必要
x_train_df = csv_df.copy().drop(['fn','class'],axis=1)
x_train_std = stdsc.fit_transform(x_train_df)
display( x_train_std[:3] )
## DataFrameの値を入れ替え
qcl = csv_df.columns.to_list()
qcl.remove('fn')
qcl.remove('class')
print(qcl)
csv_df[qcl] = x_train_std
csv_df.head(5)

array([[-1.172 ,  0.832 , -1.201 ],
       [-1.172 , -1.202 , -1.147 ],
       [ 0.5474, -1.202 ,  0.3516]], dtype=float16)

['point', 'isFirst', 'dsc']


Unnamed: 0_level_0,fn,class,point,isFirst,dsc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,./o_dataset/sample\geme21end1.png,-2.0,-1.171875,0.832031,-1.201172
1,./o_dataset/sample\geme21end2.png,-2.0,-1.171875,-1.202148,-1.147461
2,./o_dataset/sample\geme21end3.png,2.0,0.547363,-1.202148,0.351562
3,./o_dataset/sample\geme21end4.png,-2.0,1.120117,0.832031,0.939453
4,./o_dataset/sample\geme21end5.png,0.0,-1.171875,0.832031,0.053741


In [114]:
## 標準化オブジェクトはあとで本番にも使うので保存しておく
import pickle
pickle.dump(stdsc, open("stdsc_test.pkl", "wb"))
stdsc.n_features_in_, stdsc.mean_ , stdsc.var_ 

(3,
 array([ 0.00013247, -0.00013317,  0.00018727]),
 array([0.99929126, 1.00027434, 1.0000121 ]))

In [116]:
with open('stdsc_test.pkl','rb') as f:
    loadsc = pickle.load(f)
loadsc.n_features_in_, loadsc.mean_ , loadsc.var_ 

(3,
 array([ 0.00013247, -0.00013317,  0.00018727]),
 array([0.99929126, 1.00027434, 1.0000121 ]))

In [186]:
## 訓練画像のRGBそれぞれの平均と分散　＝　強化、転移学習はベースと同じ重みにすること！
## Tensorで読み込んだあとにかけるので、255で正規化すること!
srcimages = []
for path in csv_df['fn'].values:
    image = Image.open(path)
    image_data = np.array(image)
    normalized_image = image_data.astype(np.float32) / 255.0
    srcimages.append(normalized_image)
mean = np.mean(srcimages, axis=(0, 1, 2))
std = np.std(srcimages, axis=(0, 1, 2))
display('mean',mean.astype(float) , 'std',std , "ほどんど白なのでＲＧＢ全部1に近い","ほんと白が多いので平均のあたりに画素が集中する")

'mean'

array([0.94238877, 0.93328482, 0.96552068])

'std'

array([0.15986384, 0.17564335, 0.16773778], dtype=float32)

'ほどんど白なのでＲＧＢ全部1に近い'

'ほんと白が多いので平均のあたりに画素が集中する'

In [140]:
image = Image.open(r'./o_dataset/sample_red/A.png')
a = np.array(image)
display(a.shape , a[0,0])
func = transforms.ToTensor()
b = func(a)
display(b.shape , b[:,0,0])
## HWC=>CWH と正規化やってる、RGBの位置は変わらない。

(441, 549, 3)

array([237,  28,  36], dtype=uint8)

torch.Size([3, 441, 549])

tensor([0.9294, 0.1098, 0.1412])

## データローダを組んでみる

In [174]:
class ImgValPare(Dataset):
    def __init__(self, df, classcol , fncol , transform):
        
        self.label_list  = df[classcol].to_list()
        self.img_pathlist  = df[fncol].to_list()
        cols = df.columns.to_list()
        cols.remove(fncol)
        cols.remove(classcol)
        self.x_values = df[cols].values
        self.transform = transform

    def __len__(self):  
        return len( self.img_pathlist )
    
    def __getitem__(self, index):
        # 画像をPILとして読み込む
        #print(index)
        image = Image.open(self.img_pathlist[index])
        #image = image.convert("L") 
        #numpy_img = np.asarray(image, np.float32) / 255.0
        #tensor_img = np.expand_dims(image, axis=0) # このままだとNWHCの形式
        #extend = self.x_values[index]    
        label = self.label_list[index]
                             
        if self.transform is not None:
            ##print('use transform')
            image = self.transform(image)
        return image, label 
    pass

In [188]:
std_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.9423888, 0.9332848, 0.9655207), (0.15986384, 0.17564335, 0.16773778))
])
dataset = ImgValPare( csv_df , classcol = 'class' , fncol = 'fn',transform = std_transform)

In [189]:
batch_size = 4
## DataLoader worker (pid(s) 14912) exited unexpectedly メモリ不足
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)


In [225]:
images, labels = next(iter(train_loader))
display(images.shape , len(labels) )
j = images[0]
j = transforms.functional.to_pil_image(j)
display(type(j),j.size)
##plt.imshow(j)

torch.Size([4, 3, 540, 300])

4

PIL.Image.Image

(300, 540)

In [233]:
display(images.shape)
test_c = nn.Conv2d(in_channels=3, out_channels=16*3 , kernel_size = 3, padding=1 , stride=1)
d = test_c.forward( images )
d.shape

torch.Size([4, 3, 540, 300])

torch.Size([4, 48, 540, 300])

In [239]:
from torchvision import models 
import pprint
pprint.pprint([s for s in dir(models) if s[0].isupper()], compact=True)

['AlexNet', 'ConvNeXt', 'DenseNet', 'EfficientNet', 'GoogLeNet',
 'GoogLeNetOutputs', 'Inception3', 'InceptionOutputs', 'MNASNet', 'MobileNetV2',
 'MobileNetV3', 'RegNet', 'ResNet', 'ShuffleNetV2', 'SqueezeNet', 'VGG',
 'VisionTransformer']


In [254]:
from torchvision.models import MobileNetV2
Model = models.mobilenet_v2(pretrained=True)
Model

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to C:\Users\MITC17834B/.cache\torch\hub\checkpoints\mobilenet_v2-b0353104.pth


URLError: <urlopen error [WinError 10054] 既存の接続はリモート ホストに強制的に切断されました。>

このローダでNetを組んでみる

In [389]:
class Net( nn.Module ):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1_1 = nn.Conv2d(in_channels=1, out_channels=32 , kernel_size = 3, padding=1 , stride=1)
        self.conv1_2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        ## ここで [32, 256, 4, 4]
        self.fc1 = nn.Linear(in_features= (256*4*4) + 1, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)
    def forward(self, x_and_extend):
        x =x_and_extend[0]
        x2 = x_and_extend[1]
        #print('in',x.shape)
        x = F.relu(self.conv1_1(x))
        #print('conv1_1',x.shape)
        x = F.relu(self.conv1_2(x))
        #print('conv1_2',x.shape)
        x = self.pool1(x)
        #print('pool1',x.shape , len(x.flatten()) )
        
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = self.pool2(x)
        #print('pool2',x.shape ,  len(x.flatten()))
        
        x = x.view(-1, self.num_flat_features(x))
        print('view',x.shape , len(x.flatten()))
        print(x)
        print('x2',x2.shape )
        x3 = x2.unsqueeze(dim=1)
        print('x3',x3.shape )
        x = torch.cat([x, x3], dim=1) 
        print(x.dtype , x3.dtype)
        print('cat',x.shape , len(x.flatten()))
        print(x)
        x = self.fc1(x)
        print('fc1',x.shape ,  len(x.flatten()))
        x = self.fc2(x)
        print('fc2',x.shape ,  len(x.flatten()))
        
        return x  
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        #print(num_features)
        return num_features
    pass

In [390]:
net = Net()
print(net)

Net(
  (conv1_1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4097, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)


In [391]:
## 損失関数
criterion = nn.MSELoss()

In [392]:
## 最適化
optimizer = optim.SGD(net.parameters(), lr=0.01)



In [397]:
##https://venoda.hatenablog.com/entry/2020/10/14/071440
##https://aidiary.hatenablog.com/entry/20180221/1519223357

num_epochs = 30
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net = net.to(device)
net.train()
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-------------')
    net.train()
    # 損失和
    epoch_loss = 0.0
    # epochの正解数
    epoch_corrects = 0
    for inputs, labels ,extend in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        extend = extend.to(device,dtype=torch.float32)
        # 勾配を初期化する
        optimizer.zero_grad()
        # 学習時のみ勾配を計算させる設定にする
        with torch.set_grad_enabled(True):
            outputs = net([inputs,extend])
             # ラベルを予測
            _, preds = torch.max(outputs, 1)
            # 損失関数を使って損失を計算する
            loss = criterion(outputs.to(torch.float64), labels)
             # 誤差を逆伝搬する
            loss.backward()
            # パラメータを更新する
            optimizer.step()
            epoch_loss += loss.item() * inputs.size(0)
            epoch_corrects += torch.sum(preds == labels.data)
    # 1エポックでの損失を計算
    epoch_loss = epoch_loss / len(train_loader.dataset)
    # 1エポックでの正解率を計算
    epoch_acc = epoch_corrects.double() / len(train_loader.dataset)
    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
    break
        

cpu
Epoch 1/30
-------------
view torch.Size([32, 4096]) 131072
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0285, 0.0287, 0.0195],
        [0.0035, 0.0000, 0.0000,  ..., 0.0611, 0.0578, 0.0603],
        [0.0000, 0.0000, 0.0000,  ..., 0.0332, 0.0324, 0.0252],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0319, 0.0315, 0.0234],
        [0.0000, 0.0000, 0.0000,  ..., 0.0345, 0.0339, 0.0277],
        [0.0003, 0.0000, 0.0000,  ..., 0.0552, 0.0520, 0.0531]],
       grad_fn=<ViewBackward0>)
x2 torch.Size([32])
x3 torch.Size([32, 1])
torch.float32 torch.float32
cat torch.Size([32, 4097]) 131104
tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 2.8702e-02, 1.9480e-02,
         6.4000e+01],
        [3.4673e-03, 0.0000e+00, 0.0000e+00,  ..., 5.7821e-02, 6.0253e-02,
         4.9000e+03],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 3.2382e-02, 2.5158e-02,
         3.6100e+02],
        ...,
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 3.1469e-02, 2.3442e-02,
         2.2500e+02],


In [395]:
extend.dtype

torch.float64

In [301]:
labels.dtype

torch.float64

In [265]:
type(labels.shape[0])

int

In [396]:
extend

tensor([3.6100e+02, 6.8880e+03, 9.6000e+03, 4.3560e+03, 5.2900e+02, 6.2500e+02,
        6.0840e+03, 3.6000e+01, 7.2240e+03, 8.8320e+03, 3.8440e+03, 2.5600e+02,
        5.7760e+03, 1.9360e+03, 0.0000e+00, 8.1000e+03, 9.0240e+03, 5.7600e+02,
        1.0000e+00, 7.9200e+03, 1.3690e+03, 7.8400e+02, 2.5000e+01, 1.2100e+02,
        3.0240e+03, 2.7040e+03, 5.9280e+03, 1.2960e+03, 8.4100e+02, 6.5600e+03,
        1.7640e+03, 9.2160e+03], dtype=torch.float64)