In [2]:
#libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from pathlib import Path
import glob
import re
from pyper.chemometrics.dataset import HyperSpectralDataset
import random
import cv2

import torch
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Flatten
from torch.nn import functional as F
import torch.optim as optim
import torchvision.transforms as tfs
import torch.cuda

from torch.utils.data import Dataset, DataLoader

In [3]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(DEVICE))



Using cpu device


In [4]:
#dataloader

imagedir=Path('E:\dataset')
filepath=pd.Series(list(imagedir.glob(r'**/FX10/*.h5')), name='Filepath').astype(str)
#determine the amount of seeds and seedtype
seedsnbr= pd.Series(filepath.apply(lambda x: os.path.split(x)[1]), name='#seeds').astype(str)
seedstype= pd.Series(filepath.apply(lambda x: os.path.split(x)[1]), name='type seeds').astype(str)
seedsnbr=pd.Series(seedsnbr.apply(lambda x: x.replace(".png","") )).astype(str)
seedstype=pd.Series(seedstype.apply(lambda x: re.split('(\d+)',x)[0] )).astype(str)
seedsnbr=pd.Series(seedsnbr.apply(lambda x: re.split('(\d+)',x)[1] )).astype(np.int32)
# determine the batch number
batchnbr=pd.Series(filepath.apply(lambda x: os.path.split(x)[0]), name='batch').astype(str)
batchnbr=pd.Series(batchnbr.apply(lambda x: os.path.split(x)[0]), name='batch').astype(str)
batchnbr_0=pd.Series(batchnbr.apply(lambda x: os.path.split(x)[1]), name='batch').astype(str)
batchnbr_0=pd.Series(batchnbr_0.apply(lambda x: re.split('(\d+)',x)[1] )).astype(np.int32)
# determine if mixture or pure
mixorpur=pd.Series(batchnbr.apply(lambda x: os.path.split(x)[0]), name='type').astype(str)
mixorpur=pd.Series(mixorpur.apply(lambda x: os.path.split(x)[1]), name='type').astype(str)
#makes sure that the amount of seeds is zero if dealing with pure sample
seedsnbr[mixorpur=='pure']=0

#make a list with all the data
dataset_0=pd.concat([filepath,seedsnbr,batchnbr_0,mixorpur,seedstype],axis=1)
dataset_0



Unnamed: 0,Filepath,#seeds,batch,type,type seeds
0,E:\dataset\mixtures\batch3\FX10\rogge2.h5,2,3,mixtures,rogge
1,E:\dataset\mixtures\batch3\FX10\rogge5.h5,5,3,mixtures,rogge
2,E:\dataset\mixtures\batch3\FX10\rogge8.h5,8,3,mixtures,rogge
3,E:\dataset\mixtures\batch3\FX10\rogge10.h5,10,3,mixtures,rogge
4,E:\dataset\mixtures\batch3\FX10\rogge13.h5,13,3,mixtures,rogge
...,...,...,...,...,...
160,E:\dataset\pure\batch12\FX10\haver8.h5,0,12,pure,haver
161,E:\dataset\pure\batch12\FX10\haver9.h5,0,12,pure,haver
162,E:\dataset\pure\batch12\FX10\haver10.h5,0,12,pure,haver
163,E:\dataset\pure\batch12\FX10\haver11.h5,0,12,pure,haver


In [5]:
filepath2='E:/dataset_numpy/'+batchnbr_0.astype(str)+'_'+(filepath.apply(lambda x: os.path.split(x)[1])).astype(str)
filepath2=(filepath2.apply(lambda x: x.replace('h5','npy'))).astype(str)

In [40]:
#dataloader model (uses numpy array as input instead of .png)


class CustomImageDataset(Dataset):
    def __init__(self, target, img_dir, transform=None):
        self.img_labels = target
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = self.img_dir[idx]
        label=self.img_labels[idx]
        image = np.load(img_path,allow_pickle=True)
        if self.transform:
            image = self.transform(image)
        

        return image,label

class ToTensor(object):

    def __call__(self, image):
        image = image
        # swap color axis because
        # numpy image: Height x Width x Color
        # torch image: Color x Height x Width
        image = np.transpose(image,(2,0,1))
        image=torch.from_numpy(image)

        return image

class Rescale(object):

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, image):
        image=image

        h, w = np.shape(image)[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        image=np.float32(image)
        image=cv2.resize(image,(new_h,new_h),interpolation=cv2.INTER_AREA)
        

        # h and w are swapped for landmarks because for images,
        # x and y axes are axis 1 and 0 respectively
     

        return image


dim=np.shape(dataset_0)[0]
train_size=np.floor(0.85*dim)
test_size=dim-train_size
dim_rand=np.random.permutation(np.arange(dim))
idx_train=np.random.permutation(dim_rand[:int(train_size)])
idx_test=np.random.permutation(dim_rand[int(train_size):])

dataset_train=CustomImageDataset(img_dir=filepath2[idx_train],target=seedsnbr[idx_train],transform=tfs.Compose([Rescale(500),ToTensor()]))
print(torch.Tensor.size(dataset_train[0][0]))
dataset_test=CustomImageDataset(img_dir=filepath2[idx_test],target=seedsnbr[idx_test],transform=tfs.Compose([Rescale(500),ToTensor()]))
dataloader_train=DataLoader(dataset_train,batch_size=16,shuffle=True)
dataloader_test=DataLoader(dataset_test,batch_size=16,shuffle=True)


torch.Size([3, 500, 500])


In [41]:
#model
class Net(Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(3, 6, 5)
        self.pool = MaxPool2d(2, 2)
        self.conv2 = Conv2d(6, 16, 5)
        self.fc1 = Linear(16 * 5 * 5, 120)
        self.fc2 = Linear(120, 84)
        self.fc3 = Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()



In [42]:
#train model
criterion = torch.nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i,sample in enumerate(dataloader_train):
        # get the inputs; data is a list of [inputs, labels]
        input,labels=sample

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(input)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {sample + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')


RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x238144 and 400x120)