In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

import os
import pandas as pd
import numpy as np
import copy
import time

import cv2

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
data_dir = './../../data/식물잎/splitted/'
train_dir = os.path.join(data_dir,'train')
val_dir = os.path.join(data_dir,'val')
test_dir = os.path.join(data_dir,'test')

In [4]:
cfg = {
    'image_size' : 256,
    'epochs' : 100,
    'lr' : 1e-3,
    'batch_size' : 128,
    'seed' :2023
}

np.random.seed(cfg['seed'])
torch.manual_seed(cfg['seed'])
torch.cuda.manual_seed_all(cfg['seed'])

data_transforms = {
    'train' : transforms.Compose([
        transforms.Resize([cfg['image_size'],cfg['image_size']]),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(224),
        transforms.ToTensor()
    ]),
    'val' : transforms.Compose([
        transforms.Resize([cfg['image_size'],cfg['image_size']]),
        transforms.RandomCrop(224),
        transforms.ToTensor()
    ]),
    'test' : transforms.Compose([
        transforms.Resize([cfg['image_size'],cfg['image_size']]),
        transforms.ToTensor()
    ])
}


train_ds = ImageFolder(root=train_dir, transform=data_transforms['train'])
val_ds = ImageFolder(root=val_dir, transform=data_transforms['val'])
test_ds = ImageFolder(root=test_dir, transform=data_transforms['test'])

from torch.utils.data import DataLoader

train_loader = DataLoader(train_ds, batch_size=cfg['batch_size'], shuffle=True, num_workers=3)
val_loader = DataLoader(val_ds, batch_size=cfg['batch_size'], shuffle=True, num_workers=3)
test_loader = DataLoader(test_ds, batch_size=cfg['batch_size'], shuffle=True, num_workers=3)



In [35]:
i[0].shape

torch.Size([128, 3, 224, 224])

In [5]:
for i in train_loader:
    print(i)
    break

[tensor([[[[0.5451, 0.6588, 0.4627,  ..., 0.4784, 0.5333, 0.5843],
          [0.5804, 0.5451, 0.6118,  ..., 0.4980, 0.5686, 0.6667],
          [0.5961, 0.5490, 0.4863,  ..., 0.5137, 0.4863, 0.5216],
          ...,
          [0.8863, 0.9098, 0.8863,  ..., 0.6941, 0.6510, 0.6078],
          [0.8902, 0.8941, 0.8863,  ..., 0.6980, 0.5922, 0.6510],
          [0.8784, 0.8549, 0.8627,  ..., 0.6706, 0.7176, 0.5804]],

         [[0.4902, 0.6039, 0.4078,  ..., 0.4314, 0.4863, 0.5373],
          [0.5255, 0.4902, 0.5569,  ..., 0.4510, 0.5216, 0.6196],
          [0.5412, 0.4941, 0.4314,  ..., 0.4667, 0.4392, 0.4745],
          ...,
          [0.8706, 0.8941, 0.8824,  ..., 0.6784, 0.6314, 0.5882],
          [0.8667, 0.8784, 0.8706,  ..., 0.6824, 0.5725, 0.6314],
          [0.8549, 0.8314, 0.8392,  ..., 0.6549, 0.6980, 0.5608]],

         [[0.4863, 0.6000, 0.4039,  ..., 0.4392, 0.4941, 0.5451],
          [0.5216, 0.4863, 0.5529,  ..., 0.4588, 0.5294, 0.6275],
          [0.5373, 0.4902, 0.4275,  ..., 

In [72]:
class alexnet(nn.Module):
    def __init__(self):
        super(alexnet, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0)
        self.bn1 = nn.BatchNorm2d(num_features=96)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=2, padding=2)
        self.bn2 = nn.BatchNorm2d(num_features=256)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=4, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=384)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=4, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=384)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=4, padding=1)
        self.bn5 = nn.BatchNorm2d(num_features=256)
        
        self.fc1 = nn.Linear(in_features=256*6*6, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=200)
        self.fc3 = nn.Linear(in_features=200, out_features=33)
        
    def forward(self, x):
        # 1 layer
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        
        # 2 layer
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        
        # 3 layer
        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)
        
        # 4 layer
        x = self.conv4(x)
        x = self.bn4(x)
        x = F.relu(x)
        
        # 5 layer
        x = self.conv5(x)
        x = self.bn5(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        
        x = x.view(x.size(0), -1)
        # 6 layer
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5)

        # 7 layer
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.5)
        
        x = F.relu(self.fc3(x))
        x = F.log_softmax(x, dim=1)
        
        return x
    

In [73]:
alexnet_model = alexnet().to(device)
optimizer = torch.optim.Adam(alexnet_model.parameters(), lr = cfg['lr'])

lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=4)

In [74]:
from torchsummary import summary

In [77]:
summary(alexnet_model, (1, 227, 227))

RuntimeError: Given groups=1, weight of size [96, 3, 11, 11], expected input[2, 1, 227, 227] to have 3 channels, but got 1 channels instead

In [14]:
def train(model=alexnet_model, train_loader=train_loader,
          optimizer=optimizer, lr_scheduler=lr_scheduler):

    model.train()
    for data, label in train_loader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        _, pred = torch.max(output, 1)
        loss = F.cross_entropy(output, label)
        loss.backward()
        optimizer.step()
        

In [15]:
def evaluate(model=alexnet_model, val_loader=val_loader,
            optimizer=optimizer, lr_scheduler=lr_scheduler):

    model.eval()
    test_loss = 0
    test_acc = 0
    with torch.no_grad():
        for data, label in val_loader:
            data, label = data.to(device), label.to(device)
            output = model(data)
            
            test_loss += F.cross_entropy(output, label, reduction='sum').item()
            
            pred = output.max(1, keepdim=True)[1]
            test_acc += pred.eq(label.view_as(pred)).sum().item()
        
    test_loss /= len(val_loader.dataset)
    test_acc = 100*test_acc/len(val_loader.dataset)
    
    return test_loss, test_acc

In [16]:
def train_model(model = alexnet_model, train_loader=train_loader, val_loader =val_loader,
               optimizer =optimizer, lr_scheduler = lr_scheduler , epochs = cfg['epochs']):
    
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    stop_num = 0
    
    for i in range(epochs):
        since = time.time()
        train()
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)
        lr_scheduler.step(val_loss)
        
        if val_acc > best_acc :
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            stop_num = 0
        else :
            stop_num +=1
        
        end_time = time.time() - since
        
        print(f'--------------{i+1}-----------')
        print(f'train loss : {round(train_loss,3)},  acc : {round(train_acc,3)}%')
        print(f'  val loss : {round(train_loss,3)},  acc : {round(train_acc,3)}%')
        print(f'걸린시간 : {end_time}초')
        
        if stop_num >=10:
            print('조기종료')
            break
        
    model.load_state_dict(best_model_wts)
    return model

        

In [17]:
trained_model = train_model()

RuntimeError: Given input size: (256x1x1). Calculated output size: (256x0x0). Output size is too small