In [1]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import PIL.Image
import os
import numpy as np
from torchvision import models

**<font size = 5 color = black>Custom Dataset </font>**

In [None]:
def get_x(path, width):
    """Return the x coordinate encoded in ``path``, rescaled about the image centre.

    The coordinate is read as an integer from the second ``_``-separated field
    of ``path``. Half of ``width`` is subtracted from it and the result is
    divided by half of ``width``.
    """
    half_width = width / 2
    pixel_x = int(path.split("_")[1])
    return (float(pixel_x) - half_width) / half_width

def get_y(path, height):
    """Return the y coordinate encoded in ``path``, rescaled about the image centre.

    The coordinate is read as an integer from the third ``_``-separated field
    of ``path``. Half of ``height`` is subtracted from it and the result is
    divided by half of ``height``.
    """
    half_height = height / 2
    pixel_y = int(path.split("_")[2])
    return (float(pixel_y) - half_height) / half_height

class XYDataset(torch.utils.data.Dataset):
    """Image dataset yielding ``(image, xy, label)`` samples.

    ``*.jpg`` files are collected from the ``block`` and ``free`` sub-folders
    of ``directory``. For every sample:

    * ``image`` is a normalized 3x224x224 float tensor,
    * ``xy`` is a float tensor ``[x, y]`` decoded from the file name with
      ``get_x`` / ``get_y``,
    * ``label`` is ``0.0`` for files in ``block`` and ``1.0`` for files in
      ``free``.

    Args:
        directory: root folder containing the ``block`` and ``free`` sub-folders.
        random_hflips: when true, each sample is mirrored horizontally with
            probability 0.5 (and its ``x`` target negated); when false no
            flipping is applied.
    """

    # Class label assigned to each sub-folder of ``directory``.
    LABELS = {'block': 0.0, 'free': 1.0}

    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips

        # Keep the original ordering: every 'block' image first, then 'free'.
        self.image_paths = []
        for folder in ('block', 'free'):
            self.image_paths += glob.glob(os.path.join(self.directory, folder, '*.jpg'))

        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_paths)

    @staticmethod
    def _label_from_path(path):
        """Map an image path to its class label using its parent folder name.

        Only the immediate parent folder is inspected, so a dataset root or a
        file name that happens to contain "block" or "free" cannot change the
        label.

        Raises:
            ValueError: if the parent folder is neither ``block`` nor ``free``.
        """
        folder = os.path.basename(os.path.dirname(path))
        if folder not in XYDataset.LABELS:
            raise ValueError(
                "cannot derive a label for %r: parent folder %r is not one of %s"
                % (path, folder, sorted(XYDataset.LABELS))
            )
        return XYDataset.LABELS[folder]

    def __getitem__(self, idx):
        """Load, augment and normalize sample ``idx``.

        Returns:
            A tuple ``(image, xy, label)`` as described on the class.
        """
        path = self.image_paths[idx]
        label = self._label_from_path(path)

        # Close the file as soon as the pixels are decoded; force three
        # channels so the 3-channel normalization below always applies.
        with PIL.Image.open(path) as image_file:
            image = image_file.convert('RGB')

        width, height = image.size
        file_name = os.path.basename(path)
        x = float(get_x(file_name, width))
        y = float(get_y(file_name, height))

        # Mirroring is opt-in; the x target must be mirrored with the image.
        if self.random_hflips and np.random.rand() > 0.5:
            image = transforms.functional.hflip(image)
            x = -x

        image = self.color_jitter(image)
        image = transforms.functional.resize(image, (224, 224))
        image = transforms.functional.to_tensor(image)
        # Reverse the channel axis (RGB -> BGR).
        # NOTE(review): the mean/std below are applied after this reversal,
        # exactly as before; confirm this matches the inference pipeline.
        image = image.numpy()[::-1].copy()
        image = torch.from_numpy(image)
        image = transforms.functional.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        # Regression, Classification
        return image, torch.tensor([x, y]).float(), label

    
# Build the dataset from the 'dataset_merge_v3' folder.
dataset = XYDataset('dataset_merge_v3', random_hflips=False)

# Fail early with a clear message instead of an obscure error in a later
# cell when the folder is missing or contains no images.
if len(dataset) == 0:
    raise RuntimeError("no .jpg images found under %r" % dataset.directory)

# Report the sample count; the default object repr carries no information.
print('%s: %d images' % (dataset.directory, len(dataset)))

**<font size = 5 color = black>Split into training and test sets</font>**

**<font size = 5 color = black>報錯修正</font>**

<font color = blue>如果無法執行模型，並發現報錯發生在 loss.backward() 的話，
<br>請清除 dataset 資料夾（包含其內部子資料夾）中的 .ipynb_checkpoints
<br>清除方式:</font>
<br><font color = red>du -chd 1 | sort -h 進行查詢</font>
<br><font color = red>rm -rf .ipynb_checkpoints 進行清除</font>
<br><font color = blue>驗證：觀察 train_dataset 與 test_dataset 的總和是否為正確數值</font>

In [None]:
# Fraction of the samples held out for evaluation.
test_percent = 0.4
# Fixed seed so the train/test partition is the same after every kernel restart.
SPLIT_SEED = 0

num_test = int(test_percent * len(dataset))
num_train = len(dataset) - num_test

torch.manual_seed(SPLIT_SEED)
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [num_train, num_test])

# The two subsets must account for every sample exactly once.
assert len(train_dataset) + len(test_dataset) == len(dataset), "split sizes do not add up"
print(len(train_dataset), len(test_dataset))

**<font color = blue size = 6> Test data label value</font>**

In [7]:
# Both loaders share the same batching options: batches of 8, reshuffled on
# every pass, loaded in the main process (no worker processes).
loader_options = dict(batch_size=8, shuffle=True, num_workers=0)

train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

In [8]:
class MultiOutputModel(torch.nn.Module):
    """Two parallel linear heads applied to the same input features.

    Args:
        in_features: size of the last dimension of the input (default 512).
        num_regression_outputs: output width of the first head, ``out1``
            (default 2).
        num_classes: output width of the second head, ``out2`` (default 2).

    Calling the module with no arguments builds the same two ``Linear(512, 2)``
    layers as before, under the same attribute names, so existing
    ``state_dict`` keys are unchanged.
    """

    def __init__(self, in_features=512, num_regression_outputs=2, num_classes=2):
        super().__init__()
        # Creation order (out1, then out2) is kept so parameter initialization
        # consumes the random number generator in the same sequence.
        self.out1 = torch.nn.Linear(in_features, num_regression_outputs)
        self.out2 = torch.nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return ``(out1(x), out2(x))``."""
        return self.out1(x), self.out2(x)

In [9]:
# Create a torchvision ResNet-18, requesting pretrained weights via `pretrained=True`.
model = models.resnet18(pretrained=True)

In [10]:
# Replace the network's final `fc` layer with the two-headed output module.
model.fc = MultiOutputModel()

# Use the GPU when one is available; otherwise fall back to the CPU instead
# of failing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [None]:
import time

start = time.time()

NUM_EPOCHS = 60
BEST_MODEL_PATH = 'Merge_Function_Robot_final_v13.pth'
best_loss = 1e9
best_accuracy = 0.0

# Regression & Classification: one optimizer updates every model parameter.
optimizer = optim.Adam(model.parameters())

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    train_loss = 0.0

    for images, label_regression, label_classification in train_loader:
        images = images.to(device)
        label_regression = label_regression.to(device)
        # cross_entropy expects integer class indices.
        label_classification = label_classification.long().to(device)

        optimizer.zero_grad()

        output1, output2 = model(images)
        # Regression
        loss1 = F.mse_loss(output1, label_regression)
        # Classification
        loss2 = F.cross_entropy(output2, label_classification)

        # Only the regression loss is reported; both losses are optimized.
        train_loss += float(loss1)
        loss_total = loss1 + loss2
        loss_total.backward()

        optimizer.step()

    train_loss /= len(train_loader)

    # ---- evaluation pass ----
    model.eval()
    test_loss = 0.0
    test_error_count = 0.0

    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for images, label_regression, label_classification in test_loader:
            images = images.to(device)
            label_regression = label_regression.to(device)
            label_classification = label_classification.long().to(device)

            output1, output2 = model(images)

            # Regression
            loss = F.mse_loss(output1, label_regression)
            test_loss += float(loss)
            # Classification: count the samples whose predicted class differs
            # from the label (correct for any number of classes).
            test_error_count += float(torch.sum(output2.argmax(1) != label_classification))

    # Regression
    test_loss /= len(test_loader)
    # Classification
    test_accuracy = 1.0 - float(test_error_count) / float(len(test_dataset))

    print('%d: %f' % (epoch, test_accuracy))
    print('%f, %f' % (train_loss, test_loss))

    # The checkpoint is overwritten whenever EITHER metric improves.
    # NOTE(review): a model with better accuracy but worse regression loss
    # (or vice versa) replaces the previous checkpoint - confirm this is intended.
    if (test_loss < best_loss) or (test_accuracy > best_accuracy):
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        if (test_loss < best_loss):
            best_loss = test_loss
        if (test_accuracy > best_accuracy):
            best_accuracy = test_accuracy

end = time.time()
print("finish")

# Break the elapsed time into hours, minutes within the hour, and seconds
# within the minute.
hour, remainder = divmod(end - start, 3600)
minute, second = divmod(remainder, 60)
print('this model training spends %0dhr %0dmin %0ds' % (hour, minute, second))