In [32]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np 
import sys
from torchinfo import summary
from ptflops import get_model_complexity_info

## Hyperparameters

In [13]:
num_epochs = 15
lr = 0.01

# Transforms

In [14]:
batch_size = 128
# data augmentation and normalization
transform_train = transforms.Compose([
                    transforms.Resize((128, 128)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

transform_test = transforms.Compose([
                    transforms.Resize((128, 128)),
                    transforms.ToTensor(),
                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_train1 = transforms.Compose([
                    transforms.Resize((128, 128)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
])

transform_test1 = transforms.Compose([
                    transforms.Resize((128, 128)),
                    transforms.ToTensor(),
])

# Data Loader

In [15]:
class CustomImageDataset(Dataset):
    def __init__(self, txt_file, img_dir, transform=None, convert="RGB"):
        self.img_labels = []
        self.img_paths = []
        self.img_dir = img_dir
        self.transform = transform
        self.convert = convert

        with open(txt_file, 'r') as f:
            for line in f:
                path, label = line.strip().split(" ")
                self.img_paths.append(path)
                self.img_labels.append(int(label))
                #print(f"{path} , {label}")

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_paths[idx])
        image = Image.open(img_path).convert(self.convert)
        label = self.img_labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

img_dir = os.getcwd() + "/"
# 3 channels
train_data_3channels = CustomImageDataset(txt_file="train.txt", img_dir=img_dir, transform=transform_train, convert="RGB")
val_data_3channels = CustomImageDataset(txt_file="val.txt", img_dir=img_dir, transform=transform_train, convert="RGB")
test_data_3channels = CustomImageDataset(txt_file="test.txt", img_dir=img_dir, transform=transform_test, convert="RGB")
# 1 channels
# train_data_1channels = CustomImageDataset(txt_file="train.txt", img_dir=img_dir, transform=transform_train1, convert="L")
# val_data_1channels = CustomImageDataset(txt_file="val.txt", img_dir=img_dir, transform=transform_train1, convert="L")
# test_data_1channels = CustomImageDataset(txt_file="test.txt", img_dir=img_dir, transform=transform_test1, convert="L")
# DataLoader 3 channels
train_loader_3channels = DataLoader(dataset=train_data_3channels, batch_size=batch_size, shuffle=True)
val_loader_3channels = DataLoader(dataset=val_data_3channels, batch_size=batch_size, shuffle=False)
test_loader_3channels = DataLoader(dataset=test_data_3channels, batch_size=batch_size, shuffle=False)
# DataLoader 1 channels
# train_loader_1channels = DataLoader(dataset=train_data_1channels, batch_size=batch_size, shuffle=True)
# val_loader_1channels = DataLoader(dataset=val_data_1channels, batch_size=batch_size, shuffle=False)
# test_loader_1channels = DataLoader(dataset=test_data_1channels, batch_size=batch_size, shuffle=False)

print(f"Training Set length:{len(train_data_3channels)}, Validating Set length:{len(val_data_3channels)}")

test_num = len(test_data_3channels)
test_steps = len(test_loader_3channels)

Training Set length:63325, Validating Set length:450


# Model Define

In [25]:
class FourLayerModel(nn.Module):
    def __init__(self, in_channels=3):
        super(FourLayerModel, self,).__init__()
        self.in_channels = in_channels
        self.conv = nn.Sequential(
            # layer 1
            nn.Conv2d(self.in_channels, 64, (8, 8), (3, 3), (2, 2)),
            nn.ReLU(True),
            nn.MaxPool2d((3, 3), (1, 1)),

            # layer 2
            nn.Conv2d(64, 192, (5, 5), (1, 1), (2, 2)),
            nn.ReLU(True),
            nn.MaxPool2d((3, 3), (1, 1)),

            # layer 3
            nn.Conv2d(192, 384, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(True),
            nn.Dropout(0.5),

        )

        self._initialize_weights()

    def _initialize_weights(self):
        # 创建一个临时的输入张量
        with torch.no_grad():
            temp_input = torch.zeros(1, self.in_channels, 128, 128)
            temp_output = self.conv(temp_input)
            self.conv_output_size = temp_output.numel()

        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.conv_output_size, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 50),
            nn.Softmax(dim=1)
        )

    def forward(self, x):

        x = self.conv(x)
        x = self.linear(x)

        return x

# Train & Test Function

In [17]:
def train(model, train_loader, val_loader, epochs, learning_rate, device, model_name):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-05)

    loss = []
    train_error=[]
    val_error = []
    valdation_error = []
    train_loss = []
    valdation_loss = []
    train_accuraacy = []
    valdation_accuracy= []

    for epoch in range(epochs):
        train_loss = 0.0
        valid_loss = 0.0
        train_acc = 0.0
        valid_acc = 0.0
        correct = 0.
        total = 0.
        V_correct = 0.
        V_total = 0.
        max_val_acc = 0.0

        model.train()
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            logits= model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)
            pred = logits.data.max(1, keepdim=True)[1]
            #print(pred)
            correct += np.sum(np.squeeze(pred.eq(labels.data.view_as(pred))).cpu().numpy())
            total += images.size(0)
            train_acc =  correct/total
            train_bar.desc = "train epoch[{}/{}]".format(epoch + 1, epochs)

        model.eval()
        with torch.no_grad():
            val_bar = tqdm(val_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                val_images, val_labels = val_images.to(device), val_labels.to(device)
                outputs= model(val_images)
                loss = criterion(outputs, val_labels)
                valid_loss += loss.item() * val_images.size(0)
                pred = outputs.data.max(1, keepdim=True)[1]
                V_correct += np.sum(np.squeeze(pred.eq(val_labels.data.view_as(pred))).cpu().numpy())
                V_total += val_images.size(0)
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        train_loss = train_loss / len(train_loader.dataset)
        train_error.append(train_loss)
        valid_loss = valid_loss / len(val_loader.dataset)
        val_error.append(valid_loss)
        train_accuraacy.append( correct / total)
        valdation_accuracy.append(V_correct / V_total)
        if (V_correct / V_total) > max_val_acc:
            max_val_acc = V_correct / V_total
            torch.save(model.state_dict(), "./models/" + model_name + ".pth")

        print('\tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(train_loss, valid_loss))
        print('\tTrain Accuracy: %.3fd%% (%2d/%2d)\tValdation Accuracy: %.3fd%% (%2d/%2d) '% (100. * correct / total, correct, total, 100. * V_correct / V_total, V_correct, V_total))

    print('Finished Training') 


def test(model, test_loader ,device, type=None):
    criterion = nn.CrossEntropyLoss()
    acc = 0.0
    test_loss = 0.0

    model.eval()
    with torch.no_grad():
        test_bar = tqdm(test_loader, file=sys.stdout)
        for test_data in test_bar:
            test_images, test_labels = test_data
            test_images, test_labels = test_images.to(device), test_labels.to(device)
            
            outputs= model(test_images)
            loss = criterion(outputs, test_labels)

            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, test_labels.to(device)).sum().item()
            test_loss += loss.item()
            test_bar.desc = "test"

    test_accurate = acc / test_num
    print('test_loss: %.3f  test_accuracy: %.3f' %(test_loss / test_steps, test_accurate * 100))
    return test_loss / test_steps, test_accurate * 100.    

# 3 Channels 

## Train

In [30]:
model = FourLayerModel(3)
num_epochs = 10
lr = 0.001
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
train(model, train_loader_3channels, val_loader_3channels, epochs=num_epochs, learning_rate=lr, device=device, model_name="3_channels_four_layer_model")

train epoch[1/10]: 100%|██████████| 495/495 [02:07<00:00,  3.90it/s]
valid epoch[1/10]: 100%|██████████| 4/4 [00:00<00:00,  5.86it/s]
	Training Loss: 3.911632 	Validation Loss: 3.911017
	Train Accuracy: 2.493d% (1579/63325)	Valdation Accuracy: 2.222d% (10/450) 
train epoch[2/10]: 100%|██████████| 495/495 [02:05<00:00,  3.95it/s]
valid epoch[2/10]: 100%|██████████| 4/4 [00:00<00:00,  5.83it/s]
	Training Loss: 3.906503 	Validation Loss: 3.901314
	Train Accuracy: 2.814d% (1782/63325)	Valdation Accuracy: 2.889d% (13/450) 
train epoch[3/10]: 100%|██████████| 495/495 [02:02<00:00,  4.03it/s]
valid epoch[3/10]: 100%|██████████| 4/4 [00:00<00:00,  5.94it/s]
	Training Loss: 3.892344 	Validation Loss: 3.879102
	Train Accuracy: 4.745d% (3005/63325)	Valdation Accuracy: 6.667d% (30/450) 
train epoch[4/10]: 100%|██████████| 495/495 [02:04<00:00,  3.97it/s]
valid epoch[4/10]: 100%|██████████| 4/4 [00:00<00:00,  5.79it/s]
	Training Loss: 3.874203 	Validation Loss: 3.864804
	Train Accuracy: 6.885d% (43

## Test

In [31]:
test_model = FourLayerModel(3)
test_model.load_state_dict(torch.load(os.getcwd() + '/models/3_channels_four_layer_model.pth'))
test_model.to(device)
test_model.eval()
test(test_model, test_loader_3channels, device=device)

test: 100%|██████████| 4/4 [00:00<00:00,  5.88it/s]
test_loss: 3.798  test_accuracy: 12.667


(3.7982678413391113, 12.666666666666668)

In [34]:
model1 = FourLayerModel(3)
num_epochs = 15
lr = 0.001
device = "cuda" if torch.cuda.is_available() else "cpu"
model1.to(device)
train(model1, train_loader_3channels, val_loader_3channels, epochs=num_epochs, learning_rate=lr, device=device, model_name="3_channels_four_layer_model1")

train epoch[1/15]: 100%|██████████| 495/495 [02:03<00:00,  4.01it/s]
valid epoch[1/15]: 100%|██████████| 4/4 [00:00<00:00,  5.75it/s]
	Training Loss: 3.911531 	Validation Loss: 3.910545
	Train Accuracy: 2.626d% (1663/63325)	Valdation Accuracy: 2.000d% ( 9/450) 
train epoch[2/15]: 100%|██████████| 495/495 [02:05<00:00,  3.96it/s]
valid epoch[2/15]: 100%|██████████| 4/4 [00:00<00:00,  5.82it/s]
	Training Loss: 3.903065 	Validation Loss: 3.888396
	Train Accuracy: 3.463d% (2193/63325)	Valdation Accuracy: 5.333d% (24/450) 
train epoch[3/15]: 100%|██████████| 495/495 [02:03<00:00,  4.00it/s]
valid epoch[3/15]: 100%|██████████| 4/4 [00:00<00:00,  5.99it/s]
	Training Loss: 3.884199 	Validation Loss: 3.880554
	Train Accuracy: 5.677d% (3595/63325)	Valdation Accuracy: 6.222d% (28/450) 
train epoch[4/15]: 100%|██████████| 495/495 [02:02<00:00,  4.03it/s]
valid epoch[4/15]: 100%|██████████| 4/4 [00:00<00:00,  5.96it/s]
	Training Loss: 3.876852 	Validation Loss: 3.875721
	Train Accuracy: 6.452d% (40

In [35]:
test_model = FourLayerModel(3)
test_model.load_state_dict(torch.load(os.getcwd() + '/models/3_channels_four_layer_model1.pth'))
test_model.to(device)
test_model.eval()
test(test_model, test_loader_3channels, device=device)

test: 100%|██████████| 4/4 [00:00<00:00,  5.97it/s]
test_loss: 3.796  test_accuracy: 12.889


(3.796234905719757, 12.88888888888889)

In [36]:
model2 = FourLayerModel(3)
num_epochs = 20
lr = 0.001
device = "cuda" if torch.cuda.is_available() else "cpu"
model2.to(device)
train(model2, train_loader_3channels, val_loader_3channels, epochs=num_epochs, learning_rate=lr, device=device, model_name="3_channels_four_layer_model2")

train epoch[1/20]: 100%|██████████| 495/495 [02:04<00:00,  3.97it/s]
valid epoch[1/20]: 100%|██████████| 4/4 [00:00<00:00,  5.91it/s]
	Training Loss: 3.911024 	Validation Loss: 3.909868
	Train Accuracy: 2.099d% (1329/63325)	Valdation Accuracy: 2.000d% ( 9/450) 
train epoch[2/20]: 100%|██████████| 495/495 [02:02<00:00,  4.04it/s]
valid epoch[2/20]: 100%|██████████| 4/4 [00:00<00:00,  5.81it/s]
	Training Loss: 3.901221 	Validation Loss: 3.888909
	Train Accuracy: 3.714d% (2352/63325)	Valdation Accuracy: 5.333d% (24/450) 
train epoch[3/20]: 100%|██████████| 495/495 [02:02<00:00,  4.04it/s]
valid epoch[3/20]: 100%|██████████| 4/4 [00:00<00:00,  5.91it/s]
	Training Loss: 3.882996 	Validation Loss: 3.873598
	Train Accuracy: 5.878d% (3722/63325)	Valdation Accuracy: 7.111d% (32/450) 
train epoch[4/20]: 100%|██████████| 495/495 [02:02<00:00,  4.03it/s]
valid epoch[4/20]: 100%|██████████| 4/4 [00:00<00:00,  5.86it/s]
	Training Loss: 3.869102 	Validation Loss: 3.859172
	Train Accuracy: 7.447d% (47

In [37]:
test_model = FourLayerModel(3)
test_model.load_state_dict(torch.load(os.getcwd() + '/models/3_channels_four_layer_model2.pth'))
test_model.to(device)
test_model.eval()
test(test_model, test_loader_3channels, device=device)

test: 100%|██████████| 4/4 [00:00<00:00,  6.08it/s]
test_loss: 3.745  test_accuracy: 17.556


(3.7451440691947937, 17.555555555555554)

In [39]:
summary(model2, input_size=(128, 3, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
FourLayerModel                           [128, 50]                 --
├─Sequential: 1-1                        [128, 384, 38, 38]        --
│    └─Conv2d: 2-1                       [128, 64, 42, 42]         12,352
│    └─ReLU: 2-2                         [128, 64, 42, 42]         --
│    └─MaxPool2d: 2-3                    [128, 64, 40, 40]         --
│    └─Conv2d: 2-4                       [128, 192, 40, 40]        307,392
│    └─ReLU: 2-5                         [128, 192, 40, 40]        --
│    └─MaxPool2d: 2-6                    [128, 192, 38, 38]        --
│    └─Conv2d: 2-7                       [128, 384, 38, 38]        663,936
│    └─ReLU: 2-8                         [128, 384, 38, 38]        --
│    └─Dropout: 2-9                      [128, 384, 38, 38]        --
├─Sequential: 1-2                        [128, 50]                 --
│    └─Flatten: 2-10                     [128, 554496]             --
│