In [21]:
import torch.nn
from datetime import datetime
import torchvision.models as models
import torch.nn.functional as F
import scipy.io
import torch
import numpy as np
from PIL import Image 
import os
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
from torch.utils.data import TensorDataset


In [22]:

def load_data(path_X, path_Y, num_data, data_mode="train"):
    data = scipy.io.loadmat(path_X) 
    print(data.keys())
    if data_mode == "train" or data_mode == "test":
        origin_X = np.array(data['x'].flat) # train
    elif data_mode == "dn":
        origin_X = np.array(data['denoise_x']) # denoise train
    elif data_mode == "dn2":
        origin_X = np.array(data['denoise2_x']) # denoise train

    data = scipy.io.loadmat(path_Y) 
    origin_Y = data['y'][0].reshape(num_data,-1)
    origin_Y_onehot= data['y_onehot'].reshape(num_data,4,19)
    
    print ("origin_X shape: "+str(origin_X.shape))
    print ("origin_Y shape: "+str(origin_Y.shape))
    print ("origin_Y_onehot shape: "+str(origin_Y_onehot.shape))
 
    return origin_X,origin_Y,origin_Y_onehot

In [23]:
def resize_img (o_data,write,save):  
    index=0
    p_data=[]
    for i in o_data:
        name='resize_data_image/resize_x_'+str(index)+'.jpg'
        img = Image.fromarray(i, 'RGB')
        img=img.resize((130,50))
        if os.path.isfile(name) and save:      
            print (name+" is existed")    
        elif save:
            img.save(name)
        if write:
            p_data.append(np.array(img))       
        index+=1
        
    p_data=np.array(p_data)   
    print (p_data.shape)
    return p_data

# Load Train Data

In [24]:


def get_data(mode="train"):
    num_data = 5000
    data_mode = mode
    path = "D:\\Casper\\OTHER\\Data\\identification code_database\\train.mat"
    if data_mode == "train":
        path2 = "D:\\Casper\\OTHER\\Data\\identification code_database\\train.mat"
    elif data_mode == "dn":
        path2 = "D:\\Casper\\OTHER\\Data\\identification code_database\\denoise_train.mat"
    elif data_mode == "dn2":
        path2 = "D:\\Casper\\OTHER\\Data\\identification code_database\\denoise_train2.mat"
    elif data_mode == "test":
        num_data = 3000
        path = "D:\\Casper\\OTHER\\Data\\identification code_database\\test.mat"
        path2 = "D:\\Casper\\OTHER\\Data\\identification code_database\\test.mat"

    train_rate=1 #change to 0.9
    origin_X,origin_Y,origin_Y_onehot=load_data(path2, path, num_data, data_mode)
    num_train_data=int(num_data*train_rate)
    print(origin_X.shape)

    if data_mode == "train" or data_mode == "test":
        resize_x = resize_img(origin_X,True,False) # train
    elif data_mode == "dn":
        resize_x = origin_X # denoise train
    elif data_mode == "dn2":
        resize_x = origin_X # denoise train
    print(num_data)
    train_x_orig=resize_x.reshape(num_data,50,130,-1)[0:num_train_data]
    # test_x_orig=resize_x.reshape(num_data,50,130,-1)[num_train_data:]

    x_train=train_x_orig.astype('float32')/255
    # x_test=test_x_orig.astype('float32')/255

    y_train_onehot=origin_Y_onehot[0:num_train_data]
    # y_test_onehot=origin_Y_onehot[num_train_data:]
    origin_X_tensor = torch.tensor(x_train, dtype=torch.float32)
    origin_Y_tensor = torch.tensor(y_train_onehot, dtype=torch.float32)

    origin_X_tensor_permuted = origin_X_tensor.permute(0, 3, 1, 2)
    train_X = origin_X_tensor_permuted
    train_Y = torch.argmax(origin_Y_tensor, dim=-1)
    dataset = TensorDataset(train_X, train_Y)

    return dataset

In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class SimpleCharCNN(nn.Module):
    def __init__(self):
        super(SimpleCharCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)  # Assuming grayscale images
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(24576, 512)  # Adjust the size according to your image size
        self.fc2 = nn.Linear(512, 76)  # 4 characters, each 26 possible letters

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCharCNN(nn.Module):
    def __init__(self):
        super(SimpleCharCNN, self).__init__()
        
        # Convolutional layers
        self.conv11_W1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=5, stride=1, padding='same')
        self.conv12_W1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1)
        self.max_pool1_W1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv23_W1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding='same')
        self.conv24_W1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1)
        self.conv25_W1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1)
        self.bn1_W1 = nn.BatchNorm2d(128)
        self.max_pool2_W1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv36_W1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=1, padding='same')
        self.conv37_W1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1)
        self.conv38_W1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1)
        self.max_pool3_W1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv49_W1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding='same')
        self.conv410_W1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1)
        self.bn2_W1 = nn.BatchNorm2d(512)
        self.max_pool4_W1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.5)
        
        # Dense (Fully Connected) Layers for each output branch
        self.fc_branches = nn.ModuleList([
            nn.Sequential(
                nn.Linear(in_features=3072, out_features=128),
                nn.ReLU(),
                nn.Dropout(0.25),
                nn.Linear(in_features=128, out_features=128),
                nn.ReLU(),
                nn.Linear(in_features=128, out_features=19),
                nn.Softmax(dim=1)
            ) for _ in range(4)
        ])

    def forward(self, x):
        x = F.relu(self.conv11_W1(x))
        x = F.relu(self.conv12_W1(x))
        x = self.max_pool1_W1(x)
        
        x = F.relu(self.conv23_W1(x))
        x = F.relu(self.conv24_W1(x))
        x = F.relu(self.conv25_W1(x))
        x = self.bn1_W1(x)
        x = self.max_pool2_W1(x)
        
        x = F.relu(self.conv36_W1(x))
        x = F.relu(self.conv37_W1(x))
        x = F.relu(self.conv38_W1(x))
        x = self.max_pool3_W1(x)
        
        x = F.relu(self.conv49_W1(x))
        x = F.relu(self.conv410_W1(x))
        x = self.bn2_W1(x)
        x = self.max_pool4_W1(x)
        
        x = self.flatten(x)
        x = self.dropout(x)
        
        # Branch out to the four different dense layers
        outputs = torch.stack([branch(x) for branch in self.fc_branches], dim = 1)
        
        return outputs

In [27]:
import random
from torch.utils.data import DataLoader, Subset
from datetime import datetime
from tqdm import tqdm
import numpy as np
import torch
from torch import optim, nn
import time
def get_dataloaders(dataset, train_ratio, val_ratio, batch_size):
    train_dataset = dataset
    val_dataset = dataset
    test_dataset = dataset
    # obtain training indices that will be used for validation
    num_train = len(test_dataset)
    indices = list(range(num_train))
    print("--------- INDEX checking ---------")
    print(f"Original: {indices[:5]}")
    random.shuffle(indices)
    print(f"Shuffled: {indices[:5]}")
    print("--------- INDEX shuffled ---------\n")

    split_train = int(np.floor(train_ratio * num_train))
    split_val = split_train + int(np.floor(val_ratio * (num_train-split_train)))
    train_idx, val_idx, test_idx = indices[0:split_train], indices[split_train:split_val], indices[split_val:]
    merge_dataset = Subset(train_dataset, train_idx)

    train_loader = DataLoader(merge_dataset, batch_size=batch_size)
    val_loader = DataLoader(Subset(val_dataset, val_idx), batch_size=batch_size)
    test_loader = DataLoader(Subset(test_dataset, test_idx), batch_size=batch_size)
    
    # check dataset
    print(f"Total number of samples: {num_train} datapoints")
    print(f"Number of train samples: {len(train_loader)} batches/ {len(train_loader.dataset)} datapoints")
    print(f"Number of val samples: {len(val_loader)} batches/ {len(val_loader.dataset)} datapoints")
    print(f"Number of test samples: {len(test_loader)} batches/ {len(test_loader.dataset)} datapoints")
    print(f"")
    
    dataloaders = {
        "train": train_loader,
        "val": val_loader,
        "test": test_loader,
    }
    return dataloaders

In [35]:
def pprint(output = '\n', show_time = False): # print and fprint at the same time
    filename = "hw2-2-MAR27.txt"
    print(output)
    with open(filename, 'a') as f:
        if show_time:
            f.write(datetime.now().strftime("[%Y-%m-%d %H:%M:%S] "))

        f.write(str(output))
        f.write('\n')
pprint("build function", True)

build function


In [28]:
def count_parameters(model):
    total_num = 0
    for parameter in model.parameters():
        if parameter.requires_grad:
            total_num += parameter.numel() 
    return total_num

In [36]:
def train(model_lists, model_name, loaders, phases=['train'], reshape=False, save_weight=False):
    model = model_lists[model_name]()
    if "res" in model_name:
        # model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) # denoise train
        num_features = model.fc.in_features
        model.fc = torch.nn.Linear(num_features, 76)

    pprint(f"Training model: {model_name}")
    model_parameters_amount = count_parameters(model)  # Assume this function is defined elsewhere
    pprint(f"Total parameters: {model_parameters_amount:,}")

    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    lr = 0.001
    optimizer = optim.Adam(model.parameters(), lr=lr)
    pprint(f"Learning rate={lr}")
    epochs = 25

    start = time.time()
    for epoch in range(epochs):
        for phase in phases:
            running_loss = 0.0
            correct_predictions = [0, 0, 0, 0]  # Track correct predictions for each of the 4 targets
            total_samples = 0
            model.train() if phase == 'train' else model.eval()  # Simplified model mode setting

            for inputs, labels in tqdm(loaders[phase]):  # Iterate over data.
                inputs, labels = inputs.cuda(), labels.cuda()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)  # [batch_size, 4, 19]
                    if reshape:
                        outputs = outputs.reshape(labels.shape[0], 4, -1)
                    loss = sum([criterion(outputs[:, i, :], labels[:, i]) for i in range(4)])  # Sum loss across all targets

                    if phase == 'train':  # backward + optimize only if in training phase
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item()
                for i in range(4):
                    _, predicted = torch.max(outputs[:, i, :], 1)
                    correct_predictions[i] += (predicted == labels[:, i]).sum().item()

                total_samples += labels.size(0)

            avg_loss = running_loss / total_samples
            top1_accuracy = [cp / total_samples * 100 for cp in correct_predictions]  # Accuracy per target
            pprint(f"Epoch [{epoch+1}/{epochs}], phase: {phase}, samples: {total_samples}, Loss: {avg_loss:.4f}, "
                  f"Top-1 Accuracies: {[f'{acc:.2f}%' for acc in top1_accuracy]}")

    end = time.time()
    pprint(f"Elapsed time: {end - start} seconds")

    if save_weight:
        torch.save(model.state_dict(), f'{model_name}.pt')  # It's often better to save state_dict
        pprint(f"Weight saved as: {model_name}.pt")

    return model

In [39]:
train_dataset = get_data('dn2')
model_list ={
    "SimpleCharCNN": lambda: SimpleCharCNN(),
}
model_name = "SimpleCharCNN"
phases = ['train', 'val']
loaders = get_dataloaders(train_dataset, 0.8, 0.5, 32)
test_dataset = get_data("test")

train(model_list, model_name, loaders, phases)

dict_keys(['__header__', '__version__', '__globals__', 'denoise2_x'])
origin_X shape: (5000, 50, 130)
origin_Y shape: (5000, 4)
origin_Y_onehot shape: (5000, 4, 19)
(5000, 50, 130)
5000
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [1784, 2015, 3988, 139, 639]
--------- INDEX shuffled ---------

Total number of samples: 5000 datapoints
Number of train samples: 125 batches/ 4000 datapoints
Number of val samples: 16 batches/ 500 datapoints
Number of test samples: 16 batches/ 500 datapoints



dict_keys(['__header__', '__version__', '__globals__', 'y_onehot', 'x', 'y'])
origin_X shape: (3000,)
origin_Y shape: (3000, 4)
origin_Y_onehot shape: (3000, 4, 19)
(3000,)
(3000, 50, 130, 3)
3000
Training model: SimpleCharCNN
Total parameters: 4,845,196
Learning rate=0.001


100%|██████████| 125/125 [00:02<00:00, 52.10it/s]


Epoch [1/25], phase: train, samples: 4000, Loss: 0.3619, Top-1 Accuracies: ['11.15%', '12.85%', '12.45%', '10.78%']


100%|██████████| 16/16 [00:00<00:00, 123.11it/s]


Epoch [1/25], phase: val, samples: 500, Loss: 0.3780, Top-1 Accuracies: ['9.40%', '5.80%', '7.80%', '4.80%']


100%|██████████| 125/125 [00:02<00:00, 57.10it/s]


Epoch [2/25], phase: train, samples: 4000, Loss: 0.3489, Top-1 Accuracies: ['22.53%', '23.30%', '24.70%', '22.55%']


100%|██████████| 16/16 [00:00<00:00, 145.51it/s]


Epoch [2/25], phase: val, samples: 500, Loss: 0.3599, Top-1 Accuracies: ['25.00%', '15.60%', '25.40%', '19.20%']


100%|██████████| 125/125 [00:02<00:00, 56.80it/s]


Epoch [3/25], phase: train, samples: 4000, Loss: 0.3336, Top-1 Accuracies: ['37.03%', '36.55%', '36.38%', '34.08%']


100%|██████████| 16/16 [00:00<00:00, 160.05it/s]


Epoch [3/25], phase: val, samples: 500, Loss: 0.3473, Top-1 Accuracies: ['32.00%', '30.40%', '38.80%', '25.00%']


100%|██████████| 125/125 [00:02<00:00, 57.36it/s]


Epoch [4/25], phase: train, samples: 4000, Loss: 0.3283, Top-1 Accuracies: ['40.85%', '40.88%', '40.77%', '38.77%']


100%|██████████| 16/16 [00:00<00:00, 160.07it/s]


Epoch [4/25], phase: val, samples: 500, Loss: 0.3318, Top-1 Accuracies: ['44.60%', '47.80%', '41.00%', '43.60%']


100%|██████████| 125/125 [00:02<00:00, 56.88it/s]


Epoch [5/25], phase: train, samples: 4000, Loss: 0.3162, Top-1 Accuracies: ['52.20%', '51.08%', '49.28%', '48.05%']


100%|██████████| 16/16 [00:00<00:00, 160.07it/s]


Epoch [5/25], phase: val, samples: 500, Loss: 0.3212, Top-1 Accuracies: ['56.00%', '54.00%', '48.40%', '48.20%']


100%|██████████| 125/125 [00:02<00:00, 57.36it/s]


Epoch [6/25], phase: train, samples: 4000, Loss: 0.3064, Top-1 Accuracies: ['58.85%', '58.03%', '58.70%', '56.62%']


100%|██████████| 16/16 [00:00<00:00, 160.07it/s]


Epoch [6/25], phase: val, samples: 500, Loss: 0.3117, Top-1 Accuracies: ['60.40%', '57.80%', '62.80%', '58.00%']


100%|██████████| 125/125 [00:02<00:00, 56.82it/s]


Epoch [7/25], phase: train, samples: 4000, Loss: 0.3003, Top-1 Accuracies: ['61.68%', '62.70%', '65.18%', '62.50%']


100%|██████████| 16/16 [00:00<00:00, 159.96it/s]


Epoch [7/25], phase: val, samples: 500, Loss: 0.3058, Top-1 Accuracies: ['65.00%', '67.60%', '66.00%', '58.40%']


100%|██████████| 125/125 [00:02<00:00, 57.10it/s]


Epoch [8/25], phase: train, samples: 4000, Loss: 0.2963, Top-1 Accuracies: ['64.05%', '68.90%', '67.35%', '64.55%']


100%|██████████| 16/16 [00:00<00:00, 160.15it/s]


Epoch [8/25], phase: val, samples: 500, Loss: 0.3011, Top-1 Accuracies: ['67.60%', '70.60%', '69.80%', '64.20%']


100%|██████████| 125/125 [00:02<00:00, 57.29it/s]


Epoch [9/25], phase: train, samples: 4000, Loss: 0.2938, Top-1 Accuracies: ['65.38%', '70.45%', '69.38%', '67.17%']


100%|██████████| 16/16 [00:00<00:00, 160.02it/s]


Epoch [9/25], phase: val, samples: 500, Loss: 0.3022, Top-1 Accuracies: ['68.20%', '67.80%', '68.40%', '63.80%']


100%|██████████| 125/125 [00:02<00:00, 57.36it/s]


Epoch [10/25], phase: train, samples: 4000, Loss: 0.2920, Top-1 Accuracies: ['66.30%', '74.00%', '70.70%', '67.35%']


100%|██████████| 16/16 [00:00<00:00, 160.02it/s]


Epoch [10/25], phase: val, samples: 500, Loss: 0.2997, Top-1 Accuracies: ['69.60%', '71.40%', '70.40%', '65.00%']


100%|██████████| 125/125 [00:02<00:00, 56.63it/s]


Epoch [11/25], phase: train, samples: 4000, Loss: 0.2914, Top-1 Accuracies: ['66.47%', '74.15%', '72.17%', '67.00%']


100%|██████████| 16/16 [00:00<00:00, 159.93it/s]


Epoch [11/25], phase: val, samples: 500, Loss: 0.2982, Top-1 Accuracies: ['68.00%', '75.20%', '72.80%', '64.60%']


100%|██████████| 125/125 [00:02<00:00, 56.85it/s]


Epoch [12/25], phase: train, samples: 4000, Loss: 0.2886, Top-1 Accuracies: ['68.55%', '75.83%', '75.62%', '68.97%']


100%|██████████| 16/16 [00:00<00:00, 159.95it/s]


Epoch [12/25], phase: val, samples: 500, Loss: 0.3025, Top-1 Accuracies: ['68.00%', '72.60%', '68.80%', '58.20%']


100%|██████████| 125/125 [00:02<00:00, 56.58it/s]


Epoch [13/25], phase: train, samples: 4000, Loss: 0.2895, Top-1 Accuracies: ['67.65%', '75.02%', '74.98%', '68.05%']


100%|██████████| 16/16 [00:00<00:00, 160.04it/s]


Epoch [13/25], phase: val, samples: 500, Loss: 0.2975, Top-1 Accuracies: ['69.60%', '73.40%', '75.80%', '63.60%']


100%|██████████| 125/125 [00:02<00:00, 56.90it/s]


Epoch [14/25], phase: train, samples: 4000, Loss: 0.2875, Top-1 Accuracies: ['69.00%', '77.88%', '76.88%', '68.88%']


100%|██████████| 16/16 [00:00<00:00, 160.08it/s]


Epoch [14/25], phase: val, samples: 500, Loss: 0.2950, Top-1 Accuracies: ['71.00%', '79.00%', '75.60%', '65.20%']


100%|██████████| 125/125 [00:02<00:00, 57.10it/s]


Epoch [15/25], phase: train, samples: 4000, Loss: 0.2867, Top-1 Accuracies: ['70.23%', '78.60%', '77.58%', '68.60%']


100%|██████████| 16/16 [00:00<00:00, 160.02it/s]


Epoch [15/25], phase: val, samples: 500, Loss: 0.2963, Top-1 Accuracies: ['70.20%', '76.40%', '75.20%', '64.60%']


100%|██████████| 125/125 [00:02<00:00, 57.41it/s]


Epoch [16/25], phase: train, samples: 4000, Loss: 0.2870, Top-1 Accuracies: ['70.05%', '78.25%', '77.08%', '68.50%']


100%|██████████| 16/16 [00:00<00:00, 160.05it/s]


Epoch [16/25], phase: val, samples: 500, Loss: 0.2963, Top-1 Accuracies: ['71.80%', '76.80%', '76.00%', '61.80%']


100%|██████████| 125/125 [00:02<00:00, 57.62it/s]


Epoch [17/25], phase: train, samples: 4000, Loss: 0.2863, Top-1 Accuracies: ['70.75%', '78.50%', '77.58%', '69.27%']


100%|██████████| 16/16 [00:00<00:00, 160.05it/s]


Epoch [17/25], phase: val, samples: 500, Loss: 0.2946, Top-1 Accuracies: ['71.40%', '78.40%', '77.00%', '65.60%']


100%|██████████| 125/125 [00:02<00:00, 57.31it/s]


Epoch [18/25], phase: train, samples: 4000, Loss: 0.2860, Top-1 Accuracies: ['70.47%', '79.55%', '78.05%', '69.10%']


100%|██████████| 16/16 [00:00<00:00, 159.97it/s]


Epoch [18/25], phase: val, samples: 500, Loss: 0.2954, Top-1 Accuracies: ['72.40%', '77.20%', '76.00%', '64.40%']


100%|██████████| 125/125 [00:02<00:00, 57.10it/s]


Epoch [19/25], phase: train, samples: 4000, Loss: 0.2860, Top-1 Accuracies: ['70.38%', '79.27%', '78.40%', '68.95%']


100%|██████████| 16/16 [00:00<00:00, 160.04it/s]


Epoch [19/25], phase: val, samples: 500, Loss: 0.2950, Top-1 Accuracies: ['72.00%', '76.80%', '77.20%', '64.80%']


100%|██████████| 125/125 [00:02<00:00, 57.05it/s]


Epoch [20/25], phase: train, samples: 4000, Loss: 0.2858, Top-1 Accuracies: ['70.28%', '79.60%', '78.60%', '69.33%']


100%|██████████| 16/16 [00:00<00:00, 145.50it/s]


Epoch [20/25], phase: val, samples: 500, Loss: 0.2956, Top-1 Accuracies: ['71.80%', '75.20%', '76.20%', '65.20%']


100%|██████████| 125/125 [00:02<00:00, 57.62it/s]


Epoch [21/25], phase: train, samples: 4000, Loss: 0.2863, Top-1 Accuracies: ['70.33%', '78.57%', '78.30%', '69.00%']


100%|██████████| 16/16 [00:00<00:00, 159.97it/s]


Epoch [21/25], phase: val, samples: 500, Loss: 0.2954, Top-1 Accuracies: ['71.80%', '76.60%', '77.80%', '63.40%']


100%|██████████| 125/125 [00:02<00:00, 56.38it/s]


Epoch [22/25], phase: train, samples: 4000, Loss: 0.2861, Top-1 Accuracies: ['70.10%', '79.42%', '78.15%', '69.15%']


100%|██████████| 16/16 [00:00<00:00, 160.06it/s]


Epoch [22/25], phase: val, samples: 500, Loss: 0.2945, Top-1 Accuracies: ['72.20%', '77.40%', '77.00%', '65.40%']


100%|██████████| 125/125 [00:02<00:00, 57.37it/s]


Epoch [23/25], phase: train, samples: 4000, Loss: 0.2858, Top-1 Accuracies: ['70.50%', '79.77%', '78.15%', '69.25%']


100%|██████████| 16/16 [00:00<00:00, 159.99it/s]


Epoch [23/25], phase: val, samples: 500, Loss: 0.2951, Top-1 Accuracies: ['73.60%', '77.20%', '75.20%', '64.20%']


100%|██████████| 125/125 [00:02<00:00, 57.36it/s]


Epoch [24/25], phase: train, samples: 4000, Loss: 0.2850, Top-1 Accuracies: ['71.43%', '80.47%', '78.85%', '69.77%']


100%|██████████| 16/16 [00:00<00:00, 145.50it/s]


Epoch [24/25], phase: val, samples: 500, Loss: 0.2941, Top-1 Accuracies: ['72.60%', '77.00%', '78.20%', '65.60%']


100%|██████████| 125/125 [00:02<00:00, 56.85it/s]


Epoch [25/25], phase: train, samples: 4000, Loss: 0.2852, Top-1 Accuracies: ['71.83%', '79.33%', '78.80%', '69.73%']


100%|██████████| 16/16 [00:00<00:00, 160.05it/s]

Epoch [25/25], phase: val, samples: 500, Loss: 0.2929, Top-1 Accuracies: ['73.40%', '79.80%', '78.80%', '65.40%']
Elapsed time: 57.589746952056885 seconds





SimpleCharCNN(
  (conv11_W1): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=same)
  (conv12_W1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (max_pool1_W1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv23_W1): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=same)
  (conv24_W1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv25_W1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn1_W1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool2_W1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv36_W1): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), padding=same)
  (conv37_W1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
  (conv38_W1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
  (max_pool3_W1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv49_W1): Conv2d(256, 512, kernel_si

In [38]:
model_list ={
    "SimpleCharCNN": lambda: SimpleCharCNN(),
    "resnet18": lambda: models.resnet18(weights = models.ResNet18_Weights.DEFAULT),
    "resnet152": lambda: models.resnet152(weights = models.ResNet152_Weights.DEFAULT),
    # "r6_btnk": lambda: mod_resnet(Bottleneck, [2, 2, 0, 0], channel_num_list=[16, 16, 16], num_classes=76)
    "r6_btnk": lambda: mod_resnet(Bottleneck, [2, 2, 0, 0], channel_num_list=[8, 4, 8], num_classes=76)
}
model_name = "r6_btnk"
phases = ['train', 'val']
loaders = get_dataloaders(train_dataset, 0.8, 0.5, 32)
train(model_list, model_name, loaders, phases, reshape=True)

--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [3032, 3195, 1239, 2887, 2873]
--------- INDEX shuffled ---------

Total number of samples: 5000 datapoints
Number of train samples: 125 batches/ 4000 datapoints
Number of val samples: 16 batches/ 500 datapoints
Number of test samples: 16 batches/ 500 datapoints

Training model: r6_btnk
Total parameters: 7,284
Learning rate=0.001


100%|██████████| 125/125 [00:01<00:00, 86.05it/s] 


Epoch [1/25], phase: train, samples: 4000, Loss: 0.3679, Top-1 Accuracies: ['7.05%', '6.60%', '7.15%', '7.70%']


100%|██████████| 16/16 [00:00<00:00, 266.75it/s]


Epoch [1/25], phase: val, samples: 500, Loss: 0.3727, Top-1 Accuracies: ['8.80%', '7.20%', '8.20%', '10.40%']


100%|██████████| 125/125 [00:01<00:00, 107.80it/s]


Epoch [2/25], phase: train, samples: 4000, Loss: 0.3597, Top-1 Accuracies: ['9.78%', '9.93%', '10.65%', '11.38%']


100%|██████████| 16/16 [00:00<00:00, 399.62it/s]


Epoch [2/25], phase: val, samples: 500, Loss: 0.3647, Top-1 Accuracies: ['10.00%', '10.60%', '10.60%', '15.80%']


100%|██████████| 125/125 [00:01<00:00, 119.09it/s]


Epoch [3/25], phase: train, samples: 4000, Loss: 0.3516, Top-1 Accuracies: ['11.18%', '13.05%', '12.88%', '16.53%']


100%|██████████| 16/16 [00:00<00:00, 400.13it/s]


Epoch [3/25], phase: val, samples: 500, Loss: 0.3573, Top-1 Accuracies: ['11.20%', '11.20%', '9.80%', '16.00%']


100%|██████████| 125/125 [00:01<00:00, 108.72it/s]


Epoch [4/25], phase: train, samples: 4000, Loss: 0.3426, Top-1 Accuracies: ['12.70%', '14.57%', '15.47%', '19.23%']


100%|██████████| 16/16 [00:00<00:00, 228.70it/s]


Epoch [4/25], phase: val, samples: 500, Loss: 0.3517, Top-1 Accuracies: ['12.00%', '14.00%', '13.60%', '15.60%']


100%|██████████| 125/125 [00:01<00:00, 105.91it/s]


Epoch [5/25], phase: train, samples: 4000, Loss: 0.3334, Top-1 Accuracies: ['13.88%', '16.07%', '17.57%', '21.82%']


100%|██████████| 16/16 [00:00<00:00, 533.52it/s]


Epoch [5/25], phase: val, samples: 500, Loss: 0.3418, Top-1 Accuracies: ['14.00%', '16.40%', '13.60%', '17.80%']


100%|██████████| 125/125 [00:01<00:00, 110.57it/s]


Epoch [6/25], phase: train, samples: 4000, Loss: 0.3256, Top-1 Accuracies: ['15.25%', '18.15%', '18.65%', '23.50%']


100%|██████████| 16/16 [00:00<00:00, 320.13it/s]


Epoch [6/25], phase: val, samples: 500, Loss: 0.3366, Top-1 Accuracies: ['15.80%', '17.40%', '15.00%', '19.80%']


100%|██████████| 125/125 [00:01<00:00, 105.08it/s]


Epoch [7/25], phase: train, samples: 4000, Loss: 0.3189, Top-1 Accuracies: ['16.35%', '18.90%', '19.95%', '25.65%']


100%|██████████| 16/16 [00:00<00:00, 400.18it/s]


Epoch [7/25], phase: val, samples: 500, Loss: 0.3344, Top-1 Accuracies: ['16.60%', '18.00%', '14.60%', '19.60%']


100%|██████████| 125/125 [00:01<00:00, 111.64it/s]


Epoch [8/25], phase: train, samples: 4000, Loss: 0.3128, Top-1 Accuracies: ['17.45%', '19.57%', '20.55%', '27.32%']


100%|██████████| 16/16 [00:00<00:00, 320.15it/s]


Epoch [8/25], phase: val, samples: 500, Loss: 0.3321, Top-1 Accuracies: ['17.80%', '17.00%', '15.20%', '23.40%']


100%|██████████| 125/125 [00:01<00:00, 105.97it/s]


Epoch [9/25], phase: train, samples: 4000, Loss: 0.3074, Top-1 Accuracies: ['18.50%', '20.42%', '20.72%', '29.20%']


100%|██████████| 16/16 [00:00<00:00, 399.65it/s]


Epoch [9/25], phase: val, samples: 500, Loss: 0.3270, Top-1 Accuracies: ['17.40%', '18.40%', '15.60%', '23.20%']


100%|██████████| 125/125 [00:01<00:00, 108.58it/s]


Epoch [10/25], phase: train, samples: 4000, Loss: 0.3025, Top-1 Accuracies: ['19.35%', '20.95%', '21.75%', '31.45%']


100%|██████████| 16/16 [00:00<00:00, 533.52it/s]


Epoch [10/25], phase: val, samples: 500, Loss: 0.3229, Top-1 Accuracies: ['16.80%', '18.40%', '14.40%', '26.20%']


100%|██████████| 125/125 [00:01<00:00, 111.65it/s]


Epoch [11/25], phase: train, samples: 4000, Loss: 0.2979, Top-1 Accuracies: ['20.15%', '21.45%', '21.93%', '33.50%']


100%|██████████| 16/16 [00:00<00:00, 266.76it/s]


Epoch [11/25], phase: val, samples: 500, Loss: 0.3173, Top-1 Accuracies: ['18.00%', '20.40%', '13.80%', '27.40%']


100%|██████████| 125/125 [00:01<00:00, 106.87it/s]


Epoch [12/25], phase: train, samples: 4000, Loss: 0.2937, Top-1 Accuracies: ['20.62%', '22.02%', '22.27%', '35.70%']


100%|██████████| 16/16 [00:00<00:00, 400.15it/s]


Epoch [12/25], phase: val, samples: 500, Loss: 0.3148, Top-1 Accuracies: ['17.40%', '21.40%', '15.20%', '31.00%']


100%|██████████| 125/125 [00:01<00:00, 111.64it/s]


Epoch [13/25], phase: train, samples: 4000, Loss: 0.2896, Top-1 Accuracies: ['20.90%', '21.82%', '22.50%', '37.38%']


100%|██████████| 16/16 [00:00<00:00, 400.13it/s]


Epoch [13/25], phase: val, samples: 500, Loss: 0.3083, Top-1 Accuracies: ['18.00%', '23.00%', '16.20%', '34.00%']


100%|██████████| 125/125 [00:01<00:00, 116.02it/s]


Epoch [14/25], phase: train, samples: 4000, Loss: 0.2857, Top-1 Accuracies: ['21.52%', '21.70%', '22.68%', '39.20%']


100%|██████████| 16/16 [00:00<00:00, 400.55it/s]


Epoch [14/25], phase: val, samples: 500, Loss: 0.3022, Top-1 Accuracies: ['18.60%', '21.40%', '16.80%', '34.80%']


100%|██████████| 125/125 [00:01<00:00, 120.23it/s]


Epoch [15/25], phase: train, samples: 4000, Loss: 0.2818, Top-1 Accuracies: ['21.82%', '21.88%', '22.88%', '41.42%']


100%|██████████| 16/16 [00:00<00:00, 400.06it/s]


Epoch [15/25], phase: val, samples: 500, Loss: 0.2993, Top-1 Accuracies: ['18.20%', '20.80%', '16.60%', '34.20%']


100%|██████████| 125/125 [00:01<00:00, 123.80it/s]


Epoch [16/25], phase: train, samples: 4000, Loss: 0.2780, Top-1 Accuracies: ['22.48%', '22.15%', '22.93%', '43.77%']


100%|██████████| 16/16 [00:00<00:00, 400.10it/s]


Epoch [16/25], phase: val, samples: 500, Loss: 0.2926, Top-1 Accuracies: ['20.00%', '21.40%', '17.80%', '38.80%']


100%|██████████| 125/125 [00:00<00:00, 128.91it/s]


Epoch [17/25], phase: train, samples: 4000, Loss: 0.2744, Top-1 Accuracies: ['22.95%', '22.43%', '23.03%', '45.90%']


100%|██████████| 16/16 [00:00<00:00, 533.50it/s]


Epoch [17/25], phase: val, samples: 500, Loss: 0.2902, Top-1 Accuracies: ['19.20%', '22.60%', '17.20%', '41.60%']


100%|██████████| 125/125 [00:01<00:00, 123.80it/s]


Epoch [18/25], phase: train, samples: 4000, Loss: 0.2709, Top-1 Accuracies: ['23.60%', '23.05%', '23.60%', '48.08%']


100%|██████████| 16/16 [00:00<00:00, 228.67it/s]


Epoch [18/25], phase: val, samples: 500, Loss: 0.2873, Top-1 Accuracies: ['21.40%', '21.40%', '17.60%', '43.20%']


100%|██████████| 125/125 [00:01<00:00, 120.03it/s]


Epoch [19/25], phase: train, samples: 4000, Loss: 0.2675, Top-1 Accuracies: ['23.75%', '23.62%', '23.77%', '50.08%']


100%|██████████| 16/16 [00:00<00:00, 533.60it/s]


Epoch [19/25], phase: val, samples: 500, Loss: 0.2803, Top-1 Accuracies: ['20.40%', '23.20%', '18.60%', '47.20%']


100%|██████████| 125/125 [00:01<00:00, 116.86it/s]


Epoch [20/25], phase: train, samples: 4000, Loss: 0.2641, Top-1 Accuracies: ['24.00%', '23.60%', '23.35%', '51.92%']


100%|██████████| 16/16 [00:00<00:00, 400.18it/s]


Epoch [20/25], phase: val, samples: 500, Loss: 0.2782, Top-1 Accuracies: ['22.40%', '22.80%', '19.40%', '48.20%']


100%|██████████| 125/125 [00:01<00:00, 105.97it/s]


Epoch [21/25], phase: train, samples: 4000, Loss: 0.2609, Top-1 Accuracies: ['24.50%', '24.32%', '23.38%', '53.60%']


100%|██████████| 16/16 [00:00<00:00, 400.01it/s]


Epoch [21/25], phase: val, samples: 500, Loss: 0.2768, Top-1 Accuracies: ['20.80%', '21.80%', '18.80%', '49.00%']


100%|██████████| 125/125 [00:01<00:00, 106.88it/s]


Epoch [22/25], phase: train, samples: 4000, Loss: 0.2578, Top-1 Accuracies: ['25.27%', '24.35%', '23.67%', '54.85%']


100%|██████████| 16/16 [00:00<00:00, 533.51it/s]


Epoch [22/25], phase: val, samples: 500, Loss: 0.2726, Top-1 Accuracies: ['21.60%', '21.80%', '19.00%', '50.40%']


100%|██████████| 125/125 [00:01<00:00, 104.30it/s]


Epoch [23/25], phase: train, samples: 4000, Loss: 0.2548, Top-1 Accuracies: ['25.97%', '24.45%', '24.20%', '56.62%']


100%|██████████| 16/16 [00:00<00:00, 533.50it/s]


Epoch [23/25], phase: val, samples: 500, Loss: 0.2679, Top-1 Accuracies: ['23.40%', '22.00%', '20.00%', '53.60%']


100%|██████████| 125/125 [00:01<00:00, 106.87it/s]


Epoch [24/25], phase: train, samples: 4000, Loss: 0.2520, Top-1 Accuracies: ['26.62%', '24.52%', '24.40%', '58.38%']


100%|██████████| 16/16 [00:00<00:00, 320.09it/s]


Epoch [24/25], phase: val, samples: 500, Loss: 0.2666, Top-1 Accuracies: ['23.80%', '23.20%', '19.00%', '55.40%']


100%|██████████| 125/125 [00:01<00:00, 104.20it/s]


Epoch [25/25], phase: train, samples: 4000, Loss: 0.2493, Top-1 Accuracies: ['27.35%', '24.60%', '24.43%', '60.22%']


100%|██████████| 16/16 [00:00<00:00, 400.40it/s]

Epoch [25/25], phase: val, samples: 500, Loss: 0.2656, Top-1 Accuracies: ['23.00%', '22.40%', '18.40%', '57.80%']
Elapsed time: 29.52579379081726 seconds





mod_resnet(
  (conv1): Conv2d(3, 8, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(8, 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(4, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm

In [None]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu(out)

        return out
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x
class mod_resnet(nn.Module):
    def __init__(self, block, layers, channel_num_list, num_classes=1000):
        super(mod_resnet, self).__init__()
        self.in_channels = channel_num_list[0]
        self.conv1 = nn.Conv2d(3, channel_num_list[0], kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(channel_num_list[0])
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, channel_num_list[1], layers[0])
        self.layer2 = self._make_layer(block, channel_num_list[2], layers[1], stride=2)
        # self.layer3 = self._make_layer(block, channel_num_list[3], layers[2], stride=2)
        # self.layer4 = self._make_layer(block, channel_num_list[4], layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(channel_num_list[-1] * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        # x = self.layer3(x)
        # x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x