In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
import torchvision.transforms as transforms
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import os
import time
import pynvml
import threading
import queue# type: ignore
from torch.cuda.amp import GradScaler
from vit_code import ViT
from CNN_info import *

In [2]:
# Resolve the notebook's working directory. The per-dataset result folders used
# by the training cells are created directly beneath it.
# (`Path` comes from the import cell at the top of the notebook.)
current_path = Path.cwd()
print('The current path is:', current_path)

# # find the data path
# data_path = Path(current_path / 'ModelsData')
# print('The data path is:', data_path)

The current path is: /home/GreenAI/a5000


In [3]:
# Per-dataset input/output sizes used when the ViT models are constructed.

# Number of target classes for each dataset.
num_labels_f = 10       # Fashion-MNIST
num_labels_c10 = 10     # CIFAR-10
num_labels_c100 = 100   # CIFAR-100

# Number of image channels: one value for Fashion-MNIST, one shared by
# CIFAR-10 and CIFAR-100.
channel_f, channel_c = 1, 3

In [4]:
# Build one ViT per dataset; only the class count and the input channel count differ.
vit_f = ViT(num_classes=num_labels_f, channels=channel_f)
vit_c100 = ViT(num_classes=num_labels_c100, channels=channel_c)
vit_c10 = ViT(num_classes=num_labels_c10, channels=channel_c)


def _vit_complexity(model, in_channels, image_size=224):
    """Return the ``(macs, params)`` pair reported by ptflops for ``model``.

    Args:
        model: the network to profile.
        in_channels: number of channels of the profiling input.
        image_size: height and width of the profiling input (default 224).

    Returns:
        Whatever ``get_model_complexity_info`` returns with ``as_strings=True``,
        i.e. the MAC count and the parameter count as formatted strings.

    The per-layer breakdown is printed as a side effect
    (``print_per_layer_stat=True``).
    """
    def _measure():
        return get_model_complexity_info(
            model,
            (in_channels, image_size, image_size),
            as_strings=True,
            print_per_layer_stat=True,
            verbose=True,
        )

    # Entering torch.cuda.device(0) fails on a machine without CUDA, so the GPU
    # is only selected when one is present; the notebook already falls back to
    # the CPU when it picks the training device.
    if torch.cuda.is_available():
        with torch.cuda.device(0):
            return _measure()
    return _measure()


macs_f, params_f = _vit_complexity(vit_f, channel_f)
macs_c100, params_c100 = _vit_complexity(vit_c100, channel_c)
macs_c10, params_c10 = _vit_complexity(vit_c10, channel_c)

_summary = [
    ('Computational complexity of ViT-F: ', macs_f),
    ('Number of parameters of ViT-F: ', params_f),
    ('Computational complexity of ViT-C100: ', macs_c100),
    ('Number of parameters of ViT-C100: ', params_c100),
    ('Computational complexity of ViT-C10: ', macs_c10),
    ('Number of parameters of ViT-C10: ', params_c10),
]
# Every label is longer than the 30 columns the format used to reserve, so the
# padding never took effect. Pad to the longest label to align the values.
_label_width = max(len(label) for label, _ in _summary)
for _label, _value in _summary:
    print('{:<{width}}  {:<8}'.format(_label, _value, width=_label_width))

ViT(
  51.45 M, 99.899% Params, 2.57 GMac, 98.787% MACs, 
  (to_patch_embedding): Sequential(
    1.05 M, 2.046% Params, 51.53 MMac, 1.980% MACs, 
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=32, p2=32)
    (1): LayerNorm(2.05 k, 0.004% Params, 50.18 KMac, 0.002% MACs, (1024,), eps=1e-05, elementwise_affine=True)
    (2): Linear(1.05 M, 2.038% Params, 51.43 MMac, 1.976% MACs, in_features=1024, out_features=1024, bias=True)
    (3): LayerNorm(2.05 k, 0.004% Params, 50.18 KMac, 0.002% MACs, (1024,), eps=1e-05, elementwise_affine=True)
  )
  (dropout): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.1, inplace=False)
  (transformer): Transformer(
    50.38 M, 97.833% Params, 2.52 GMac, 96.806% MACs, 
    (norm): LayerNorm(2.05 k, 0.004% Params, 51.2 KMac, 0.002% MACs, (1024,), eps=1e-05, elementwise_affine=True)
    (layers): ModuleList(
      (0-5): 6 x ModuleList(
        (0): Attention(
          4.2 M, 8.150% Params, 209.82 MMac, 8.063% MACs, 
          (norm): 

In [5]:
# Experiment grid consumed by the training cells: they loop over every value in
# `epochs`, then every value in `batch_size`, and repeat each pair `rounds` times.
rounds = 1
epochs = [5]
batch_size = [128]

In [6]:
# Learning rate handed to train_model for every run.
lr = 0.01

# Train on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('The device is:', device)

The device is: cuda


In [7]:
# Sampling interval handed to train_model: 2e-3 s, i.e. one sample every 2 ms.
sampling_interval = 2e-3

In [8]:
# Dataset identifiers. The training cells index this list by position
# (datasets[1], datasets[2], ...), so the order must not change.
datasets = [
    'fashion_mnist',
    'cifar100',
    'cifar10',
]

In [9]:
# Models to train, grouped by dataset. Each training cell iterates over one of
# these lists; append further models to a list to include them in that sweep.
models_f_list = [
    vit_f,
]

models_c100_list = [
    vit_c100,
]

models_c10_list = [
    vit_c10,
]

In [10]:
# Disabled: Fashion-MNIST training run, kept commented out for reference.
# It follows the same steps as the CIFAR-100 / CIFAR-10 cells, but uses
# models_f_list and datasets[0].
# for i in range(len(models_f_list)):
#     # for each start, clear the cache in the gpu 
#     torch.cuda.empty_cache()
#     # for each start, clear the memory in the gpu using the torch
#     net = models_f_list[i]
#     main_folder = os.path.join(current_path, datasets[0])  
#     # main_folder = os.path.join(main_folder, datasets[1])  
#     # main_folder = os.path.join(main_folder, datasets[2])  

#     # transfer the main_folder to Path
#     main_folder = Path(main_folder)

#     print('The folder is:', main_folder)
#     if main_folder.exists():
#         print("文件存在。")
#     else:
#         os.makedirs(main_folder)
#         print("文件不存在，已创建。")
#         print("文件创建于：", main_folder)
#     for epoch in epochs:
#         for batch in batch_size:
#             for round in range(rounds):
#                 train_model(main_folder, batch, epoch, round, lr, device, sampling_interval, net, datasets[0])

In [11]:
# Train every CIFAR-100 model over the epochs x batch_size x rounds grid.
for net in models_c100_list:
    # Release cached, unused GPU memory before starting on the next model.
    torch.cuda.empty_cache()

    # Results for this dataset go to <current_path>/<dataset name>.
    main_folder = Path(current_path) / datasets[1]

    print('The folder is:', main_folder)
    if main_folder.exists():
        # Message reads: "The file exists."
        print("文件存在。")
    else:
        # exist_ok=True keeps this safe if the folder appears between the
        # existence check above and the creation here.
        main_folder.mkdir(parents=True, exist_ok=True)
        # Messages read: "The file did not exist and has been created." /
        # "The file was created at:"
        print("文件不存在，已创建。")
        print("文件创建于：", main_folder)

    # NOTE(review): the same `net` instance is passed to every run of the grid.
    # Whether each run starts from fresh weights depends on train_model - confirm
    # before sweeping more than one configuration.
    for epoch in epochs:
        for batch in batch_size:
            # Named `run_idx` rather than `round` so the builtin round() is not
            # overwritten in the notebook namespace.
            for run_idx in range(rounds):
                train_model(main_folder, batch, epoch, run_idx, lr, device,
                            sampling_interval, net, datasets[1])

The folder is: /home/GreenAI/a5000/cifar100
文件存在。
The epoch is set: 5, batch is set: 128, is in 1th running
The folder path is: /home/GreenAI/a5000/cifar100/E5_B128_R0_SR2


100.0%


the shape of the 0 batch of the train_iter is: torch.Size([128, 3, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([128, 3, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([128, 3, 224, 224])
The number of batches is: (391,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch is: 28
The batch is: 29
The batch is: 30
The batch is: 31
The batch is: 32
The batch is: 33
The batch is: 34
The batch is: 35
The batch is: 36
The batch is: 37
The batch is: 38
The batch is: 39
The batch is: 40
The batch is: 41
The batch is

In [12]:
# Train every CIFAR-10 model over the epochs x batch_size x rounds grid.
for net in models_c10_list:
    # Release cached, unused GPU memory before starting on the next model.
    torch.cuda.empty_cache()

    # Results for this dataset go to <current_path>/<dataset name>.
    main_folder = Path(current_path) / datasets[2]

    print('The folder is:', main_folder)
    if main_folder.exists():
        # Message reads: "The file exists."
        print("文件存在。")
    else:
        # exist_ok=True keeps this safe if the folder appears between the
        # existence check above and the creation here.
        main_folder.mkdir(parents=True, exist_ok=True)
        # Messages read: "The file did not exist and has been created." /
        # "The file was created at:"
        print("文件不存在，已创建。")
        print("文件创建于：", main_folder)

    # NOTE(review): the same `net` instance is passed to every run of the grid.
    # Whether each run starts from fresh weights depends on train_model - confirm
    # before sweeping more than one configuration.
    for epoch in epochs:
        for batch in batch_size:
            # Named `run_idx` rather than `round` so the builtin round() is not
            # overwritten in the notebook namespace.
            for run_idx in range(rounds):
                train_model(main_folder, batch, epoch, run_idx, lr, device,
                            sampling_interval, net, datasets[2])

The folder is: /home/GreenAI/a5000/cifar10
文件存在。
The epoch is set: 5, batch is set: 128, is in 1th running
The folder path is: /home/GreenAI/a5000/cifar10/E5_B128_R0_SR2


100.0%


the shape of the 0 batch of the train_iter is: torch.Size([128, 3, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([128, 3, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([128, 3, 224, 224])
The number of batches is: (391,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch is: 28
The batch is: 29
The batch is: 30
The batch is: 31
The batch is: 32
The batch is: 33
The batch is: 34
The batch is: 35
The batch is: 36
The batch is: 37
The batch is: 38
The batch is: 39
The batch is: 40
The batch is: 41
The batch is