<a href="https://colab.research.google.com/github/chenyq121/760-2022S2/blob/main/dexin_optunaBaselineCNNTest_8bin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (Colab only) so files stored there can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torchvision
import os
import torch.utils.data
import matplotlib.pyplot as plt
from PIL import Image



# data processing 
class ClassifyDataset(torch.utils.data.Dataset):
    """Image dataset whose class label is the name of each image's parent folder.

    Args:
        root_path: Directory containing one sub-directory per class.
        data_file: Text file listing one image path per line, relative to
            ``root_path`` (e.g. ``bin3/6087.jpg``).
        img_size: Images are resized to ``(img_size, img_size)``.

    Each item is an ``(image_tensor, label_tensor)`` pair, where the label is
    the index of the image's parent folder in ``class_list``.
    """

    def __init__(self,root_path,data_file,img_size=120):
        # np.str was deprecated in NumPy 1.20 and later removed; the builtin
        # str is the drop-in replacement. atleast_1d keeps a one-line list
        # file from collapsing into a 0-d array that has no len().
        self.data_files=np.atleast_1d(np.loadtxt(data_file,dtype=str))
        self.root_path=root_path
        # os.listdir returns entries in arbitrary order and also returns plain
        # files, so keep only sub-directories and sort them to make the
        # folder-name -> label-index mapping deterministic across runs.
        self.class_list=sorted(
            name for name in os.listdir(root_path)
            if os.path.isdir(os.path.join(root_path,name))
        )
        self.transforms=torchvision.transforms.Compose(
            [
                torchvision.transforms.Resize((img_size,img_size)),
                torchvision.transforms.ToTensor()
            ]
        )

    def __getitem__(self, item):
        """Return ``(image, label)`` for the ``item``-th entry of the list file.

        Raises:
            ValueError: if the image's parent folder is not a class folder
                found under ``root_path``.
        """
        data_file=os.path.join(self.root_path,str(self.data_files[item]))
        # Close the file handle promptly; convert() returns a loaded copy.
        with Image.open(data_file) as raw_img:
            img=raw_img.convert('RGB') # three channels
        # The label is the name of the folder that directly contains the image.
        label_name=os.path.basename(os.path.dirname(data_file))
        try:
            label=self.class_list.index(label_name)
        except ValueError:
            raise ValueError(
                "unknown class folder %r for image %r" % (label_name,data_file)
            ) from None
        return self.transforms(img),torch.tensor(label)

    def __len__(self):
        """Number of image paths listed in the data file."""
        return len(self.data_files)



# model building
class CNet(nn.Module):
    """Small CNN classifier: a stack of conv blocks followed by a two-layer head.

    Args:
        num_classes: Size of the output layer.
        n_layer: The network has ``n_layer - 1`` conv blocks (this off-by-one
            is kept so existing checkpoints still load). Must be >= 2; the
            default of 1 is kept for interface compatibility but has never
            produced a usable network.
        kernel_size: Kernel size of every conv layer. Padding is fixed at 1,
            so kernels larger than 3 shrink the feature map at each block.

    Channel widths are 3 -> 32 -> 64 -> 128 -> ... (doubling per block).
    ``forward`` returns ``(logits, prob)`` where ``prob`` is the softmax of
    ``logits`` over the class dimension.
    """

    def __init__(self,num_classes=21, n_layer = 1, kernel_size = 1):
        super(CNet,self).__init__()
        if n_layer < 2:
            # Previously this failed later with an unrelated TypeError because
            # the loop below never ran and the layer widths were undefined.
            raise ValueError("n_layer must be >= 2, got %r" % (n_layer,))

        self.convList = nn.ModuleList()

        in_channels = 3
        out_channels = in_channels
        for i in range(1, n_layer):
            out_channels = 2 ** (i + 4)
            self.convList.append(
                nn.Sequential(
                    # in channels, out channels, kernel size, stride, padding
                    nn.Conv2d(in_channels, out_channels, kernel_size, 1, padding=1),
                    nn.BatchNorm2d(out_channels), # batch normalization
                    nn.ReLU()
                )
            )
            in_channels = out_channels

        self.pool = nn.AvgPool2d(2, 2)
        # After global average pooling the feature vector has exactly
        # out_channels entries, so size the head from that. For n_layer >= 3
        # this equals the previous "2 * input", so parameter shapes are
        # unchanged; for n_layer == 2 the old value (6) did not match.
        self.fclayer=nn.Sequential(
                nn.Linear(out_channels, 2 * out_channels),
                nn.ReLU(),
                nn.Linear(2 * out_channels, num_classes)
        )
        self.avg_pool=nn.AdaptiveAvgPool2d((1, 1))
        self.softmax=nn.Softmax(dim=1)

    def forward(self,x):
        """Run the network on a batch of images; return ``(logits, prob)``."""
        # Every conv block is followed by 2x2 average pooling.
        for conv in self.convList:
            x = self.pool(conv(x))
        x = self.avg_pool(x)
        x = torch.flatten(x,1)
        logits=self.fclayer(x)
        prob=self.softmax(logits)
        return logits,prob

    


# training
def training(model,root_path,train_data_file,batch_size,lr, epoch_num,
             save_path='/content/drive/MyDrive/model/60-20-20model_8bin_120-optune.pth'):
    """Train ``model`` with Adam + cross-entropy, save its weights, plot the loss.

    Args:
        model: Network returning ``(logits, prob)``; it is trained on whatever
            device its parameters already live on.
        root_path: Dataset root passed to ``ClassifyDataset``.
        train_data_file: List file of training images.
        batch_size: Mini-batch size.
        lr: Adam learning rate.
        epoch_num: Number of passes over the training set.
        save_path: Where the final ``state_dict`` is written.

    Raises:
        ValueError: if the training list is empty.
    """
    # Use the model's own device instead of relying on a global `device`.
    device=next(model.parameters()).device

    # get training data
    train_dataset=ClassifyDataset(root_path,train_data_file)
    sample_count=len(train_dataset)
    if sample_count==0:
        raise ValueError("training list %r contains no samples" % (train_data_file,))
    train_dataloader=torch.utils.data.DataLoader(train_dataset,batch_size,shuffle=True,num_workers=0)

    # loss function and optimizer
    criterion=nn.CrossEntropyLoss()
    optimizer=torch.optim.Adam(model.parameters(),lr=lr)

    # Make sure BatchNorm uses batch statistics even if the model was
    # switched to eval mode by an earlier evaluation.
    model.train()

    total_loss=[]

    for epoch in range(epoch_num):
        print(epoch+1,"epoch:")
        epoch_loss=0.0
        for train_img,train_label in train_dataloader:
            train_img=train_img.to(device)
            train_label=train_label.to(device)

            ## get loss result
            train_logits,train_prob=model(train_img)
            train_loss=criterion(train_logits,train_label)

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            train_pred=torch.argmax(train_prob,dim=1)
            train_acc=(train_pred==train_label).float().mean()
            print('loss:',train_loss.item(), 'acc:', train_acc.item())
            # Accumulate a plain float weighted by the real batch size, so the
            # last (possibly smaller) batch is weighted correctly and no
            # autograd tensors are kept alive across iterations.
            epoch_loss+=train_loss.item()*train_label.size(0)

        total_loss.append(epoch_loss/sample_count)

    torch.save(model.state_dict(),save_path)

    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.plot(total_loss)
    plt.legend(['train loss'])
    plt.show()

# test
def test(model,root_path,test_data_file,batch_size=None,
         model_path='/content/drive/MyDrive/model/60-20-20model_8bin_120-optune.pth'):
    """Evaluate saved weights on the test list and return per-sample predictions.

    Args:
        model: Network returning ``(logits, prob)``; evaluated on the device
            its parameters already live on.
        root_path: Dataset root passed to ``ClassifyDataset``.
        test_data_file: List file of test images.
        batch_size: Evaluation batch size. When omitted, the module-level
            ``batch_size`` is used if defined (the previous behaviour),
            otherwise 256.
        model_path: Checkpoint (``state_dict``) to load into ``model``.

    Returns:
        pandas.DataFrame with columns ``true_label`` and ``pred_label``, one
        row per test sample in list-file order (all batches, not only the
        last one).
    """
    if batch_size is None:
        batch_size=globals().get('batch_size',256)

    # Use the model's own device instead of relying on a global `device`.
    device=next(model.parameters()).device

    state_dict = torch.load(model_path, map_location=device)
    # strict=False is kept, but report any mismatch: silently skipped keys
    # would mean evaluating partly random weights.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print('warning: checkpoint does not fully match the model;',
              'missing keys:',load_result.missing_keys,
              'unexpected keys:',load_result.unexpected_keys)
    model.eval() # BatchNorm uses running statistics; Dropout is disabled

    test_dataset=ClassifyDataset(root_path,test_data_file)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, num_workers=0)

    criterion = nn.CrossEntropyLoss()

    true_chunks=[]
    pred_chunks=[]
    correct=0

    with torch.no_grad():
        for test_img,test_label in test_dataloader:
            test_img=test_img.to(device)
            test_label=test_label.to(device)

            test_logits,test_prob=model(test_img)
            test_loss=criterion(test_logits,test_label)

            # Top1 accuracy: correct number/total number
            test_pred = torch.argmax(test_prob, dim=1)
            hits = (test_pred == test_label)
            correct += int(hits.sum().item())

            true_chunks.append(test_label.cpu().numpy())
            pred_chunks.append(test_pred.cpu().numpy())

            print('loss:',test_loss.item(), 'top1:',hits.float().mean().item() )

    sample_count=len(test_dataset)
    # An empty test list yields NaN accuracy and an empty frame, not a crash.
    total_acc=correct/sample_count if sample_count else float('nan')
    print('\n')
    print('Top-1 Accuracy:',total_acc)

    empty=np.array([],dtype=np.int64)
    px = pd.DataFrame({
        "true_label": np.concatenate(true_chunks) if true_chunks else empty,
        "pred_label": np.concatenate(pred_chunks) if pred_chunks else empty,
    })
    return px


# def objective(trial):

#     params = {
#               'n_layer': trial.suggest_int('n_layer', 3, 5, step = 2),
#               'kernel_size': trial.suggest_int('kernel_size', 3, 5, step = 2),
#               'lr': trial.suggest_categorical('lr', [0.1, 0.01, 0.001]),
#               'epoch_num': trial.suggest_categorical('epoch_num', [10, 50, 100])
#               }

#     # get model and put model on the device
#     model=CNet(n_layer = params['n_layer'], kernel_size = params['kernel_size'])
#     model = model.to(device)

#     training(model,root_path,train_data_file,batch_size,lr = params['lr'], epoch_num = params['epoch_num'])
#     # torch.cuda.empty_cache()
#     return test(model,root_path,test_data_file)




In [None]:
# main function: build the 8-bin model and evaluate the saved checkpoint on the test list
if __name__ == '__main__':

    root_path = r'/content/drive/MyDrive/original/'
    #train_data_file=r'/content/drive/MyDrive/original/8bintrainval.txt'
    test_data_file=r'/content/drive/MyDrive/original/8bintest.txt'
    batch_size=2000
    # lr=best_trial.params['lr'] #learning rate
    # epoch_num=best_trial.params['epoch_num']
    # layers=best_trial.params['n_layer']
    # kernals=best_trial.params['kernel_size']
    # lr=0.01 #learning rate
    # epoch_num=10
    layers=5
    kernals=5
    # Fall back to the CPU when no GPU runtime is attached instead of
    # crashing on a hard-coded 'cuda:0'.
    device='cuda:0' if torch.cuda.is_available() else 'cpu'
    # get model and put model on the device
    model=CNet(n_layer = layers, kernel_size = kernals)
    model.to(device)

    #training(model,root_path,train_data_file,batch_size,lr,epoch_num)
    torch.cuda.empty_cache()
    px = test(model, root_path,test_data_file)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  app.launch_new_instance()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
label: 3
torch_label: tensor(3)
label_name: bin3
label: 2
torch_label: tensor(2)
label_name: bin8
label: 7
torch_label: tensor(7)
label_name: bin4
label: 3
torch_label: tensor(3)
label_name: bin6
label: 5
torch_label: tensor(5)
label_name: bin6
label: 5
torch_label: tensor(5)
label_name: bin6
label: 5
torch_label: tensor(5)
label_name: bin1
label: 0
torch_label: tensor(0)
label_name: bin5
label: 4
torch_label: tensor(4)
label_name: bin7
label: 6
torch_label: tensor(6)
label_name: bin1
label: 0
torch_label: tensor(0)
label_name: bin4
label: 3
torch_label: tensor(3)
label_name: bin5
label: 4
torch_label: tensor(4)
label_name: bin7
label: 6
torch_label: tensor(6)
label_name: bin6
label: 5
torch_label: tensor(5)
label_name: bin7
label: 6
torch_label: tensor(6)
label_name: bin1
label: 0
torch_label: tensor(0)
label_name: bin4
label: 3
torch_label: tensor(3)
label_name: bin2
label: 1
torch_label: tensor(1)
label_name: bin3
labe

In [None]:
# Display the per-sample true/predicted label table returned by test().
px

Unnamed: 0,true_label,pred_label
0,5,3
1,7,0
2,1,2
3,6,6
4,0,7
...,...,...
1750,7,7
1751,5,3
1752,1,0
1753,1,3


In [None]:
# Collect the image ID (file name without extension) of every entry in the
# list file, in file order.
# NOTE(review): this reads 4bintest.txt although the evaluation used
# 8bintest.txt - presumably both lists hold the same images in the same
# order; confirm before relying on the ID <-> prediction pairing.
ids = []
with open('/content/drive/MyDrive/original/4bintest.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line would otherwise raise IndexError below.
            continue
        # Entries look like "<folder>/<id>.<ext>"; the loop variable is
        # deliberately not called `id`, to avoid shadowing the builtin.
        ids.append(line.split("/")[1].split(".")[0])
print(ids)

['6087', '9618', '4809', '2828', '2057', '385', '9134', '5355', '8804', '7952', '4009', '4229', '8118', '7466', '1364', '3772', '5717', '45', '4200', '4921', '8892', '7331', '7382', '9420', '8217', '193', '1439', '3554', '9255', '3251', '3378', '8531', '7233', '9152', '7181', '7036', '5835', '7992', '267', '2368', '105', '1471', '800', '3207', '9688', '8877', '7815', '5391', '6890', '3512', '9327', '4754', '6303', '7555', '5279', '9268', '1495', '4670', '8059', '4018', '8747', '5000', '6336', '8838', '4979', '4863', '3371', '6167', '7983', '1179', '4767', '3777', '5405', '2814', '1883', '9476', '5599', '7943', '4883', '9736', '4360', '7390', '6813', '1631', '5690', '2221', '266', '7454', '3706', '4435', '6108', '3717', '4580', '6908', '1147', '8137', '9852', '7422', '5645', '2588', '1535', '2367', '3874', '377', '6275', '1223', '8053', '6481', '7226', '7446', '9385', '3337', '8907', '7808', '1547', '6148', '2968', '2951', '5384', '8399', '8753', '3278', '8032', '5893', '6257', '5323', 

In [None]:
# Attach the image IDs as the first column. Guarded so that re-running the
# cell does not add a second "ID" column (the previous allow_duplicates=True
# did so silently, which makes px["ID"] return a DataFrame instead of a Series).
if "ID" not in px.columns:
    px.insert(0, "ID", ids)

In [None]:
# Distinct class indices the model predicted, in order of first appearance.
px["pred_label"].unique()

array([3, 0, 2, 6, 7, 4, 1, 5])

In [None]:
# Distinct ground-truth class indices, in order of first appearance.
px["true_label"].unique()

array([5, 7, 1, 6, 0, 4, 2, 3])

In [None]:
# Absolute distance, in label indices, between the predicted and the true label.
px["difference"] = (px["pred_label"] - px["true_label"]).abs()

In [None]:
# Display the table again, now with the ID and difference columns.
px

Unnamed: 0,ID,true_label,pred_label,difference
0,6087,5,3,2
1,9618,7,0,7
2,4809,1,2,1
3,2828,6,6,0
4,2057,0,7,7
...,...,...,...,...
1750,9233,7,7,0
1751,3978,5,3,2
1752,8122,1,0,1
1753,5903,1,3,2


In [None]:
# Write the result table to Google Drive as CSV, without the row index.
px.to_csv(path_or_buf="/content/drive/MyDrive/eight_bin_diff_result.csv", index=False)

In [None]:
# Rows where the prediction matches the true label exactly.
res = px.loc[px["difference"] == 0]

In [None]:
# Number of correctly classified test samples.
len(res)

237

In [None]:
# IDs of the correctly classified samples.
correct_id = res["ID"]

In [None]:
# Show the correctly classified IDs as a plain Python list.
correct_id.to_list()

['2828',
 '7952',
 '8118',
 '7382',
 '3251',
 '7181',
 '7036',
 '5835',
 '800',
 '8877',
 '4754',
 '1495',
 '4670',
 '8059',
 '7943',
 '9736',
 '7390',
 '7454',
 '4580',
 '6415',
 '7213',
 '7364',
 '1330',
 '8124',
 '8483',
 '2165',
 '7100',
 '6500',
 '5366',
 '3160',
 '4666',
 '7302',
 '8516',
 '9686',
 '72',
 '1696',
 '2098',
 '3193',
 '8421',
 '7698',
 '9468',
 '562',
 '9413',
 '7433',
 '6388',
 '4239',
 '2547',
 '2778',
 '7145',
 '8093',
 '7337',
 '9710',
 '8345',
 '6536',
 '4760',
 '7869',
 '2772',
 '4888',
 '9462',
 '189',
 '1931',
 '7577',
 '3440',
 '69',
 '7633',
 '2346',
 '281',
 '4851',
 '49',
 '201',
 '1726',
 '1942',
 '9976',
 '7293',
 '518',
 '7188',
 '6019',
 '7764',
 '6716',
 '8383',
 '5853',
 '8078',
 '4361',
 '3537',
 '5293',
 '8994',
 '2395',
 '5051',
 '7768',
 '9380',
 '7818',
 '3540',
 '1685',
 '2300',
 '5592',
 '5371',
 '5195',
 '3669',
 '8096',
 '4491',
 '3807',
 '4196',
 '3816',
 '6116',
 '2219',
 '4362',
 '6142',
 '4096',
 '6387',
 '7251',
 '1470',
 '4579',
 '39