<a href="https://colab.research.google.com/github/giangkarry/Machine-Learning/blob/main/train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!git clone https://github.com/giangkarry/Machine-Learning.git

Cloning into 'Machine-Learning'...
remote: Enumerating objects: 8355, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 8355 (delta 14), reused 22 (delta 12), pack-reused 8329[K
Receiving objects: 100% (8355/8355), 488.03 MiB | 28.15 MiB/s, done.
Resolving deltas: 100% (16/16), done.
Checking out files: 100% (8330/8330), done.


In [11]:
import torch
from torch import nn
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from collections import namedtuple
from sklearn.metrics import classification_report, accuracy_score
from matplotlib.pyplot import figure

In [4]:
# Label names handed to classification_report as target_names.
# NOTE(review): assumed to match the class index order ImageFolder assigns — confirm.
classes = tuple('2C 3C 4C'.split())

# Lightweight (train, test) pair used for both datasets and data loaders.
TrainTest = namedtuple('TrainTest', 'train test')

# Image folders inside the cloned repository.
_data_root = '/content/Machine-Learning/DATA_CHAMBER_2021'
traindir = _data_root + '/train'
testdir = _data_root + '/test'

# Per-channel mean / std passed to transforms.Normalize.
mean_ = [0.485, 0.456, 0.406]
std_ = [0.229, 0.224, 0.225]

#raw_img
def raw_img(img_size):
  """Build the "raw" pipelines: resize to a square, then convert to a tensor.

  Args:
    img_size: Edge length, in pixels, of the square output image.

  Returns:
    A ``(transform_train, transform_test)`` tuple holding two separate but
    identically configured ``transforms.Compose`` objects.
  """
  target_shape = (img_size, img_size)

  def _resize_to_tensor():
    # Build a fresh Compose per split so the two pipelines are distinct objects.
    return transforms.Compose([
        transforms.Resize(target_shape),
        transforms.ToTensor(),
    ])

  return _resize_to_tensor(), _resize_to_tensor()

# preprocess_image
def preprocess_img(img_size):
  """Build the "preprocessed" pipelines: resize, convert to tensor, normalize.

  The train and test pipelines apply the same normalization (module-level
  ``mean_`` / ``std_``) so the model is evaluated on inputs scaled the same
  way as the ones it was trained on.

  Args:
    img_size: Edge length, in pixels, of the square output image.

  Returns:
    A ``(transform_train, transform_test)`` tuple of ``transforms.Compose``
    objects.
  """
  def _pipeline():
    # Resize with an explicit (h, w) already yields the exact target size,
    # so no additional crop step is needed.
    return transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_, std=std_),
    ])

  return _pipeline(), _pipeline()

# augmentation_image
def augmentation_img(img_size):
  """Build the "augmented" pipelines: random augmentation for train only.

  The training pipeline resizes to ``img_size``, takes a random crop of the
  same size from the image padded by 4 pixels, randomly flips horizontally
  and vertically, jitters brightness/contrast, and converts to a tensor.
  The test pipeline only resizes and converts to a tensor.

  Args:
    img_size: Edge length, in pixels, of the square output image.

  Returns:
    A ``(transform_train, transform_test)`` tuple of ``transforms.Compose``
    objects.
  """
  target_shape = (img_size, img_size)

  transform_train = transforms.Compose([
      transforms.Resize(target_shape),
      # Padding before cropping lets the crop window shift by up to 4 px
      # while the output keeps the target size.
      transforms.RandomCrop(size=target_shape, padding=4),
      transforms.RandomHorizontalFlip(),
      transforms.RandomVerticalFlip(),
      transforms.ColorJitter(brightness=0.3, contrast=0.1),
      transforms.ToTensor(),
  ])

  transform_test = transforms.Compose([
      transforms.Resize(target_shape),
      transforms.ToTensor(),
  ])
  return transform_train, transform_test


In [21]:
# Dataset preparation
def prepare_data(img_size, transform_factory=None):
  """Load the train and test image folders with the chosen transforms.

  Args:
    img_size: Edge length, in pixels, forwarded to the transform factory.
    transform_factory: Callable taking ``img_size`` and returning a
      ``(transform_train, transform_test)`` pair, e.g. ``raw_img``,
      ``preprocess_img`` or ``augmentation_img``. Defaults to
      ``preprocess_img`` when omitted.

  Returns:
    A ``TrainTest`` whose ``train`` / ``test`` fields are
    ``torchvision.datasets.ImageFolder`` datasets rooted at ``traindir`` and
    ``testdir`` respectively.
  """
  if transform_factory is None:
    transform_factory = preprocess_img
  transform_train, transform_test = transform_factory(img_size)

  trainset = torchvision.datasets.ImageFolder(root=traindir, transform=transform_train)
  testset = torchvision.datasets.ImageFolder(root=testdir, transform=transform_test)
  return TrainTest(train=trainset, test=testset)

# Wrap the datasets in batched loaders that feed the model
def prepare_loader(datasets, batch_size=32, num_workers=4):
    """Create the train and test ``DataLoader`` objects.

    Args:
        datasets: Object with ``train`` and ``test`` dataset attributes
            (the ``TrainTest`` returned by ``prepare_data``).
        batch_size: Number of samples per batch for both loaders.
        num_workers: Number of loader worker processes for both loaders;
            pass 0 to load in the main process.

    Returns:
        A ``TrainTest`` of loaders. Only the training loader shuffles; the
        test loader keeps dataset order.
    """
    trainloader = torch.utils.data.DataLoader(
        dataset=datasets.train,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
    testloader = torch.utils.data.DataLoader(
        dataset=datasets.test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )
    return TrainTest(train=trainloader, test=testloader)

In [6]:
# Training setup
def get_trainer(model):
  """Create the loss function and optimizer used to train ``model``.

  Args:
    model: Module whose ``parameters()`` the optimizer will update.

  Returns:
    A ``(loss, optimizer)`` tuple: an ``nn.CrossEntropyLoss`` instance and an
    SGD optimizer (lr 0.01, momentum 0.9, weight decay 5e-4).
  """
  sgd = torch.optim.SGD(
      model.parameters(),
      lr=0.01,
      momentum=0.9,
      weight_decay=5e-4,
  )
  criterion = nn.CrossEntropyLoss()
  return criterion, sgd

# Train the model for a single epoch
def train_epoch(epoch, model, loader, loss_func, optimizer, device):
    """Run one optimization pass over ``loader`` and report epoch metrics.

    Args:
        epoch: Index of the current epoch. Accepted for interface
            compatibility; not used by the computation.
        model: Module to train; switched to training mode here.
        loader: Iterable of ``(images, labels)`` batches exposing a
            ``dataset`` attribute whose length is the sample count.
        loss_func: Callable ``(outputs, labels) -> loss`` whose result
            supports ``backward()`` and ``item()``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(train_loss, train_accuracy)`` as plain Python floats: the
        sample-weighted mean loss and the fraction of correct predictions,
        both divided by ``len(loader.dataset)``.

    Raises:
        ValueError: If ``loader.dataset`` is empty.
    """
    model.train()

    n_samples = len(loader.dataset)
    if n_samples == 0:
        raise ValueError("cannot train on an empty dataset")

    running_loss = 0.0
    n_correct = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs, 1)
        # loss is multiplied back by the batch size so that a smaller final
        # batch is weighted correctly in the epoch average.
        running_loss += loss.item() * images.size(0)
        # .item() keeps the counter a host-side int instead of a device tensor.
        n_correct += torch.sum(preds == labels).item()

    train_loss = running_loss / n_samples
    train_accuracy = n_correct / n_samples
    return train_loss, train_accuracy
    

In [14]:
#hàm test trong mỗi epoch
def test_epoch(epoch, model, loader, device):
    model.eval()

    ypred = []
    ytrue = []
    for i, (images, labels) in enumerate(loader):
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, dim=1)
      ypred += list(predicted.cpu().numpy())
      ytrue += list(labels.cpu().numpy())
    return ypred, ytrue

In [15]:
# Entry point: build a model, train it, and evaluate after every epoch
def _build_model(name):
  """Create an untrained torchvision network with a ``len(classes)``-way head.

  Args:
    name: One of ``'vgg16'``, ``'vgg19'`` or ``'resnet50'``.

  Returns:
    ``(model, checkpoint_path)`` where ``checkpoint_path`` is
    ``'./<name>.pth'``.

  Raises:
    ValueError: If ``name`` is not a supported architecture.
  """
  n_classes = len(classes)
  if name in ('vgg16', 'vgg19'):
    net = getattr(torchvision.models, name)()
    # Swap only the last classifier layer so the output size matches the labels.
    head = net.classifier[-1]
    net.classifier[-1] = nn.Linear(in_features=head.in_features, out_features=n_classes)
  elif name == 'resnet50':
    net = torchvision.models.resnet50()
    net.fc = nn.Linear(in_features=net.fc.in_features, out_features=n_classes)
  else:
    raise ValueError(
        f"unsupported model {name!r}; expected 'vgg16', 'vgg19' or 'resnet50'")
  return net, f'./{name}.pth'


def main(model = None, img_size = None, n_epoch = 8, save_weights = False):
  """Train the named architecture and report test metrics after each epoch.

  Args:
    model: Architecture name: ``'vgg16'``, ``'vgg19'`` or ``'resnet50'``.
    img_size: Edge length, in pixels, of the square input images.
    n_epoch: Number of training epochs.
    save_weights: When true, write the trained ``state_dict`` to
      ``'./<model>.pth'`` after the last epoch.

  Returns:
    ``(model, training_loss, training_accuracy)``: the trained module and two
    lists of Python floats with one entry per epoch.

  Raises:
    ValueError: If ``model`` is not a supported name or ``img_size`` is
      missing.
  """
  # Validate arguments before touching the dataset so mistakes fail fast.
  if img_size is None:
    raise ValueError("img_size is required, e.g. main('vgg16', 224)")
  net, checkpoint_path = _build_model(model)

  datasets = prepare_data(img_size)
  loaders = prepare_loader(datasets)

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  net.to(device)

  training_loss, training_accuracy = [], []
  loss, optimizer = get_trainer(net)
  for epoch in range(n_epoch):
    # Train
    train_loss, train_acc = train_epoch(epoch, net, loaders.train, loss, optimizer, device)
    # float() turns 0-dim tensors (including CUDA ones) into host scalars so
    # the returned history can be plotted directly.
    train_loss, train_acc = float(train_loss), float(train_acc)
    print(f"training loss: {train_loss: 0.4f}, training_accuracy: {train_acc: 0.4f}")
    training_loss.append(train_loss)
    training_accuracy.append(train_acc)

    # Test
    ypred, ytrue = test_epoch(epoch, net, loaders.test, device)
    print(classification_report(ytrue, ypred, target_names=classes))
    print(accuracy_score(ytrue, ypred))

  if save_weights:
    torch.save(net.state_dict(), checkpoint_path)
  return net, training_loss, training_accuracy

In [None]:
# VGG16, 32x32 input: train, then plot the per-epoch training curves.
model, training_loss, training_accuracy = main('vgg16', 32)

# One x-position per recorded epoch, numbered from 1.
epoch = list(range(1, len(training_loss) + 1))
# float() also moves 0-dim (possibly CUDA) tensors to host scalars matplotlib can plot.
accuracy_values = [float(acc) for acc in training_accuracy]

plt.plot(epoch, training_loss, label='training_loss')
plt.plot(epoch, accuracy_values, label='training_accuracy')
plt.xlabel("epochs")
plt.ylabel("loss & accuracy")
plt.title("VGG16_preprocess(32x32)")
plt.legend()
plt.show()


In [None]:
# VGG16, 224x224 input: train, then plot the per-epoch training curves.
model, training_loss, training_accuracy = main('vgg16', 224)

# One x-position per recorded epoch, numbered from 1.
epoch = list(range(1, len(training_loss) + 1))
# float() also moves 0-dim (possibly CUDA) tensors to host scalars matplotlib can plot.
accuracy_values = [float(acc) for acc in training_accuracy]

plt.plot(epoch, training_loss, label='training_loss')
plt.plot(epoch, accuracy_values, label='training_accuracy')
plt.xlabel("epochs")
plt.ylabel("loss & accuracy")
plt.title("VGG16_preprocess(224x224)")
plt.legend()
plt.show()

In [None]:
# VGG19, 32x32 input: train, then plot the per-epoch training curves.
model, training_loss, training_accuracy = main('vgg19', 32)

# One x-position per recorded epoch, numbered from 1.
epoch = list(range(1, len(training_loss) + 1))
# float() also moves 0-dim (possibly CUDA) tensors to host scalars matplotlib can plot.
accuracy_values = [float(acc) for acc in training_accuracy]

plt.plot(epoch, training_loss, label='training_loss')
plt.plot(epoch, accuracy_values, label='training_accuracy')
plt.xlabel("epochs")
plt.ylabel("loss & accuracy")
plt.title("VGG19_preprocess(32x32)")
plt.legend()
plt.show()

In [None]:
# VGG19, 224x224 input: train, then plot the per-epoch training curves.
model, training_loss, training_accuracy = main('vgg19', 224)

# One x-position per recorded epoch, numbered from 1.
epoch = list(range(1, len(training_loss) + 1))
# float() also moves 0-dim (possibly CUDA) tensors to host scalars matplotlib can plot.
accuracy_values = [float(acc) for acc in training_accuracy]

plt.plot(epoch, training_loss, label='training_loss')
plt.plot(epoch, accuracy_values, label='training_accuracy')
plt.xlabel("epochs")
plt.ylabel("loss & accuracy")
plt.title("VGG19_preprocess(224)")
plt.legend()
plt.show()

In [None]:
# ResNet50, 32x32 input: train, then plot the per-epoch training curves.
model, training_loss, training_accuracy = main('resnet50', 32)

# One x-position per recorded epoch, numbered from 1.
epoch = list(range(1, len(training_loss) + 1))
# float() also moves 0-dim (possibly CUDA) tensors to host scalars matplotlib can plot.
accuracy_values = [float(acc) for acc in training_accuracy]

plt.plot(epoch, training_loss, label='training_loss')
plt.plot(epoch, accuracy_values, label='training_accuracy')
plt.xlabel("epochs")
plt.ylabel("loss & accuracy")
plt.title("Resnet50_preprocess(32x32)")
plt.legend()
plt.show()

In [None]:
# ResNet50, 224x224 input.
# main() returns (model, training_loss, training_accuracy); unpack all three so
# `model` is the trained module rather than the whole tuple.
model, training_loss, training_accuracy = main('resnet50', 224)