In [2]:
from PIL import Image
import numpy as np
import os
import glob
import numpy as np
import pandas as pd

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

import openpyxl
from tqdm import tqdm


In [3]:
os.getcwd()

'/home2/jh981017/myubai/machinelearning'

In [4]:
# 각 섹션별 이미지가 담겨 있는 폴더의 경로들을 가져옴

section_folders = glob.glob('/home2/jh981017/myubai/NaverNews/*')
section_folders

['/home2/jh981017/myubai/NaverNews/economy',
 '/home2/jh981017/myubai/NaverNews/life',
 '/home2/jh981017/myubai/NaverNews/politics',
 '/home2/jh981017/myubai/NaverNews/science',
 '/home2/jh981017/myubai/NaverNews/society',
 '/home2/jh981017/myubai/NaverNews/world']

In [5]:
root = '/home2/jh981017/myubai/NaverNews'
sections = os.listdir(root)
sections

['economy', 'life', 'politics', 'science', 'society', 'world']

In [6]:
text_paths = []
for folder, section in zip(section_folders, sections):
  text_path = folder + '/' + section + 'text1' + '.xlsx'
  text_paths.append(text_path)

text_paths

['/home2/jh981017/myubai/NaverNews/economy/economytext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/life/lifetext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/politics/politicstext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/science/sciencetext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/society/societytext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/world/worldtext1.xlsx']

In [7]:
# 각 섹션별 사용할 데이터의 인덱스가 담겨 있는 딕셔너리

idx_dictionary = {}

for section, text_path in zip(sections, text_paths):
  text = pd.read_excel(text_path)
  idx_section = list(text['idx'])

  idx_dictionary[section] = idx_section

In [8]:
len(idx_dictionary['economy'])

1888

In [9]:
np.random.seed(602)

cv_idx_dictionary = {}

for section in sections:
  cv_idx_section = list(np.random.choice(idx_dictionary[section], size = 1200, replace = False))
  cv_idx_section.sort()

  cv_idx_dictionary[section] = cv_idx_section

In [10]:
len(cv_idx_dictionary['economy'])

1200

In [11]:
new_idx_dictionary = {}

for section in sections:
  new_idx_section = [i for i in idx_dictionary[section] if i not in cv_idx_dictionary[section]]

  new_idx_dictionary[section] = new_idx_section

In [12]:
len(new_idx_dictionary['economy'])

688

In [13]:
len(idx_dictionary['economy'])

1888

In [14]:
# 라벨에 해당하는 y값 매칭하기 (0 ~ 5)

# label_to_y = {section : idx for idx, section in enumerate(sections)}
# label_to_y

In [15]:
label_to_y = {
    'politics': 0,
    'society': 1,
    'science': 2,
    'life': 3,
    'world': 4,
    'economy': 5
}
label_to_y

{'politics': 0,
 'society': 1,
 'science': 2,
 'life': 3,
 'world': 4,
 'economy': 5}

In [16]:
# 분석에 사용할 모든 데이터들의 경로를 불러온다.

cv_data = []
new_data = []

for section_folder in section_folders:

  # 각 섹션 이름 가져와서 인덱스랑 합하기
  section = os.path.basename(section_folder)

  cv_indicies = cv_idx_dictionary[section]
  new_indicies = new_idx_dictionary[section]

  y = label_to_y[section]


  for cv_idx in cv_indicies:
    imgname = section + str(cv_idx) + '.jpg'
    imgpath = os.path.join(section_folder, imgname)

    data = []
    data.append(cv_idx)
    data.append(imgpath)
    data.append(y)

    cv_data.append(data)


  for new_idx in new_idx_dictionary[section]:
    imgname = section + str(new_idx) + '.jpg'
    imgpath = os.path.join(section_folder, imgname)

    data = []
    data.append(cv_idx)
    data.append(imgpath)
    data.append(y)

    new_data.append(data)

In [17]:
cv_data[0:10]

[[3, '/home2/jh981017/myubai/NaverNews/economy/economy3.jpg', 5],
 [6, '/home2/jh981017/myubai/NaverNews/economy/economy6.jpg', 5],
 [9, '/home2/jh981017/myubai/NaverNews/economy/economy9.jpg', 5],
 [13, '/home2/jh981017/myubai/NaverNews/economy/economy13.jpg', 5],
 [16, '/home2/jh981017/myubai/NaverNews/economy/economy16.jpg', 5],
 [17, '/home2/jh981017/myubai/NaverNews/economy/economy17.jpg', 5],
 [18, '/home2/jh981017/myubai/NaverNews/economy/economy18.jpg', 5],
 [22, '/home2/jh981017/myubai/NaverNews/economy/economy22.jpg', 5],
 [23, '/home2/jh981017/myubai/NaverNews/economy/economy23.jpg', 5],
 [25, '/home2/jh981017/myubai/NaverNews/economy/economy25.jpg', 5]]

In [18]:
len(cv_data)

7200

In [19]:
len(new_data)

5720

In [20]:
# 이미지 데이터셋 만들기

class RoBaMFImageDataset(Dataset):
  def __init__(self, dataset, img_idx = 1, label_idx = 2, transform = None):
    super(RoBaMFImageDataset, self).__init__()

    self.imgpaths = [i[img_idx] for i in dataset]
    self.y = [np.int32(i[label_idx]) for i in dataset]
    self.transform = transform


  def __len__(self):

    return len(self.imgpaths)


  def __getitem__(self, idx):
    imgpath = self.imgpaths[idx]

    img = Image.open(imgpath).convert('RGB')
    img = self.transform(img)

    target = self.y[idx]

    return img, target

In [21]:
# 사용할 모형에 맞게 이미지를 transform & normalize

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    #transforms.Resize(256),
    #transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    #transforms.Resize(256),
    #transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [22]:
class ImageModel(nn.Module):
  def __init__(self, mobilenetv2):
    super(ImageModel, self).__init__()

    self.mobilenetv2 = mobilenetv2

    self.fc = nn.Sequential(
              nn.Linear(1000, 1024),
              nn.ReLU(),
              nn.Linear(1024, 1024),
              nn.ReLU(),
              nn.Linear(1024, 6),
              nn.Softmax(1)
              )


  def forward(self, img):
    x = self.mobilenetv2(img)
    x = self.fc(x)

    return x

In [23]:
# 모형의 가중치 업데이트를 위한 모듈

def model_train(model, data_loader, loss_fn, optimizer, device):

  model.train()
  size = len(data_loader.dataset)

  progress_bar = tqdm(data_loader)

  corr = 0
  running_loss = 0

  for X, y in progress_bar:
    X, y = X.to(device), y.long().to(device)

    # 예측하고 크로스엔트로피 계산
    pred = model(X)
    loss = loss_fn(pred, y)

    # 그래티언트 초기화
    optimizer.zero_grad()

    # 역전파 알고리즘에 의한 그래디언트 계산
    loss.backward()

    # 그래디언트를 이용한 업데이트
    optimizer.step()

    # accuracy 계산을 위한 정답 개수 계산
    corr += (pred.argmax(1) == y).type(torch.float).sum().item()

    # 평균 크로스엔트로피 계산을 위한 합
    running_loss += loss.item() * X.size(0)


  # accuracy
  accuracy = corr / size
  running_loss = running_loss / size

  return accuracy, running_loss

In [24]:
# 모형 평가를 위한 모듈

def model_evaluate(model, data_loader, loss_fn, device):

  size = len(data_loader.dataset)
  model.eval()

  with torch.no_grad():
    corr = 0
    running_loss = 0

    for X, y in data_loader:
      X, y = X.to(device), y.long().to(device)


      # 예측 확률 계산
      pred = model(X)
      loss = loss_fn(pred, y)

      # accuracy 계산을 위한 정답 개수 계산
      corr += (pred.argmax(1) == y).type(torch.float).sum().item()

      # 평균 크로스엔트로피 계산을 위한 합
      running_loss += loss.item() * X.size(0)

  # accuracy
  accuracy = corr / size
  running_loss = running_loss / size

  return accuracy, running_loss

Stratified K-fold CV

In [25]:
#lr=0.2, patence=3, factor=0.3/ adamw, rmsprop/ lr한번 조절해보자!/loss fuction도..? 0.25앵간했음
#RBGA로 안나타나는 png파일을
#from PIL import Image

#image = Image.open('path_to_your_image.png')
#image = image.convert('RGBA')
#위 방식처럼 rgba방식으로 표현가능

In [26]:
from sklearn.model_selection import StratifiedKFold

cv = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 602)

In [27]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [28]:
df_cv_data = pd.DataFrame(cv_data)
df_new_data = pd.DataFrame(new_data)

In [29]:
list_train_idx = []
list_test_idx = []

for train_idx, test_idx in cv.split(df_cv_data[1], df_cv_data[2]):
  list_train_idx.append(train_idx)
  list_test_idx.append(test_idx)

In [51]:
max_epoch = 10  ### 에포크 수정 ###

fold = 0

list_test_history = []

for train_idx, test_idx in zip(list_train_idx, list_test_idx):
  fold += 1


  # 모델 초기화
  mobilenetv2 = models.mobilenet_v2(pretrained = 'IMAGENET1K_V2')
  model = ImageModel(mobilenetv2)

  if torch.cuda.is_available():
    model.cuda()


  train_data = [cv_data[i] for i in train_idx]
  test_data = [cv_data[i] for i in test_idx]

  train_dataset = RoBaMFImageDataset(dataset = train_data, img_idx = 1, label_idx = 2, transform = train_transform)
  test_dataset = RoBaMFImageDataset(dataset = test_data, img_idx = 1, label_idx = 2, transform = test_transform)

  train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
  test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = True)


  optimizer = optim.SGD(model.parameters(), lr = 3.66 * 1e-3)
  scheduler = ReduceLROnPlateau(optimizer, mode = 'min', patience = 3, factor = 0.3)
  loss_fn = nn.CrossEntropyLoss()


  test_history = []
  for epoch in range(max_epoch):

    # 각 에포크별 모형 훈련 -> train accuracy와 손실함수 반환
    train_accuracy, train_loss = model_train(model, train_loader, loss_fn, optimizer, device)

    # 시험 데이터에 모형 적합 -> valiation accuray와 손실함수 반환
    val_accuracy, val_loss = model_evaluate(model, test_loader, loss_fn, device)

    scheduler.step(train_loss)

    test_history.append(val_accuracy)

    print(f'''fold {fold:d},  epoch {epoch + 1:02d} -------------------------------------------------- \n
            train_accuracy: {train_accuracy:.5f}, train_loss: {train_loss:.5f}, val_accuracy: {val_accuracy:.5f}, val_loss: {val_loss:.5f} \n\n''')

  print(f'''=========================================================================== \n
              fold {fold:d}  Ended.  \n
              =========================================================================== \n ''')


  # 모형 가중치 저장
  torch.save(model.state_dict(), f'/home2/jh981017/myubai/machinelearning/Model Weights/ImageWeight{fold}.pth')

  list_test_history.append(test_history)


100%|██████████| 180/180 [00:43<00:00,  4.14it/s]


fold 1,  epoch 01 -------------------------------------------------- 

            train_accuracy: 0.21510, train_loss: 1.78646, val_accuracy: 0.23125, val_loss: 1.78036 




100%|██████████| 180/180 [00:41<00:00,  4.33it/s]


fold 1,  epoch 02 -------------------------------------------------- 

            train_accuracy: 0.25729, train_loss: 1.76511, val_accuracy: 0.24931, val_loss: 1.75890 




100%|██████████| 180/180 [00:41<00:00,  4.34it/s]


fold 1,  epoch 03 -------------------------------------------------- 

            train_accuracy: 0.28212, train_loss: 1.74051, val_accuracy: 0.25903, val_loss: 1.74003 




100%|██████████| 180/180 [00:41<00:00,  4.34it/s]


fold 1,  epoch 04 -------------------------------------------------- 

            train_accuracy: 0.30990, train_loss: 1.71823, val_accuracy: 0.28542, val_loss: 1.72380 




100%|██████████| 180/180 [00:41<00:00,  4.34it/s]


fold 1,  epoch 05 -------------------------------------------------- 

            train_accuracy: 0.33264, train_loss: 1.69942, val_accuracy: 0.31389, val_loss: 1.70782 




100%|██████████| 180/180 [00:41<00:00,  4.33it/s]


fold 1,  epoch 06 -------------------------------------------------- 

            train_accuracy: 0.35677, train_loss: 1.68178, val_accuracy: 0.32708, val_loss: 1.69379 




100%|██████████| 180/180 [00:41<00:00,  4.34it/s]


fold 1,  epoch 07 -------------------------------------------------- 

            train_accuracy: 0.37622, train_loss: 1.66391, val_accuracy: 0.34722, val_loss: 1.68115 




100%|██████████| 180/180 [00:41<00:00,  4.33it/s]


fold 1,  epoch 08 -------------------------------------------------- 

            train_accuracy: 0.39149, train_loss: 1.64672, val_accuracy: 0.36389, val_loss: 1.66944 




100%|██████████| 180/180 [00:41<00:00,  4.33it/s]


fold 1,  epoch 09 -------------------------------------------------- 

            train_accuracy: 0.41302, train_loss: 1.62785, val_accuracy: 0.36944, val_loss: 1.65768 




100%|██████████| 180/180 [00:41<00:00,  4.35it/s]


fold 1,  epoch 10 -------------------------------------------------- 

            train_accuracy: 0.42569, train_loss: 1.61349, val_accuracy: 0.38125, val_loss: 1.64639 



              fold 1  Ended.  

 


100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 2,  epoch 01 -------------------------------------------------- 

            train_accuracy: 0.20451, train_loss: 1.78694, val_accuracy: 0.24792, val_loss: 1.78026 




100%|██████████| 180/180 [00:41<00:00,  4.38it/s]


fold 2,  epoch 02 -------------------------------------------------- 

            train_accuracy: 0.28368, train_loss: 1.76861, val_accuracy: 0.27778, val_loss: 1.75800 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 2,  epoch 03 -------------------------------------------------- 

            train_accuracy: 0.30677, train_loss: 1.73779, val_accuracy: 0.28681, val_loss: 1.72999 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 2,  epoch 04 -------------------------------------------------- 

            train_accuracy: 0.31788, train_loss: 1.70989, val_accuracy: 0.30556, val_loss: 1.71200 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 2,  epoch 05 -------------------------------------------------- 

            train_accuracy: 0.34965, train_loss: 1.68695, val_accuracy: 0.32083, val_loss: 1.69605 




100%|██████████| 180/180 [00:41<00:00,  4.39it/s]


fold 2,  epoch 06 -------------------------------------------------- 

            train_accuracy: 0.36719, train_loss: 1.66843, val_accuracy: 0.35000, val_loss: 1.68173 




100%|██████████| 180/180 [00:40<00:00,  4.39it/s]


fold 2,  epoch 07 -------------------------------------------------- 

            train_accuracy: 0.39427, train_loss: 1.64887, val_accuracy: 0.35625, val_loss: 1.67116 




100%|██████████| 180/180 [00:40<00:00,  4.41it/s]


fold 2,  epoch 08 -------------------------------------------------- 

            train_accuracy: 0.41441, train_loss: 1.62841, val_accuracy: 0.35833, val_loss: 1.66171 




100%|██████████| 180/180 [00:41<00:00,  4.32it/s]


fold 2,  epoch 09 -------------------------------------------------- 

            train_accuracy: 0.43785, train_loss: 1.60782, val_accuracy: 0.36181, val_loss: 1.65706 




100%|██████████| 180/180 [00:41<00:00,  4.35it/s]


fold 2,  epoch 10 -------------------------------------------------- 

            train_accuracy: 0.45156, train_loss: 1.59227, val_accuracy: 0.36458, val_loss: 1.65298 



              fold 2  Ended.  

 


100%|██████████| 180/180 [00:41<00:00,  4.39it/s]


fold 3,  epoch 01 -------------------------------------------------- 

            train_accuracy: 0.22743, train_loss: 1.78409, val_accuracy: 0.27778, val_loss: 1.77380 




100%|██████████| 180/180 [00:41<00:00,  4.38it/s]


fold 3,  epoch 02 -------------------------------------------------- 

            train_accuracy: 0.27431, train_loss: 1.76336, val_accuracy: 0.30833, val_loss: 1.74627 




100%|██████████| 180/180 [00:41<00:00,  4.35it/s]


fold 3,  epoch 03 -------------------------------------------------- 

            train_accuracy: 0.29462, train_loss: 1.73423, val_accuracy: 0.31736, val_loss: 1.71768 




100%|██████████| 180/180 [00:41<00:00,  4.36it/s]


fold 3,  epoch 04 -------------------------------------------------- 

            train_accuracy: 0.31667, train_loss: 1.70896, val_accuracy: 0.33056, val_loss: 1.69976 




100%|██████████| 180/180 [00:41<00:00,  4.35it/s]


fold 3,  epoch 05 -------------------------------------------------- 

            train_accuracy: 0.34167, train_loss: 1.68795, val_accuracy: 0.35347, val_loss: 1.68369 




100%|██████████| 180/180 [00:41<00:00,  4.35it/s]


fold 3,  epoch 06 -------------------------------------------------- 

            train_accuracy: 0.37031, train_loss: 1.66834, val_accuracy: 0.36528, val_loss: 1.67170 




100%|██████████| 180/180 [00:41<00:00,  4.34it/s]


fold 3,  epoch 07 -------------------------------------------------- 

            train_accuracy: 0.39549, train_loss: 1.64555, val_accuracy: 0.37569, val_loss: 1.65775 




100%|██████████| 180/180 [00:41<00:00,  4.38it/s]


fold 3,  epoch 08 -------------------------------------------------- 

            train_accuracy: 0.41285, train_loss: 1.62807, val_accuracy: 0.38194, val_loss: 1.64851 




100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 3,  epoch 09 -------------------------------------------------- 

            train_accuracy: 0.43108, train_loss: 1.60992, val_accuracy: 0.38403, val_loss: 1.64296 




100%|██████████| 180/180 [00:41<00:00,  4.38it/s]


fold 3,  epoch 10 -------------------------------------------------- 

            train_accuracy: 0.44826, train_loss: 1.59253, val_accuracy: 0.39514, val_loss: 1.63572 



              fold 3  Ended.  

 


100%|██████████| 180/180 [00:41<00:00,  4.38it/s]


fold 4,  epoch 01 -------------------------------------------------- 

            train_accuracy: 0.21163, train_loss: 1.78586, val_accuracy: 0.24792, val_loss: 1.77899 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 4,  epoch 02 -------------------------------------------------- 

            train_accuracy: 0.28628, train_loss: 1.76906, val_accuracy: 0.31042, val_loss: 1.75383 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 4,  epoch 03 -------------------------------------------------- 

            train_accuracy: 0.30920, train_loss: 1.73885, val_accuracy: 0.31458, val_loss: 1.71828 




100%|██████████| 180/180 [00:41<00:00,  4.39it/s]


fold 4,  epoch 04 -------------------------------------------------- 

            train_accuracy: 0.32205, train_loss: 1.71184, val_accuracy: 0.33542, val_loss: 1.69988 




100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 4,  epoch 05 -------------------------------------------------- 

            train_accuracy: 0.34514, train_loss: 1.69363, val_accuracy: 0.36111, val_loss: 1.68399 




100%|██████████| 180/180 [00:40<00:00,  4.41it/s]


fold 4,  epoch 06 -------------------------------------------------- 

            train_accuracy: 0.36684, train_loss: 1.67305, val_accuracy: 0.37014, val_loss: 1.66927 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 4,  epoch 07 -------------------------------------------------- 

            train_accuracy: 0.39010, train_loss: 1.65104, val_accuracy: 0.37986, val_loss: 1.65522 




100%|██████████| 180/180 [00:42<00:00,  4.26it/s]


fold 4,  epoch 08 -------------------------------------------------- 

            train_accuracy: 0.40417, train_loss: 1.63588, val_accuracy: 0.38472, val_loss: 1.64779 




100%|██████████| 180/180 [00:41<00:00,  4.36it/s]


fold 4,  epoch 09 -------------------------------------------------- 

            train_accuracy: 0.42743, train_loss: 1.61398, val_accuracy: 0.39306, val_loss: 1.63976 




100%|██████████| 180/180 [00:41<00:00,  4.37it/s]


fold 4,  epoch 10 -------------------------------------------------- 

            train_accuracy: 0.44010, train_loss: 1.59917, val_accuracy: 0.39306, val_loss: 1.63564 



              fold 4  Ended.  

 


100%|██████████| 180/180 [00:40<00:00,  4.41it/s]


fold 5,  epoch 01 -------------------------------------------------- 

            train_accuracy: 0.20677, train_loss: 1.78680, val_accuracy: 0.24514, val_loss: 1.77957 




100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 5,  epoch 02 -------------------------------------------------- 

            train_accuracy: 0.25729, train_loss: 1.76893, val_accuracy: 0.27222, val_loss: 1.75538 




100%|██████████| 180/180 [00:40<00:00,  4.41it/s]


fold 5,  epoch 03 -------------------------------------------------- 

            train_accuracy: 0.28889, train_loss: 1.74616, val_accuracy: 0.29167, val_loss: 1.73222 




100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 5,  epoch 04 -------------------------------------------------- 

            train_accuracy: 0.31771, train_loss: 1.72264, val_accuracy: 0.31597, val_loss: 1.71046 




100%|██████████| 180/180 [00:41<00:00,  4.39it/s]


fold 5,  epoch 05 -------------------------------------------------- 

            train_accuracy: 0.32969, train_loss: 1.70296, val_accuracy: 0.33889, val_loss: 1.69360 




100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 5,  epoch 06 -------------------------------------------------- 

            train_accuracy: 0.35278, train_loss: 1.68478, val_accuracy: 0.35139, val_loss: 1.67799 




100%|██████████| 180/180 [00:40<00:00,  4.41it/s]


fold 5,  epoch 07 -------------------------------------------------- 

            train_accuracy: 0.37309, train_loss: 1.66550, val_accuracy: 0.37708, val_loss: 1.66437 




100%|██████████| 180/180 [00:40<00:00,  4.42it/s]


fold 5,  epoch 08 -------------------------------------------------- 

            train_accuracy: 0.38993, train_loss: 1.64552, val_accuracy: 0.38611, val_loss: 1.65245 




100%|██████████| 180/180 [00:40<00:00,  4.41it/s]


fold 5,  epoch 09 -------------------------------------------------- 

            train_accuracy: 0.41128, train_loss: 1.62969, val_accuracy: 0.39931, val_loss: 1.64102 




100%|██████████| 180/180 [00:40<00:00,  4.40it/s]


fold 5,  epoch 10 -------------------------------------------------- 

            train_accuracy: 0.42500, train_loss: 1.61254, val_accuracy: 0.39583, val_loss: 1.63153 



              fold 5  Ended.  

 


In [42]:
df_test_history = pd.DataFrame(list_test_history)
df_test_history.to_csv('/home2/jh981017/myubai/machinelearning/Baseline CVs/Image Baseline.csv')

New Data Prediction

In [43]:
new_data[1]

[2231, '/home2/jh981017/myubai/NaverNews/economy/economy15.jpg', 5]

In [1]:
max_epoch = 15   ### 에포크 수정 ###

# 모델 초기화
mobilenetv2 = models.mobilenet_v2(weights = 'DEFAULT')
# mobilenetv2 = models.mobilenet_v2(pretrained = 'IMAGENET1K_V2')
model = ImageModel(mobilenetv2)

for i, (name, param) in enumerate(model.named_parameters()):
  param.requires_grad = False
  if i == 155:
    break

if torch.cuda.is_available():
  model.cuda()


train_data = cv_data
test_data = new_data

train_dataset = RoBaMFImageDataset(dataset = train_data, img_idx = 1, label_idx = 2, transform = train_transform)
test_dataset = RoBaMFImageDataset(dataset = test_data, img_idx = 1, label_idx = 2, transform = test_transform)

train_loader = DataLoader(train_dataset, batch_size = 16, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = 16, shuffle = True)

optimizer = optim.SGD(model.parameters(), lr = 3.66 * 1e-3)
# optimizer = optim.Adam(model.parameters(), lr = 0.2)
scheduler = ReduceLROnPlateau(optimizer, mode = 'min', patience = 3, factor = 0.3)
loss_fn = nn.CrossEntropyLoss()


test_history = []
for epoch in range(max_epoch):

  # 각 에포크별 모형 훈련 -> train accuracy와 손실함수 반환
  train_accuracy, train_loss = model_train(model, train_loader, loss_fn, optimizer, device)

  # 시험 데이터에 모형 적합 -> valiation accuray와 손실함수 반환
  val_accuracy, val_loss = model_evaluate(model, test_loader, loss_fn, device)

  scheduler.step(train_loss)

  test_history.append(val_accuracy)

  print(f'''epoch {epoch + 1:02d} -------------------------------------------------- \n
          train_accuracy: {train_accuracy:.5f}, train_loss: {train_loss:.5f}, val_accuracy: {val_accuracy:.5f}, val_loss: {val_loss:.5f} \n\n''')



NameError: name 'models' is not defined

In [49]:
df_test_history = pd.DataFrame(test_history)
df_test_history.to_csv('/home2/jh981017/myubai/machinelearning/Baseline CVs/Image Baseline_new.csv')

In [50]:
df_test_history

Unnamed: 0,0
0,0.200874
1,0.243706
2,0.256643
3,0.29021
4,0.28951
5,0.290559
6,0.31451
7,0.308042
8,0.329196
9,0.344406
