In [6]:
from datetime import datetime
from time import time
import numpy as np
import shutil, random, os, sys, torch
from glob import glob
import wandb
from torch.utils.data import DataLoader
from torchvision import datasets
from sklearn.model_selection import train_test_split
prj_dir = os.getcwd()
sys.path.append(prj_dir)

from modules.utils import load_yaml, get_logger
from modules.metrics import get_metric_function
from modules.earlystoppers import EarlyStopper
from modules.losses import get_loss_function
from modules.optimizers import get_optimizer
from modules.schedulers import get_scheduler
from modules.scalers import get_image_scaler
from modules.transforms import get_transform_function
from modules.datasets import get_dataset_function
from modules.recorders import Recorder
from modules.trainer import Trainer
from models.utils import get_model

In [None]:
"""Train
"""
from datetime import datetime
from time import time
import numpy as np
import shutil, random, os, sys, torch
from glob import glob
import wandb
from torch.utils.data import DataLoader
from torchvision import datasets
from sklearn.model_selection import train_test_split
prj_dir = os.getcwd()
sys.path.append(prj_dir)

from modules.utils import load_yaml, get_logger
from modules.metrics import get_metric_function
from modules.earlystoppers import EarlyStopper
from modules.losses import get_loss_function
from modules.optimizers import get_optimizer
from modules.schedulers import get_scheduler
from modules.scalers import get_image_scaler
from modules.transforms import get_transform_function
from modules.datasets import get_dataset_function
from modules.recorders import Recorder
from modules.trainer import Trainer
from models.utils import get_model

# Load config
config_path = os.path.join(prj_dir, 'config', 'train.yaml')
config = load_yaml(config_path)

# Set train serial: ex) 20211004
train_serial = datetime.now().strftime("%Y%m%d_%H%M%S")
train_serial = 'debug' if config['debug'] else train_serial

wandb.init(
        project=config['project_name'],
        config={
            "architecture": config['model']['model_name'],
            "dataset": config['dataset_name'],
            "notes": config['wandb_note'],
        },
        name=config['run_name'],
)

# Set random seed, deterministic
torch.cuda.manual_seed(config['seed'])
torch.manual_seed(config['seed'])
np.random.seed(config['seed'])
random.seed(config['seed'])
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Set device(GPU/CPU)
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpu_num'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create train result directory and set logger
train_result_dir = os.path.join(prj_dir, 'results', 'train', train_serial)
os.makedirs(train_result_dir, exist_ok=True)

# Set logger
logging_level = 'debug' if config['verbose'] else 'info'
logger = get_logger(name='train',
                    file_path=os.path.join(train_result_dir, 'train.log'),
                    level=logging_level)


# Set data directory
train_dirs = os.path.join(prj_dir, 'data', 'train')

# Load data and create dataset for train 
# Load image scaler
# train_img_paths = glob(os.path.join(train_dirs, 'x', '*.png'))
# train_img_paths, val_img_paths = train_test_split(train_img_paths, test_size=config['val_size'], random_state=config['seed'], shuffle=True)
transforms = get_transform_function(config['transform_name'],config=config)

train_dataset = get_dataset_function(config['dataset_name'])
val_dataset = get_dataset_function(config['dataset_name'])
train_dataset = train_dataset(config['train_data_path'],transform=transforms)
val_dataset = val_dataset(config['val_data_path'],transform=transforms)

# train_dataset = train_dataset(paths=train_img_paths,
#                         input_size=[config['input_width'], config['input_height']],
#                         scaler=get_image_scaler(config['scaler']),
#                         logger=logger)
# val_dataset = val_dataset(paths=val_img_paths,
#                         input_size=[config['input_width'], config['input_height']],
#                         scaler=get_image_scaler(config['scaler']),
                        # logger=logger)
# Create data loader
train_dataloader = DataLoader(dataset=train_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'], 
                            shuffle=config['shuffle'],
                            drop_last=config['drop_last'])
                            
val_dataloader = DataLoader(dataset=val_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'], 
                            shuffle=False,
                            drop_last=config['drop_last'])

logger.info(f"Load dataset, train: {len(train_dataset)}, val: {len(val_dataset)}")

# Load model
model = get_model(model_str=config['model']['model_name'])
model = model(**config['model']['args']).to(device)
logger.info(f"Load model architecture: {config['model']['model_name']}")

# Set optimizer
optimizer = get_optimizer(optimizer_str=config['optimizer']['name'])
optimizer = optimizer(model.parameters(), **config['optimizer']['args'])

# Set Scheduler
scheduler = get_scheduler(scheduler_str=config['scheduler']['name'])
scheduler = scheduler(optimizer=optimizer, **config['scheduler']['args'])

# Set loss function
loss_func = get_loss_function(loss_function_str=config['loss']['name'])
loss_func = loss_func(**config['loss']['args'])

# Set metric
metric_funcs = {metric_name:get_metric_function(metric_name,device) for metric_name in config['metrics']}
logger.info(f"Load optimizer:{config['optimizer']['name']}, scheduler: {config['scheduler']['name']}, loss: {config['loss']['name']}, metric: {config['metrics']}")

# Set trainer
trainer = Trainer(model=model,
                optimizer=optimizer,
                scheduler=scheduler,
                loss_func=loss_func,
                metric_funcs=metric_funcs,
                device=device,
                logger=logger)
logger.info(f"Load trainer")

# Set early stopper
early_stopper = EarlyStopper(patience=config['earlystopping_patience'],
                            logger=logger)
# Set recorder
recorder = Recorder(record_dir=train_result_dir,
                    model=model,
                    optimizer=optimizer,
                    scheduler=scheduler,
                    logger=logger)
logger.info("Load early stopper, recorder")

# Recorder - save train config
shutil.copy(config_path, os.path.join(recorder.record_dir, 'train.yaml'))

# Train
print("START TRAINING")
logger.info("START TRAINING")
for epoch_id in range(config['n_epochs']):
    
    # Initiate result row
    row = dict()
    row['epoch_id'] = epoch_id
    row['train_serial'] = train_serial
    row['lr'] = trainer.scheduler.get_last_lr()

    # Train
    print(f"Epoch {epoch_id}/{config['n_epochs']} Train..")
    logger.info(f"Epoch {epoch_id}/{config['n_epochs']} Train..")
    tic = time()
    trainer.train(dataloader=train_dataloader, epoch_index=epoch_id)
    toc = time()
    # Write tarin result to result row
    row['train_loss'] = trainer.loss  # Loss
    for metric_name, metric_score in trainer.scores.items():
        row[f'train_{metric_name}'] = metric_score

    row['train_elapsed_time'] = round(toc-tic, 1)
    # Clear
    trainer.clear_history()

    # Validation
    print(f"Epoch {epoch_id}/{config['n_epochs']} Validation..")
    logger.info(f"Epoch {epoch_id}/{config['n_epochs']} Validation..")
    tic = time()
    trainer.validate(dataloader=val_dataloader, epoch_index=epoch_id)
    toc = time()
    row['val_loss'] = trainer.loss
    # row[f"val_{config['metric']}"] = trainer.score
    for metric_name, metric_score in trainer.scores.items():
        row[f'val_{metric_name}'] = metric_score
    row['val_elapsed_time'] = round(toc-tic, 1)
    trainer.clear_history()

    # Performance record - row
    recorder.add_row(row)
    recorder.wandb_mlflow_log(model=model, log_dict=row, sample_image=None)
    
    # Performance record - plot
    recorder.save_plot(config['plot'])

    # Check early stopping
    early_stopper.check_early_stopping(row[config['earlystopping_target']])
    if early_stopper.patience_counter == 0:
        recorder.save_weight(epoch=epoch_id)
        
    if early_stopper.stop:
        print(f"Epoch {epoch_id}/{config['n_epochs']}, Stopped counter {early_stopper.patience_counter}/{config['earlystopping_patience']}")
        logger.info(f"Epoch {epoch_id}/{config['n_epochs']}, Stopped counter {early_stopper.patience_counter}/{config['earlystopping_patience']}")
        break

print("END TRAINING")
logger.info("END TRAINING")

In [74]:
%pip install -q "openvino>=2023.1.0"

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
idx = "20240411_110001"
BASE_PATH = os.path.join('/home/hojun/Documents/project/ma2024/pytorch_template/results/train/',idx)
val_path = "/home/hojun/Documents/project/ma2024/pytorch_template/dataset/val/"
test_path = "/home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/"
os.makedirs(os.path.join(BASE_PATH,"bin"),exist_ok=True)


In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1./255)

test_set = test_datagen.flow_from_directory(val_path,
                                            target_size=(224, 224),
                                            batch_size=1,
                                            shuffle=True,
                                            class_mode='categorical')

2024-04-11 10:52:01.149246: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 967 images belonging to 4 classes.


In [3]:
import pickle

# key 와 value 값을 바꾸어 줍니다.
class4 = dict()
for key,value in test_set.class_indices.items():
    class4[value] = key

with open(os.path.join(BASE_PATH,'bin/class4.pickle'), 'wb') as f:
    pickle.dump(class4, f)

In [4]:
import openvino
import openvino as ov
from openvino.tools.mo import convert_model
import torch
model = torch.load(os.path.join(BASE_PATH,'model.pth'))
print(os.path.join(BASE_PATH,'model.pth'))
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).cuda()
torch.onnx.export(model, (dummy_input, ), os.path.join(BASE_PATH,'bin/model.onnx'))
ov_model = convert_model(os.path.join(BASE_PATH,'bin/model.onnx'))
ov.save_model(ov_model, os.path.join(BASE_PATH,'bin/model.xml'))

/home/hojun/Documents/project/ma2024/pytorch_template/results/train/20240411_105007/model.pth


  assert condition, message


In [5]:
# --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
core = ov.Core()

In [6]:
# --------------------------- Step 2. Read a model --------------------------------------------------------------------
# (.xml and .bin files) or (.onnx file)
model = core.read_model(os.path.join(BASE_PATH,'bin/model.xml'))

if len(model.inputs) != 1: print('Sample supports only single input topologies')
if len(model.outputs) != 1: print('Sample supports only single output topologies')

In [7]:
import os

# Get a list of all files in the directory
file_list = [f for f in os.listdir(test_path) if os.path.isfile(os.path.join(test_path, f))]

# Print the list of files
print("List of files in the directory:")
for file in file_list:
    print(file)

List of files in the directory:
khHyOj.jpg
58JMlG.jpg
52ZRFq.jpg
j_9B__.jpg
ckT57b.jpg
OU36zZ.jpg
_f2O2_.jpg
yWvPcR.jpg
3vzzvg.jpg
_zD2Jf.jpg
42cu0T.jpg
_RT0Y_.jpg
773_U_.jpg
2pQp_R.jpg
9EQ6_x.jpg
_YSzxr.jpg
y__z2X.jpg
IDf_aa.jpg
kb5Z12.jpg
6_FL4m.jpg
q8_f_K.jpg
uo7Cl9.jpg
emNl9g.jpg
Iyt754.jpg
222749.jpg
RD_crm.jpg
0i1vdc.jpg
__y21L.jpg
RpZ_tR.jpg
X2j_kg.jpg
Ez2_Zh.jpg
grfG9G.jpg
u_Q6d_.jpg
y5WgM5.jpg
XE6LE3.jpg
g_93g0.jpg
4phAXg.jpg
3Vd_nf.jpg
_XiB9M.jpg
3h6qMg.jpg
2K36D6.jpg
Yoh_7B.jpg
q_n3t8.jpg
17yo_A.jpg
2vO1ZL.jpg
rav22s.jpg
a7_wmZ.jpg
HG7lIZ.jpg
y8_4X3.jpg
zsn_HU.jpg
L6057C.jpg
_THcjc.jpg
kH3xjZ.jpg
X5_262.jpg
eFEk47.jpg
nUP60N.jpg
FabyeZ.jpg
SE4Cs2.jpg
95M4J9.jpg
p66_xg.jpg
pGgH1S.jpg
gwT5N2.jpg
e91eeP.jpg
KMU78U.jpg
RGkb_Z.jpg
4alBsX.jpg
LGq_iH.jpg
G6xE_B.jpg
X55109.jpg
8_1zBw.jpg
jW7Sv2.jpg
_34HAl.jpg
HUu_C_.jpg
s2HisM.jpg
3c3f2e.jpg
480doX.jpg
7U5_Xh.jpg
d_04v8.jpg
7_q7Sy.jpg
kTDUe0.jpg
523ke_.jpg
H6hU79.jpg
cD92Wa.jpg
pu3p3_.jpg
84pk7N.jpg
E6F9E6.jpg
137492.jpg
h3Z5k2.jpg


In [8]:
import pickle

with open(os.path.join(BASE_PATH,'bin/class4.pickle'), 'rb') as f:
    print(os.path.join(BASE_PATH,'bin/class4.pickle'))
    labels = pickle.load(f)

/home/hojun/Documents/project/ma2024/pytorch_template/results/train/20240411_105007/bin/class4.pickle


In [9]:
import pandas as pd
import cv2
import numpy as np

# --------------------------- Step 4. Loading model to the device -----------------------------------------------------
print('Loading the model to the plugin')
compiled_model = core.compile_model(model, 'CPU')
img_height = 224

result = []

res = open(os.path.join(BASE_PATH,"result.txt"), "w")

print('Starting inference in synchronous mode')
cnt = 0;

for file in file_list:
    # --------------------------- Step 5. Set up input --------------------------------------------------------------------
    # Read input image
    image_path = os.path.join(test_path, file)
    org_image = cv2.imread(image_path)
    image = cv2.resize(org_image, (img_height,img_height))
    #img = cv2.resize(img, (224,224), fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # image = image / 255.

    # Add N dimension
    nchw_tensor = np.expand_dims(image, 0)

    # NHWC -> NCHW
    input_tensor = np.transpose(nchw_tensor, (0,3,1,2))#(0,3,1,2))
    

    # --------------------------- Step 6. Create infer request and do inference synchronously -----------------------------
    results = compiled_model.infer_new_request({0:input_tensor})

    # --------------------------- Step 7. Process output ------------------------------------------------------------------
    predictions = next(iter(results.values()))
    output_node_name = next(iter(results.keys())) #
    #for (k,v) in results.items():
    #  print(k,'=',v)

    #print("values: ",end=' ')
    #print(results.values())

    # Change a shape of a numpy.ndarray with results to get another one with one dimension
    probs = predictions.reshape(-1)
    #print(outs)
    #print("probs: ",end=' ')
    #print(probs)

    # Get an array of 4 class IDs in descending order of probability
    top_4 = np.argsort(probs)[-4:][::-1]
    print(top_4)

    #res = res[output_node_name]
    #idx = np.argsort(res[0])[-1]
    #prob = res[0][idx]*100

    header = 'class_id            probability'
    print(f'Image path: {image_path}')
    print('Top 4 results: ')
    print(header)
    print('-' * len(header))

    for class_id in top_4:
        probability_indent = ' ' * (len('class_id           ') - len(labels[class_id]) + 1)
        print(f'{labels[class_id]}{probability_indent}{probs[class_id]*100:.7f}')

    #print(labels[top_4[0]], probs[top_4[0]]*100)
    #print(labels[top_4[1]], probs[top_4[1]]*100)
    #result.append(top_4[0])
    #res.write(file+':'+str(labels[top_4[0]])+'\n')
    res.write(file+':top1 = ('+labels[top_4[0]]+')'+str(probs[top_4[0]]*100)+', top2 = ('+labels[top_4[1]]+')'+str(probs[top_4[1]]*100)+'\n')

#res.write(str(result)+'\n')
res.close()

Loading the model to the plugin
Starting inference in synchronous mode
[3 1 2 0]
Image path: /home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/khHyOj.jpg
Top 4 results: 
class_id            probability
-------------------------------
Pneumonia           596.8863964
NoPneumonia         7.8007139
Osteoarthritis      -241.8120146
NoOsteoarthritis    -303.2352448
[1 3 2 0]
Image path: /home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/58JMlG.jpg
Top 4 results: 
class_id            probability
-------------------------------
NoPneumonia         94.8370636
Pneumonia           73.6230433
Osteoarthritis      -33.7906867
NoOsteoarthritis    -123.2651830
[0 2 3 1]
Image path: /home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/52ZRFq.jpg
Top 4 results: 
class_id            probability
-------------------------------
NoOsteoarthritis    362.7389431
Osteoarthritis      346.4933395
Pneumonia           -202.8681755
NoPneumonia         -514.7834301
[0 2

In [1]:
import os
idx = "20240411_164123"
BASE_PATH = os.path.join('/home/hojun/Documents/project/ma2024/pytorch_template/results/train/',idx)
val_path = "/home/hojun/Documents/project/ma2024/pytorch_template/dataset/val/"
test_path = "/home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/"
os.makedirs(os.path.join(BASE_PATH,"bin"),exist_ok=True)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1./255)

test_set = test_datagen.flow_from_directory(val_path,
                                            target_size=(224, 224),
                                            batch_size=1,
                                            shuffle=True,
                                            class_mode='categorical')
import pickle

# key 와 value 값을 바꾸어 줍니다.
class4 = dict()
for key,value in test_set.class_indices.items():
    class4[value] = key

with open(os.path.join(BASE_PATH,'bin/class4.pickle'), 'wb') as f:
    pickle.dump(class4, f)
import openvino
import openvino as ov
from openvino.tools.mo import convert_model
import torch
model = torch.load(os.path.join(BASE_PATH,'model.pth'))
print(os.path.join(BASE_PATH,'model.pth'))
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).cuda()
torch.onnx.export(model, (dummy_input, ), os.path.join(BASE_PATH,'bin/model.onnx'))
ov_model = convert_model(os.path.join(BASE_PATH,'bin/model.onnx'))
# ov_model = convert_model(model)
ov.save_model(ov_model, os.path.join(BASE_PATH,'bin/model.xml'))
# --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
core = ov.Core()
# --------------------------- Step 2. Read a model --------------------------------------------------------------------
# (.xml and .bin files) or (.onnx file)
model = core.read_model(os.path.join(BASE_PATH,'bin/model.xml'))

if len(model.inputs) != 1: print('Sample supports only single input topologies')
if len(model.outputs) != 1: print('Sample supports only single output topologies')
import os

# Get a list of all files in the directory
file_list = [f for f in os.listdir(test_path) if os.path.isfile(os.path.join(test_path, f))]
import pickle

with open(os.path.join(BASE_PATH,'bin/class4.pickle'), 'rb') as f:
    print(os.path.join(BASE_PATH,'bin/class4.pickle'))
    labels = pickle.load(f)
import pandas as pd
import cv2
import numpy as np

# --------------------------- Step 4. Loading model to the device -----------------------------------------------------
print('Loading the model to the plugin')
compiled_model = core.compile_model(model, 'CPU')
img_height = 224

result = []

res = open(os.path.join(BASE_PATH,"result.txt"), "w")

print('Starting inference in synchronous mode')
cnt = 0;

for file in file_list:
    # --------------------------- Step 5. Set up input --------------------------------------------------------------------
    # Read input image
    image_path = os.path.join(test_path, file)
    org_image = cv2.imread(image_path)
    image = cv2.resize(org_image, (img_height,img_height))
    #img = cv2.resize(img, (224,224), fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image / 255.

    # Add N dimension
    nchw_tensor = np.expand_dims(image, 0)

    # NHWC -> NCHW
    input_tensor = np.transpose(nchw_tensor, (0,3,1,2))#(0,3,1,2))
    

    # --------------------------- Step 6. Create infer request and do inference synchronously -----------------------------
    results = compiled_model.infer_new_request({0:input_tensor})

    # --------------------------- Step 7. Process output ------------------------------------------------------------------
    predictions = next(iter(results.values()))
    output_node_name = next(iter(results.keys())) #
    #for (k,v) in results.items():
    #  print(k,'=',v)

    #print("values: ",end=' ')
    #print(results.values())

    # Change a shape of a numpy.ndarray with results to get another one with one dimension
    probs = predictions.reshape(-1)
    #print(outs)
    #print("probs: ",end=' ')
    print(probs)

    # Get an array of 4 class IDs in descending order of probability
    top_4 = np.argsort(probs)[-4:][::-1]
    print(top_4)

    #res = res[output_node_name]
    #idx = np.argsort(res[0])[-1]
    #prob = res[0][idx]*100

    header = 'class_id            probability'
    print(f'Image path: {image_path}')
    print('Top 4 results: ')
    print(header)
    print('-' * len(header))

    for class_id in top_4:
        probability_indent = ' ' * (len('class_id           ') - len(labels[class_id]) + 1)
        print(f'{labels[class_id]}{probability_indent}{probs[class_id]*100:.7f}')

    #print(labels[top_4[0]], probs[top_4[0]]*100)
    #print(labels[top_4[1]], probs[top_4[1]]*100)
    #result.append(top_4[0])
    #res.write(file+':'+str(labels[top_4[0]])+'\n')
    res.write(file+':top1 = ('+labels[top_4[0]]+')'+str(probs[top_4[0]]*100)+', top2 = ('+labels[top_4[1]]+')'+str(probs[top_4[1]]*100)+'\n')

#res.write(str(result)+'\n')
res.close()

2024-04-11 17:05:59.401840: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 967 images belonging to 4 classes.
/home/hojun/Documents/project/ma2024/pytorch_template/results/train/20240411_164123/model.pth


  return forward_call(*args, **kwargs)


/home/hojun/Documents/project/ma2024/pytorch_template/results/train/20240411_164123/bin/class4.pickle
Loading the model to the plugin
Starting inference in synchronous mode
[8.7038457e-04 8.5998792e-04 7.3783117e-04 9.9753177e-01]
[3 0 1 2]
Image path: /home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/khHyOj.jpg
Top 4 results: 
class_id            probability
-------------------------------
Pneumonia           99.7531772
NoOsteoarthritis    0.0870385
NoPneumonia         0.0859988
Osteoarthritis      0.0737831
[3.3353394e-04 9.9923241e-01 3.8087642e-04 5.3182361e-05]
[1 2 0 3]
Image path: /home/hojun/Documents/project/ma2024/pytorch_template/dataset/test/58JMlG.jpg
Top 4 results: 
class_id            probability
-------------------------------
NoPneumonia         99.9232411
Osteoarthritis      0.0380876
NoOsteoarthritis    0.0333534
Pneumonia           0.0053182
[5.5827486e-04 7.7623973e-04 9.9856263e-01 1.0286776e-04]
[2 1 0 3]
Image path: /home/hojun/Documents/project