In [1]:
import os
import time
import random
import pandas as pd
import os
import shutil

In [2]:
from torch.utils.data import Dataset, DataLoader
import albumentations as A
import torch
import numpy as np
from albumentations.pytorch import ToTensorV2
from PIL import Image
import random

# Run configuration.
# Side length in pixels, passed as both height and width to A.Resize below.
img_size = 256
# Learning rate; not referenced in the visible cells (presumably for a training loop elsewhere).
LR = 1e-3
# Epoch count; likewise not referenced in the visible cells.
EPOCHS = 30
# Batch size handed to the test DataLoader.
BATCH_SIZE = 32
# Worker-process count intended for DataLoader(num_workers=...); 0 loads in the main process.
num_workers = 0

# Fix every random seed so repeated runs produce the same results.
SEED = 42
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# affects child processes launched after this point, not the running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN autotuning (benchmark=True) may select different kernels from run to
# run, which defeats the seeding above; request deterministic kernels instead.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Dataset that pairs each image file listed in a CSV with its target value.
class ImageDataset(Dataset):
    """Map-style dataset backed by a CSV of ``(file name, target)`` rows.

    Args:
        csv: Path of a CSV file. Every row is unpacked as ``(name, target)``,
            so the file must have exactly two columns.
        path: Directory that each file name is joined onto.
        transform: Optional callable invoked as ``transform(image=array)``;
            the ``'image'`` entry of its result replaces the raw array.
    """

    def __init__(self, csv, path, transform=None):
        # Keep the rows as a NumPy array for cheap positional access.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the ``idx``-th image as RGB and return ``(image, target)``."""
        name, target = self.df[idx]
        # Always produce an H x W x 3 array: grayscale, palette or RGBA files
        # would otherwise reach a 3-channel Normalize with the wrong channel
        # count. The ``with`` block closes the file handle deterministically.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target
    
# Transform pipeline for test images: resize to a square, normalise each
# channel, then convert the array to a tensor.
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    # Per-channel mean/std (the widely used ImageNet statistics).
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# The sample-submission CSV supplies the test file names.
tst_dataset = ImageDataset(
    "data/sample_submission.csv",
    "data/test/",
    transform=tst_transform
)
print(len(tst_dataset))

tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    # Keep batches in CSV row order so outputs line up with the IDs.
    shuffle=False,
    # Honour the config constant instead of a second hard-coded value.
    num_workers=num_workers,
    pin_memory=True
)

3140


In [3]:
import pandas as pd
import os

# Class labels. A label's position in the list equals the numeric prefix it
# carries. The strings are data, so the spelling (including "lisence") is kept.
class_names = [
    "0_account_number",
    "1_application_for_payment_of_pregnancy_medical_expenses",
    "2_car_dashboard",
    "3_confirmation_of_admission_and_discharge",
    "4_diagnosis",
    "5_driver_lisence",
    "6_medical_bill_receipts",
    "7_medical_outpatient_certificate",
    "8_national_id_card",
    "9_passport",
    "10_payment_confirmation",
    "11_pharmaceutical_receipt",
    "12_prescription",
    "13_resume",
    "14_state_of_opinion",
    "15_vehicle_registration_certificate",
    "16_vehicle_registration_plate",
]



In [49]:
# Prediction CSVs to evaluate. os.listdir returns entries in arbitrary order
# and includes non-CSV items (e.g. .ipynb_checkpoints), so keep only *.csv
# files and sort them (lexicographically) for a stable evaluation order.
pred_list = sorted(
    entry for entry in os.listdir('./pred/') if entry.endswith('.csv')
)

In [50]:
# Reference labels that the prediction files are compared against; the original
# note describes this CSV as the human-provided answers. The evaluation loop
# reads its 'ID' and 'target' columns.
answer_df = pd.read_csv("data/test_predictions.csv")


In [53]:
def target_match_ratios(pred_df, answer_df, source=None):
    """Return, for each predicted target, the share of rows matching the reference.

    Rows are grouped by the *predicted* target, so each value is the fraction
    of that target's predictions that agree with the reference (a per-class
    precision). Targets that are never predicted do not appear in the result.

    Args:
        pred_df: Frame with 'ID' and 'target' columns holding the predictions.
        answer_df: Frame with 'ID' and 'target' columns holding the reference.
        source: Optional label (e.g. a file name) used in the error message.

    Returns:
        A float Series indexed by predicted target value, in sorted order.

    Raises:
        ValueError: If the two frames do not list the same IDs in the same order.
    """
    same_length = len(pred_df) == len(answer_df)
    if not same_length or not (
        pred_df['ID'].to_numpy() == answer_df['ID'].to_numpy()
    ).all():
        label = f" ({source})" if source is not None else ""
        raise ValueError(f"prediction IDs do not match the reference IDs{label}")

    predicted = pred_df['target'].to_numpy()
    # 1 where the prediction equals the reference, 0 otherwise (compared by position).
    is_match = pd.Series((predicted == answer_df['target'].to_numpy()).astype(int))
    by_target = is_match.groupby(predicted)
    return by_target.sum() / by_target.size()


for csv_file in pred_list:
    # Predictions written by one model run.
    pred_df = pd.read_csv('./pred/'+csv_file)
    ratios = target_match_ratios(pred_df, answer_df, source=csv_file)

    print("##### 각 target별 비율: #######", csv_file)
    # Round to four decimals first, then format, to keep the established output.
    print("".join(
        f"{target}: {np.round(ratio, 4):.4f}   " for target, ratio in ratios.items()
    ))

    # Unweighted mean over the targets that were predicted at least once.
    print(f"총 비율 : {np.round(np.mean(ratios.to_numpy()), 4)}")

##### 각 target별 비율: ####### 21.csv
0: 0.9852   1: 0.8462   2: 0.9749   3: 0.6624   4: 0.9424   5: 0.9896   6: 0.9474   7: 0.4654   8: 0.9612   9: 1.0000   10: 0.9585   11: 0.9439   12: 0.9659   13: 0.9481   14: 0.6364   15: 0.9849   16: 0.9949   
총 비율 : 0.8945
##### 각 target별 비율: ####### 28.csv
0: 0.9943   1: 0.4812   2: 1.0000   3: 0.3920   4: 0.5022   5: 0.9785   6: 0.8384   7: 0.4444   8: 0.9433   9: 0.9787   10: 0.7522   11: 0.7189   12: 0.7214   13: 0.7086   14: 0.3438   15: 0.8341   16: 0.8667   
총 비율 : 0.7352
##### 각 target별 비율: ####### 29.csv
0: 0.9894   1: 0.8454   2: 1.0000   3: 0.5230   4: 0.6881   5: 0.9895   6: 0.8889   7: 0.4775   8: 0.9850   9: 0.9948   10: 0.7022   11: 0.8945   12: 0.9069   13: 0.8516   14: 0.4507   15: 0.9554   16: 0.9755   
총 비율 : 0.8305
##### 각 target별 비율: ####### 24.csv
0: 1.0000   1: 0.8713   2: 1.0000   3: 0.6818   4: 0.8594   5: 0.9900   6: 0.9706   7: 0.5304   8: 1.0000   9: 0.9950   10: 0.9320   11: 0.9541   12: 0.9949   13: 0.8876   14: 0.6941