### MLP-Mixer

In [15]:
import timm
import torch.nn as nn

# Build timm's 'mixer_b16_224' with pretrained weights, then replace its classifier
# head with a fresh nn.Linear that has `num_classes` outputs.
mixer = timm.create_model('mixer_b16_224', pretrained=True)
num_classes = 3  # number of target classes for the new head
mixer.head = nn.Linear(mixer.head.in_features, num_classes)
# Switch to evaluation mode; as the cell's last expression this also displays the module repr.
mixer.eval()


MlpMixer(
  (stem): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (blocks): Sequential(
    (0): MixerBlock(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp_tokens): Mlp(
        (fc1): Linear(in_features=196, out_features=384, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (fc2): Linear(in_features=384, out_features=196, bias=True)
        (drop2): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp_channels): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop2): Dropout(p=0.0, inplace=False)
      

In [16]:
import torch
import torch.nn as nn
# Load the fine-tuned MLP-Mixer weights; map_location='cpu' lets this work without a GPU.
# NOTE(review): torch.load can unpickle arbitrary objects (depending on the PyTorch version /
# weights_only setting), so only load checkpoints from a trusted source. The absolute,
# machine-specific path would be better placed in a configurable constant.
checkpoint = torch.load("C:/Users/USER/Desktop/output/mlp_mixer_class_3_final_final_checkpoint.bin", map_location='cpu')
# Copy the weights into the model; the recorded output reports that all keys matched.
mixer.load_state_dict(checkpoint)



<All keys matched successfully>

### ViT

In [18]:
import timm
# Build timm's 'vit_base_patch16_224' with pretrained weights.
# NOTE(review): `nn` is not imported in this cell; it relies on an earlier cell's
# `import torch.nn as nn`.
vit = timm.create_model('vit_base_patch16_224', pretrained=True)

num_classes = 3  # number of target classes for the new head
# Replace the classifier head with a fresh nn.Linear that has `num_classes` outputs.
# NOTE(review): unlike the mixer cell, this cell does not call vit.eval().
vit.head = nn.Linear(vit.head.in_features, num_classes)


In [19]:
import torch
import torch.nn as nn
# Load the fine-tuned ViT weights; map_location='cpu' lets this work without a GPU.
# NOTE(review): this rebinds the `checkpoint` name that the mixer cell also uses, and
# torch.load can unpickle arbitrary objects, so only load trusted files.
checkpoint = torch.load("C:/Users/USER/Desktop/output/vit_class_3_final_final_89.556.bin", map_location='cpu')
# Copy the weights into the model; the recorded output reports that all keys matched.
vit.load_state_dict(checkpoint)

<All keys matched successfully>

In [35]:
# 이미지 전처리 함수 정의
from torchvision import transforms
from PIL import Image

# Preprocessing for single-image inference: resize the shorter side to 224, take the
# central 224x224 crop, convert to a tensor and normalise each channel from [0, 1] to [-1, 1].
# NOTE(review): the batch-evaluation loader in this notebook uses Resize((224, 224)) with
# no centre crop; confirm which variant matches the training pipeline.
preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load the image and preprocess it.
image_path = "C:/Users/USER/Downloads/이상한표정.jpg"
# Force three channels: Normalize above is configured for exactly 3 channels, so a
# grayscale, RGBA or CMYK file would otherwise fail. The context manager also closes
# the underlying file handle as soon as the pixel data has been copied.
with Image.open(image_path) as opened_image:
    input_image = opened_image.convert("RGB")
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0)  # add the batch dimension -> (1, 3, 224, 224)

In [36]:
import torch.nn.functional as F
import numpy as np

# Print numpy arrays with two decimals and without scientific notation.
np.set_printoptions(precision=2, suppress=True)
expression_dict= {0:'기쁨',1:'당황', 2:'중립'}  # class index -> expression label

# Put both models in inference mode explicitly: the ViT setup cell never calls
# vit.eval(), so this cell must not rely on another cell having done it.
mixer.eval()
vit.eval()

with torch.no_grad():
    # MLP-Mixer prediction. The input is moved to wherever the model currently lives,
    # so the cell works whether or not an evaluation cell has moved the models to the
    # GPU; the probabilities are brought back to the CPU for averaging and printing.
    mlp_output = mixer(input_batch.to(next(mixer.parameters()).device))
    mlp_probs = F.softmax(mlp_output, dim=1).cpu()

    # ViT prediction (same device handling).
    vit_output = vit(input_batch.to(next(vit.parameters()).device))
    vit_probs = F.softmax(vit_output, dim=1).cpu()

    # Average ensemble of the two probability vectors.
    avg_probs = (mlp_probs + vit_probs) / 2
    # .item() requires a batch of exactly one image.
    final_prediction = avg_probs.argmax(dim=1).item()

# Report each model's probabilities and the ensemble's, as percentages.
print("MLP-Mixer (%):", (mlp_probs * 100).numpy().round(2))
print("VIT (%) :", (vit_probs * 100).numpy().round(2))
print("평균 앙상블 모델 (%):", (avg_probs * 100).numpy().round(2))
str_ex= expression_dict[final_prediction]
print("Final prediction:", str_ex)

# Per-class mean over the batch; for a single image this repeats the ensemble row above.
avg_probs_per_class = (avg_probs.mean(dim=0) * 100).numpy().round(2)
print("평균 앙상블 모델 (%):", avg_probs_per_class)


MLP-Mixer (%): [[81.28 17.58  1.14]]
VIT (%) : [[18.27 81.27  0.46]]
평균 앙상블 모델 (%): [[49.77 49.42  0.8 ]]
Final prediction: 기쁨
평균 앙상블 모델 (%): [49.77 49.42  0.8 ]


### 배치 데이터 셋으로 f1 score 계산


In [5]:
import logging
import os
import json
from PIL import Image
from torchvision.transforms import ToTensor
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, RandomSampler, DistributedSampler, SequentialSampler
import torch
from sklearn.metrics import f1_score
import torch.nn.functional as F

class CustomDataset(torch.utils.data.Dataset):
    """Image dataset driven by one JSON annotation file per image.

    Every entry of ``annotations_dir`` is treated as an annotation file. The image it
    describes is looked up in ``img_dir`` under the annotation file name without its
    final extension (``face_001.jpg.json`` -> ``face_001.jpg``). The class label is
    read from the ``'faceExp_uploader'`` key of the JSON document.
    """

    # Maps the annotation's expression string to the integer class index.
    LABEL_TO_INT = {'기쁨': 0, '당황': 1, '중립': 2}

    def __init__(self, img_dir,annotations_dir,  transform=None):
        """
        img_dir (string): directory that contains all images
        annotations_dir (string): directory that contains the JSON annotation files
        transform (callable, optional): optional transform applied to each PIL image
        """
        self.img_dir = img_dir
        self.transform = transform

        self.annotation_dir= annotations_dir

        # List the annotation directory once. Re-listing on every access costs a full
        # directory scan per sample and gives no ordering guarantee; sorting makes the
        # index -> sample mapping stable across runs and platforms.
        self.label_files = sorted(os.listdir(self.annotation_dir))

    def __len__(self):
        """Return the number of annotation files found at construction time."""
        return len(self.label_files)

    def _image_path(self, label_name):
        """Return the image path that belongs to the annotation file `label_name`."""
        # splitext drops only the final extension, so names with extra dots
        # ('a.v2.jpg.json' -> 'a.v2.jpg') are not truncated.
        return os.path.join(self.img_dir, os.path.splitext(label_name)[0])

    def _read_label(self, label_name):
        """Return the class index stored in the annotation file `label_name`."""
        with open(os.path.join(self.annotation_dir, label_name), 'r', encoding='utf-8') as f:
            # Kept as an attribute for backward compatibility: it holds the most
            # recently loaded annotation document.
            self.image_labels = json.load(f)
        # Only the 'faceExp_uploader' field is used as the label.
        label = self.image_labels['faceExp_uploader']
        return self.LABEL_TO_INT[label]

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for sample `idx`.

        If the image cannot be opened or decoded, the error is printed and the next
        sample (wrapping around) is returned instead, together with that sample's own
        label. Raises IndexError for an out-of-range `idx` and RuntimeError when no
        image in the whole dataset can be loaded.
        """
        total = len(self.label_files)
        candidate = idx
        # At most one pass over the dataset, so an all-broken dataset terminates with
        # an error instead of recursing without bound.
        for _ in range(max(total, 1)):
            # Indexing with the caller's idx first keeps IndexError for out-of-range access.
            label_name = self.label_files[candidate]
            img_path = self._image_path(label_name)
            try:
                # convert() forces the (lazy) decode inside the try block and guarantees
                # three channels; the context manager closes the file handle.
                with Image.open(img_path) as opened:
                    image = opened.convert('RGB')
            except (IOError, OSError) as e:
                print(f"Error loading image {img_path}: {e}")
                candidate = (candidate + 1) % total
                continue

            label_tensor = torch.tensor(self._read_label(label_name), dtype=torch.long)

            if self.transform:
                image_tensor = self.transform(image)
            else:
                # Without a transform, fall back to a plain tensor conversion.
                image_tensor = ToTensor()(image)

            return image_tensor, label_tensor

        raise RuntimeError(f"No loadable image found in {self.img_dir}")

In [6]:
def get_loader(img_size,  test_img_dir, test_annotation_dir, eval_batch_size):
    """Build the DataLoader used to evaluate on the test set.

    Args:
        img_size: side length; every image is resized to (img_size, img_size).
        test_img_dir: directory containing the test images.
        test_annotation_dir: directory containing the JSON annotation files.
        eval_batch_size: number of samples per batch.

    Returns:
        A DataLoader over CustomDataset that yields (image_batch, label_batch).
    """
    # Resize, convert to a tensor, then normalise each channel from [0, 1] to [-1, 1].
    transform_test = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    testset = CustomDataset(img_dir=test_img_dir,
                            annotations_dir=test_annotation_dir,
                            transform=transform_test)

    # Evaluation needs no shuffling; a sequential sampler keeps the batch order the
    # same on every run, so results and any per-batch debugging are reproducible.
    test_sampler = SequentialSampler(testset)

    # Pinned host memory only helps host-to-GPU copies, and requesting it on a
    # CPU-only machine just triggers a warning, so enable it only when CUDA is present.
    test_loader = DataLoader(testset,
                             sampler=test_sampler,
                             batch_size=eval_batch_size,
                             num_workers=0,
                             pin_memory=torch.cuda.is_available())

    return  test_loader

In [25]:
# Module-level copy of the test transform: resize to 224x224, convert to a tensor and
# normalise each channel with mean 0.5 / std 0.5. get_loader builds the same pipeline
# internally when called with img_size=224.
transform_test = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

In [7]:
# Test dataset locations: the image directory and the directory of JSON annotation files.
# NOTE(review): absolute, machine-specific paths; consider a configurable base directory.
test_img_dir = "C:/Users/USER/Desktop/test_img_2030"
test_annotation_dir = "C:/Users/USER/Desktop/test_label_2030"

In [28]:
# Standalone dataset instance built with the module-level `transform_test`.
# NOTE(review): the evaluation cells build their own dataset through get_loader; no
# visible cell reads this `dataset` variable.
dataset=CustomDataset(img_dir=test_img_dir,
                            annotations_dir=test_annotation_dir,
                            transform=transform_test)

In [15]:
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, recall_score, f1_score
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move both models to the evaluation device.
mixer.to(device)
vit.to(device)

eval_batch_size = 32
test_loader = get_loader(224, test_img_dir, test_annotation_dir, eval_batch_size)

# Switch both models to evaluation mode.
mixer.eval()
vit.eval()

all_labels = []
all_predictions = []

with torch.no_grad():
    # The loop variable is named `images`, not `input_batch`: the single-image
    # inference cell keeps its tensor in the global `input_batch`, and reusing that
    # name here would silently replace it with the last test batch.
    for images, labels in tqdm(test_loader, desc="Evaluating"):  # tqdm shows progress
        images, labels = images.to(device), labels.to(device)

        # Class probabilities from each model.
        mlp_output = mixer(images)
        mlp_probs = F.softmax(mlp_output, dim=1)

        vit_output = vit(images)
        vit_probs = F.softmax(vit_output, dim=1)

        # Average ensemble: mean of the two probability vectors, then arg-max.
        avg_probs = (mlp_probs + vit_probs) / 2
        predictions = avg_probs.argmax(dim=1)

        # Collect true and predicted labels (moved to the CPU for numpy conversion).
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predictions.cpu().numpy())

# Accuracy, macro-averaged recall and macro-averaged F1 over the whole test set.
accuracy = accuracy_score(all_labels, all_predictions)
recall = recall_score(all_labels, all_predictions, average='macro')
f1 = f1_score(all_labels, all_predictions, average='macro')

print(f"Accuracy: {accuracy:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Evaluating: 100%|██████████| 57/57 [03:08<00:00,  3.31s/it]

Accuracy: 0.9100
Recall: 0.9100
F1 Score: 0.9099





In [18]:
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, recall_score, f1_score
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move both models to the evaluation device.
mixer.to(device)
vit.to(device)

eval_batch_size = 32
test_loader = get_loader(224, test_img_dir, test_annotation_dir, eval_batch_size)

# Switch both models to evaluation mode.
mixer.eval()
vit.eval()

# # Metrics for the MLP-Mixer model alone (disabled)
# all_labels_mixer = []
# all_predictions_mixer = []

# with torch.no_grad():
#     for images, labels in tqdm(test_loader, desc="Evaluating MLP Mixer"):
#         images, labels = images.to(device), labels.to(device)
#
#         # Model prediction
#         mlp_output = mixer(images)
#         mlp_probs = F.softmax(mlp_output, dim=1)
#         predictions = mlp_probs.argmax(dim=1)
#
#         # Collect true and predicted labels (moved to the CPU for numpy conversion)
#         all_labels_mixer.extend(labels.cpu().numpy())
#         all_predictions_mixer.extend(predictions.cpu().numpy())

# # Metric computation
# accuracy_mixer = accuracy_score(all_labels_mixer, all_predictions_mixer)
# recall_mixer = recall_score(all_labels_mixer, all_predictions_mixer, average='macro')
# f1_mixer = f1_score(all_labels_mixer, all_predictions_mixer, average='macro')
# print(f"MLP Mixer - Accuracy: {accuracy_mixer:.4f}, Recall: {recall_mixer:.4f}, F1 Score: {f1_mixer:.4f}")

# Metrics for the ViT model alone.
all_labels_vit = []
all_predictions_vit = []

with torch.no_grad():
    # The loop variable is named `images`, not `input_batch`: the single-image
    # inference cell keeps its tensor in the global `input_batch`, and reusing that
    # name here would silently replace it with the last test batch.
    for images, labels in tqdm(test_loader, desc="Evaluating ViT"):
        images, labels = images.to(device), labels.to(device)

        # Model prediction.
        vit_output = vit(images)
        vit_probs = F.softmax(vit_output, dim=1)
        predictions = vit_probs.argmax(dim=1)

        # Collect true and predicted labels (moved to the CPU for numpy conversion).
        all_labels_vit.extend(labels.cpu().numpy())
        all_predictions_vit.extend(predictions.cpu().numpy())

# Metric computation.
# NOTE(review): recall/F1 use average='weighted' here while the ensemble cell uses
# average='macro'; the two agree only when every class has the same support.
accuracy_vit = accuracy_score(all_labels_vit, all_predictions_vit)
recall_vit = recall_score(all_labels_vit, all_predictions_vit, average='weighted')
f1_vit = f1_score(all_labels_vit, all_predictions_vit, average='weighted')
print(f"ViT - Accuracy: {accuracy_vit:.4f}, Recall: {recall_vit:.4f}, F1 Score: {f1_vit:.4f}")

Evaluating ViT: 100%|██████████| 57/57 [02:59<00:00,  3.15s/it]

ViT - Accuracy: 0.8917, Recall: 0.8917, F1 Score: 0.8912





In [22]:
import numpy as np
def evaluate(model, dataloader, device):
    """Evaluate `model` on `dataloader` and print/return summary metrics.

    The model is put in eval mode and run without gradients. Each batch's predicted
    class is the arg-max over dimension 1 of the model output.

    Returns:
        (accuracy, f1, recall), where F1 and recall are support-weighted averages.
    """
    model.eval()
    predicted = []
    expected = []
    with torch.no_grad():
        for batch in dataloader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            batch_preds = torch.argmax(model(images), dim=1)
            # Append element by element so the collected values match a flat list.
            for value in batch_preds.cpu().numpy():
                predicted.append(value)
            for value in labels.cpu().numpy():
                expected.append(value)

    all_preds = np.array(predicted)
    all_labels = np.array(expected)

    # Fraction of positions where prediction and label agree.
    accuracy = np.mean(all_preds == all_labels)
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    for line in (f"Accuracy: {accuracy}", f"F1 Score: {f1}", f"Recall: {recall}"):
        print(line)

    return accuracy, f1, recall

# Set up the model and the data loader.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vit.to(device)

# Rebuild the test loader (224x224 inputs, batch size 32) and evaluate ViT on it.
test_loader = get_loader(img_size=224, test_img_dir=test_img_dir, test_annotation_dir=test_annotation_dir, eval_batch_size=32)
accuracy, f1, recall = evaluate(vit, test_loader, device)


Accuracy: 0.8916666666666667
F1 Score: 0.8912300921734742
Recall: 0.8916666666666667


In [None]:
# ViT - Accuracy: 0.8961, Recall: 0.8961, F1 Score: 0.8954  (fine-tuned twice)
# ViT - Accuracy: 0.8917, Recall: 0.8917, F1 Score: 0.8912


In [32]:
import numpy as np
import torch
from sklearn.metrics import f1_score, recall_score

def evaluate(model, dataloader, device):
    """Evaluate `model` on `dataloader` and print/return summary metrics.

    The model is put in eval mode and run without gradients. Each batch's predicted
    class is the arg-max over dimension 1 of the model output.

    Returns:
        (accuracy, f1, recall), where F1 and recall are support-weighted averages.
    """
    model.eval()
    predicted = []
    expected = []
    with torch.no_grad():
        for batch in dataloader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            batch_preds = torch.argmax(model(images), dim=1)
            # Append element by element so the collected values match a flat list.
            for value in batch_preds.cpu().numpy():
                predicted.append(value)
            for value in labels.cpu().numpy():
                expected.append(value)

    all_preds = np.array(predicted)
    all_labels = np.array(expected)

    # Fraction of positions where prediction and label agree.
    accuracy = np.mean(all_preds == all_labels)
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    for line in (f"Accuracy: {accuracy}", f"F1 Score: {f1}", f"Recall: {recall}"):
        print(line)

    return accuracy, f1, recall

# Set up the model and the data loader.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vit.to(device)

# Build the test dataset and data loader.
test_loader = get_loader(img_size=224, test_img_dir=test_img_dir, test_annotation_dir=test_annotation_dir, eval_batch_size=32)

# Check the class distribution of the test set.
# NOTE(review): check_class_distribution is not defined in any visible cell; it must
# already exist in the kernel for this cell to run.
check_class_distribution(test_loader.dataset)

# Check the distribution of the model's predictions.
# NOTE(review): check_model_predictions is likewise not defined in any visible cell.
check_model_predictions(vit, test_loader, device)

# Evaluate the model.
accuracy, f1, recall = evaluate(vit, test_loader, device)


Counter({1: 600, 0: 600, 2: 600})
Predictions distribution: {0: 613, 1: 563, 2: 624}
Accuracy: 0.8916666666666667
F1 Score: 0.8912300921734742
Recall: 0.8916666666666667


In [36]:
from sklearn.metrics import classification_report

def evaluate_detailed(model, dataloader, device, class_names=('기쁨', '당황', '중립')):
    """Evaluate `model` on `dataloader` and print accuracy plus a per-class report.

    Args:
        model: classifier whose output has one score per class along dimension 1.
        dataloader: iterable yielding (images, labels) batches.
        device: device the batches are moved to before the forward pass.
        class_names: display name for class index 0, 1, 2, ... in that order.

    Returns:
        (accuracy, class_report), where class_report is the text produced by
        sklearn's classification_report with four decimal digits.
    """
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score in each row.
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    # Pass the label indices explicitly: with target_names alone the report raises
    # when a class occurs in neither the labels nor the predictions, because the
    # number of names no longer matches the number of classes found in the data.
    class_report = classification_report(
        all_labels,
        all_preds,
        labels=list(range(len(class_names))),
        target_names=list(class_names),
        digits=4,
    )

    print(f"Accuracy: {accuracy}")
    print("Classification Report:\n", class_report)

    return accuracy, class_report

# Evaluate the model.
# NOTE(review): relies on `test_loader` and `device` left in the kernel by an earlier cell.
accuracy, class_report = evaluate_detailed(vit, test_loader, device)


Accuracy: 0.9022222222222223
Classification Report:
               precision    recall  f1-score   support

          기쁨     0.9345    0.9517    0.9430       600
          당황     0.8690    0.8733    0.8712       600
          중립     0.9027    0.8817    0.8921       600

    accuracy                         0.9022      1800
   macro avg     0.9021    0.9022    0.9021      1800
weighted avg     0.9021    0.9022    0.9021      1800

