In [11]:
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.utils.data import DataLoader
from PIL import Image
import torch.nn.functional as F
import pandas as pd

In [2]:
image_size = 224
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 클래스 이름 정렬 방식 그대로 불러오기
train_path = "D:/Project/Action/processed_dataset/train"
class_names = datasets.ImageFolder(train_path).classes
num_classes = len(class_names)

In [4]:
weights = EfficientNet_B0_Weights.DEFAULT
model = efficientnet_b0(weights=weights)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model.load_state_dict(torch.load("D:/Project/Action/efficientnet_b0_Action.pt"))
model = model.to(device)
model.eval()

  model.load_state_dict(torch.load("D:/Project/Action/efficientnet_b0_Action.pt"))


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [5]:
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3)
])

In [6]:
inference_root = "D:/Project/Action/cat_frames2"
valid_exts = ('.jpg', '.jpeg', '.png')

image_paths = []
for dirpath, _, filenames in os.walk(inference_root):
    for fname in filenames:
        if fname.lower().endswith(valid_exts):
            image_paths.append(os.path.join(dirpath, fname))

# 추론 수행
print(f"Total images to infer: {len(image_paths)}\n")

Total images to infer: 300



In [13]:
results = []

for img_path in image_paths:
    try:
        image = Image.open(img_path).convert("RGB")
        input_tensor = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(input_tensor)
            probs = F.softmax(output, dim=1)
            conf, predicted = torch.max(probs, 1)
            pred_class = class_names[predicted.item()]
            confidence = conf.item()

        relative_path = os.path.relpath(img_path, inference_root)
        results.append({
            "image_path": relative_path,
            "predicted_label": pred_class,
            "confidence": round(confidence, 4)
        })

        print(f"{relative_path} → {pred_class} ({confidence:.4f})")

    except Exception as e:
        print(f"[ERROR] {img_path} → {e}")

목욕하는 고양이\frame_000.jpg → 머리를 들이대는 동작 (0.7798)
목욕하는 고양이\frame_001.jpg → 머리를 들이대는 동작 (0.9207)
목욕하는 고양이\frame_002.jpg → 머리를 들이대는 동작 (0.8671)
목욕하는 고양이\frame_003.jpg → 허리를 아치로 세우는 동작 (0.4720)
목욕하는 고양이\frame_004.jpg → 허리를 아치로 세우는 동작 (0.7366)
목욕하는 고양이\frame_005.jpg → 허리를 아치로 세우는 동작 (0.8154)
목욕하는 고양이\frame_006.jpg → 허리를 아치로 세우는 동작 (0.7633)
목욕하는 고양이\frame_007.jpg → 허리를 아치로 세우는 동작 (0.7475)
목욕하는 고양이\frame_008.jpg → 머리를 들이대는 동작 (0.4792)
목욕하는 고양이\frame_009.jpg → 머리를 들이대는 동작 (0.4891)
목욕하는 고양이\frame_010.jpg → 머리를 들이대는 동작 (0.7705)
목욕하는 고양이\frame_011.jpg → 머리를 들이대는 동작 (0.8968)
목욕하는 고양이\frame_012.jpg → 머리를 들이대는 동작 (0.7413)
목욕하는 고양이\frame_013.jpg → 머리를 들이대는 동작 (0.6807)
목욕하는 고양이\frame_014.jpg → 머리를 들이대는 동작 (0.5518)
목욕하는 고양이\frame_015.jpg → 머리를 들이대는 동작 (0.5680)
목욕하는 고양이\frame_016.jpg → 허리를 아치로 세우는 동작 (0.5411)
목욕하는 고양이\frame_017.jpg → 머리를 들이대는 동작 (0.6562)
목욕하는 고양이\frame_018.jpg → 머리를 들이대는 동작 (0.8631)
목욕하는 고양이\frame_019.jpg → 머리를 들이대는 동작 (0.8177)
목욕하는 고양이\frame_020.jpg → 머리를 들이대는 동작 (0.6189)
목욕하는 고양이\frame_0

In [15]:
from collections import Counter

def smooth_labels(df, window_size=5):
    labels = df['predicted_label'].tolist()
    smoothed = []

    for i in range(len(labels)):
        start = max(0, i - window_size // 2)
        end = min(len(labels), i + window_size // 2 + 1)
        window = labels[start:end]
        majority_label = Counter(window).most_common(1)[0][0]
        smoothed.append(majority_label)

    df['smoothed_label'] = smoothed
    return df


In [23]:
def enforce_min_duration(labels, min_len=3):
    result = []
    prev = labels[0]
    count = 1
    for i in range(1, len(labels)):
        if labels[i] == prev:
            count += 1
        else:
            if count < min_len:
                result.extend([labels[i]] * count)
            else:
                result.extend([prev] * count)
            prev = labels[i]
            count = 1
    result.extend([prev] * count)
    return result

df['fixed_label_length_label'] = enforce_min_duration(df['smoothed_label'].tolist(), min_len=3)


In [24]:
df = pd.DataFrame(results, columns=["image_path", "predicted_label", "confidence"])
df = smooth_labels(df, window_size=7)
df['fixed_label_length_label'] = enforce_min_duration(df['smoothed_label'].tolist(), min_len=3)
df.to_csv("inference_results_smoothed_final.csv", index=False, encoding="utf-8-sig")
print("\nResults saved to 'inference_results.csv'")


Results saved to 'inference_results.csv'
