In [None]:
from google.colab import files

# Upload kaggle.json through the browser dialog. The return value (a dict of
# {filename: file bytes}) is bound to a name instead of being left as the
# cell's last expression, so the file contents are not echoed into the
# notebook's saved output.
uploaded = files.upload()
print("Uploaded:", ", ".join(uploaded))


In [None]:
from google.colab import drive

# Mount Google Drive; the training cell writes its checkpoints under this path.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Create ~/.kaggle if it does not exist yet (-p: no error when it already does).
!mkdir -p ~/.kaggle
# Move kaggle.json from the current working directory into ~/.kaggle/.
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
import kagglehub

# Download the latest version of the dataset and remember the local
# directory that kagglehub returns for it.
DATASET_HANDLE = "grassknoted/asl-alphabet"
data_dir = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", data_dir)

In [None]:
# @title Train MobileNetV3 on the ASL alphabet dataset
import copy
import os
import shutil
import subprocess

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms, models
from tqdm import tqdm

# Report whether PyTorch can see a CUDA device.
if torch.cuda.is_available():
    print("현재 사용 중인 디바이스:", "GPU 사용 가능")
else:
    print("현재 사용 중인 디바이스:", "CPU 사용 중")

# Learning rate passed to the optimizer.
learning_rate = 1e-6

# Dataset definition
class GestureDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``data_dir`` per label.

    Args:
        data_dir: Root directory that contains one folder per entry of
            ``labels``.
        labels: Sequence of folder names (expected to be unique). A sample's
            target is the position of its folder name in this sequence.
            Labels whose folder does not exist are skipped.
        transform: Optional callable applied to the resized PIL image.
        img_size: Side length in pixels of the square the image is resized to
            before ``transform`` runs.

    Attributes:
        image_paths: File paths of all samples, grouped by label in the order
            of ``labels`` and sorted by file name within each label.
        targets: Integer class index for each entry of ``image_paths``.
    """

    def __init__(self, data_dir, labels, transform=None, img_size=64):
        self.data_dir = data_dir
        self.labels = labels
        self.transform = transform
        self.img_size = img_size
        self.image_paths = []
        self.targets = []

        # enumerate() yields the class index directly instead of searching
        # the label list once per image.
        for target, label in enumerate(self.labels):
            folder_path = os.path.join(data_dir, label)
            if not os.path.isdir(folder_path):
                continue
            # os.listdir() returns entries in arbitrary order; sorting keeps
            # sample indices stable so that index-based splits made with a
            # fixed seed select the same files on every run.
            for img_name in sorted(os.listdir(folder_path)):
                img_path = os.path.join(folder_path, img_name)
                # Skip nested directories; only regular files are samples.
                if not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.targets.append(target)

    def __len__(self):
        """Return the number of samples found at construction time."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``.

        The image is converted to 3-channel RGB and resized to
        ``img_size`` x ``img_size`` before the optional transform is applied.
        """
        img_path = self.image_paths[idx]
        label = self.targets[idx]
        # The context manager closes the underlying file; convert() and
        # resize() return new in-memory images, so ``img`` stays usable.
        # Forcing RGB keeps grayscale/RGBA files compatible with transforms
        # that assume exactly three channels.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB").resize((self.img_size, self.img_size))

        if self.transform:
            img = self.transform(img)

        return img, label

# Point data_dir at the training images inside the downloaded dataset.
# data_dir is rebuilt from its own previous value, so the guard keeps a re-run
# of this cell from appending the sub-path a second time (which would leave
# the dataset empty, because missing label folders are silently skipped).
_train_subdir = "asl_alphabet_train/asl_alphabet_train"
if not data_dir.endswith(_train_subdir):
    data_dir = os.path.join(data_dir, _train_subdir)

# Class labels: the 26 uppercase letters 'A'..'Z'.
alphabet_labels = [chr(i) for i in range(ord('A'), ord('Z') + 1)]

# Preprocessing pipelines. Both convert the PIL image to a tensor, resize it to
# 224x224 and normalize each channel; only the training pipeline adds a random
# horizontal flip between the tensor conversion and the resize.
_resize_and_normalize = [
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

train_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.RandomHorizontalFlip(), *_resize_and_normalize]
)

val_transform = transforms.Compose([transforms.ToTensor(), *_resize_and_normalize])

# Build the dataset and data loaders.
# NOTE(review): the dataset resizes every image to 64x64 before the transform
# resizes it again to 224x224 — confirm the 64px intermediate size is intended.
full_dataset = GestureDataset(data_dir=data_dir, labels=alphabet_labels, transform=train_transform, img_size=64)
assert len(full_dataset) > 0, f"No images found under {data_dir}"

# A Subset uses the transform of the dataset it wraps, so validation and test
# need their own view of the data: a shallow copy shares the same file list
# (identical indices) but applies the evaluation transform, which has no
# random augmentation.
eval_dataset = copy.copy(full_dataset)
eval_dataset.transform = val_transform

# 80/20 train+val/test split, then 90/10 train/val split of the first part.
train_idx, test_idx = train_test_split(np.arange(len(full_dataset)), test_size=0.2, random_state=42)
train_idx, val_idx = train_test_split(train_idx, test_size=0.1, random_state=42)

train_dataset = Subset(full_dataset, train_idx)
val_dataset = Subset(eval_dataset, val_idx)
test_dataset = Subset(eval_dataset, test_idx)
assert len(train_dataset) + len(val_dataset) + len(test_dataset) == len(full_dataset)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Choose the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MobileNetV3-Large with pretrained weights; the final classifier layer is
# replaced so the output size equals the number of labels.
model = models.mobilenet_v3_large(pretrained=True)
head_in_features = model.classifier[3].in_features
model.classifier[3] = nn.Linear(head_in_features, len(alphabet_labels))
model = model.to(device)

# Loss function and optimizer (created after the model is on its device).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training function
def _print_gpu_status():
    """Print the output of ``nvidia-smi``, or a notice when it is not on PATH."""
    if shutil.which("nvidia-smi") is None:
        print("nvidia-smi를 찾을 수 없습니다.")
        return
    result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=False)
    print(result.stdout or result.stderr)


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=100, patience=5,
                save_dir='/content/drive/MyDrive/Colab_Notebooks/Projects/ASL_main'):
    """Train ``model`` with early stopping on validation accuracy.

    Each epoch runs one pass over ``train_loader``, evaluates accuracy on
    ``val_loader``, and writes checkpoints (``state_dict``) to ``save_dir``:
    ``mobilenet_gesture_model_best.pth`` whenever validation accuracy improves
    and ``mobilenet_gesture_model_epoch_<n>.pth`` after every completed epoch.
    Training stops early once accuracy has not improved for ``patience``
    consecutive epochs; no per-epoch checkpoint is written for the epoch that
    triggers the stop.

    Args:
        model: Network to train. Batches are moved to the device of its first
            parameter (CPU if it has no parameters).
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Maximum number of epochs.
        patience: Number of consecutive non-improving epochs that triggers
            early stopping.
        save_dir: Directory for checkpoints; created if it does not exist.

    Returns:
        The best validation accuracy observed, as a fraction in [0, 1].
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    os.makedirs(save_dir, exist_ok=True)
    best_path = os.path.join(save_dir, 'mobilenet_gesture_model_best.pth')

    best_acc = 0.0
    epochs_no_improve = 0

    for epoch in tqdm(range(num_epochs), desc='Epochs'):
        model.train()
        running_loss = 0.0
        train_seen = 0
        for images, labels in tqdm(train_loader, desc='Training Batches', leave=False):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            train_seen += batch_size

        # Average over the samples actually processed: with drop_last=True
        # the loader yields fewer samples than len(train_loader.dataset).
        epoch_loss = running_loss / max(train_seen, 1)

        # Validation
        model.eval()
        correct = 0
        val_seen = 0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc='Validation Batches', leave=False):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                correct += torch.sum(preds == labels).item()
                val_seen += labels.size(0)

        # max(..., 1) avoids a ZeroDivisionError on an empty validation loader.
        epoch_acc = correct / max(val_seen, 1)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Validation Accuracy: {epoch_acc * 100:.2f}%")

        # Report GPU status every 5 epochs.
        if (epoch + 1) % 5 == 0:
            print("\nGPU 상태 확인:")
            _print_gpu_status()

        # Keep the weights of the best-scoring epoch.
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            epochs_no_improve = 0
            print("최고 정확도 모델이 저장되었습니다.")
            torch.save(model.state_dict(), best_path)
        else:
            epochs_no_improve += 1
            print(f"개선되지 않은 에포크 수: {epochs_no_improve}/{patience}")
            if epochs_no_improve >= patience:
                print("조기 중단 발동")
                break

        # Per-epoch checkpoint.
        torch.save(model.state_dict(), os.path.join(save_dir, f'mobilenet_gesture_model_epoch_{epoch+1}.pth'))
        print(f"Epoch {epoch+1} 모델이 저장되었습니다.")

    return best_acc

# Run training and report the best validation accuracy reached.
best_accuracy = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=100,
)
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")
