In [None]:
#데이터셋 다운로드
import os

tgz_path = "/root/.keras/datasets/102flowers.tgz"
os.makedirs(os.path.dirname(tgz_path), exist_ok=True)  # 디렉토리 생성

!wget -O {tgz_path} http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz

# 파일이 제대로 다운로드되었는지 확인
if os.path.exists(tgz_path):
    print(f"File downloaded successfully: {tgz_path}")
else:
    print("File download failed!")

In [None]:
# Unpack the downloaded archive into its own directory.
import os
import tarfile

tgz_path = "/root/.keras/datasets/102flowers.tgz"
extracted_path = "/root/.keras/datasets/102flowers_extracted"

if not os.path.exists(tgz_path):
    # Nothing to extract: the download cell has not produced the archive.
    print("102flowers.tgz file not found!")
else:
    print(f"Extracting: {tgz_path}...")
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # on Python versions that support extraction filters, consider passing
    # filter="data" — confirm the runtime version first.
    with tarfile.open(tgz_path, "r:gz") as archive:
        archive.extractall(path=extracted_path)
    print("Extraction complete.")
    print("Contents in extracted directory:", os.listdir(extracted_path))

In [None]:
#라벨 다운로드
import os

# 다운로드 경로 설정
labels_path = "/root/.keras/datasets/102flowers_extracted/imagelabels.mat"
os.makedirs(os.path.dirname(labels_path), exist_ok=True)

# 파일 다운로드
!wget -O {labels_path} http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat

# 다운로드 확인
if os.path.exists(labels_path):
    print(f"File downloaded successfully: {labels_path}")
else:
    print("File download failed!")

In [None]:
#분할 정보 다운로드
# setid.mat 파일 다운로드
setid_path = "/root/.keras/datasets/102flowers_extracted/setid.mat"
os.makedirs(os.path.dirname(setid_path), exist_ok=True)

!wget -O {setid_path} http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat

# 다운로드 확인
if os.path.exists(setid_path):
    print(f"File downloaded successfully: {setid_path}")
else:
    print("File download failed!")

In [None]:
# Sanity-check the downloads by loading both .mat files.
# Relies on `os` having been imported by an earlier cell.
import scipy.io

# File locations
labels_path = "/root/.keras/datasets/102flowers_extracted/imagelabels.mat"
setid_path = "/root/.keras/datasets/102flowers_extracted/setid.mat"

# Labels file: `labels_data` is only bound when the file is present.
if not os.path.exists(labels_path):
    print(f"Labels file not found at {labels_path}!")
else:
    labels_data = scipy.io.loadmat(labels_path)
    print("Labels file loaded successfully!")
    print("Keys in labels file:", labels_data.keys())

# Split information file: `setid_data` is only bound when the file is present.
if not os.path.exists(setid_path):
    print(f"SetID file not found at {setid_path}!")
else:
    setid_data = scipy.io.loadmat(setid_path)
    print("SetID file loaded successfully!")
    print("Keys in SetID file:", setid_data.keys())

In [None]:
# Show which model names in the installed timm match the "*coatnet*" pattern.
from timm import list_models

coatnet_names = list_models("*coatnet*")
print(coatnet_names)

In [None]:
# Revised data-split code: build one DataFrame of (filename, label) rows and
# slice it into train / validation / test frames.
# Expects `labels_data`, `setid_data` and `os` from earlier cells.
import pandas as pd

# Labels are shifted by one so that they start at 0.
labels = labels_data["labels"][0] - 1

# The id arrays are shifted by one as well so they can be used as zero-based
# row positions with `DataFrame.iloc` below.
train_ids, val_ids, test_ids = (
    setid_data[split_key][0] - 1 for split_key in ("trnid", "valid", "tstid")
)

# Image paths follow the pattern image_00001.jpg, image_00002.jpg, ...
image_dir = "/root/.keras/datasets/102flowers_extracted/jpg"
image_files = sorted(
    os.path.join(image_dir, f"image_{number:05d}.jpg")
    for number in range(1, len(labels) + 1)
)

# One row per image
df = pd.DataFrame({"filename": image_files, "label": labels})

# Positional selection of each split, with a fresh 0..n-1 index
train_df = df.iloc[train_ids].reset_index(drop=True)
val_df = df.iloc[val_ids].reset_index(drop=True)
test_df = df.iloc[test_ids].reset_index(drop=True)

print(f"Training data: {len(train_df)} samples")
print(f"Validation data: {len(val_df)} samples")
print(f"Test data: {len(test_df)} samples")


In [None]:
# Image preprocessing function
def preprocess_image(image_path, label, img_size=(224, 224)):
    """Load one JPEG and return it with a one-hot encoded label.

    Args:
        image_path: Path of the JPEG file to read.
        label: Class index passed to ``tf.one_hot``.
        img_size: Target size handed to ``tf.image.resize``.

    Returns:
        A ``(image, one_hot_label)`` pair: the resized 3-channel image
        divided by 255.0, and the label one-hot encoded with depth 102.
    """
    import tensorflow as tf

    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, img_size)
    scaled = resized / 255.0  # normalization
    return scaled, tf.one_hot(label, 102)

# Dataset creation function
def create_dataset(df, batch_size=32, is_training=True):
    """Build a batched ``tf.data.Dataset`` from a DataFrame of image paths.

    Args:
        df: DataFrame with a ``"filename"`` column and a ``"label"`` column.
        batch_size: Number of samples per batch.
        is_training: When true, the dataset is shuffled (buffer of 1000) and
            repeated without a count, so iteration over it never ends on
            its own.

    Returns:
        A dataset of ``(image, one_hot_label)`` batches with prefetching
        enabled.
    """
    # `tf` is otherwise only imported locally inside `preprocess_image`, so
    # without this import the function fails with NameError when called.
    import tensorflow as tf

    dataset = tf.data.Dataset.from_tensor_slices((df["filename"].values, df["label"].values))
    dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    if is_training:
        dataset = dataset.shuffle(1000).repeat()
    dataset = dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

# Create the tf.data datasets: only the training split is built with
# is_training=True.
train_dataset, val_dataset, test_dataset = (
    create_dataset(frame, batch_size=32, is_training=training_flag)
    for frame, training_flag in (
        (train_df, True),
        (val_df, False),
        (test_df, False),
    )
)

# Plain Python lists of paths and labels for the PyTorch pipeline
train_paths, train_labels = train_df["filename"].tolist(), train_df["label"].tolist()
val_paths, val_labels = val_df["filename"].tolist(), val_df["label"].tolist()
test_paths, test_labels = test_df["filename"].tolist(), test_df["label"].tolist()

print(f"Training data: {len(train_paths)} samples")
print(f"Validation data: {len(val_paths)} samples")
print(f"Test data: {len(test_paths)} samples")

In [None]:
# These must be run one line per cell (one install command at a time).
!pip install pillow
!pip install thop

In [None]:
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score
from thop import profile

# PyTorch dataset class definition
class FlowerDataset(Dataset):
    """Map-style dataset that loads images from disk on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned index-by-index with
            ``image_paths``.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is opened with Pillow, converted to RGB and, when a
        transform was supplied, passed through it.
        """
        # Open inside a context manager so the underlying file handle is
        # closed as soon as the RGB copy has been made, instead of being
        # left for garbage collection.
        with Image.open(self.image_paths[idx]) as source_image:
            image = source_image.convert("RGB")
        label = self.labels[idx]
        # Compare against None explicitly: a transform object that defines
        # __len__/__bool__ could otherwise be skipped by a truthiness test.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Image preprocessing (transform) definition: resize to 224x224, convert to a
# tensor, then normalize each channel with the listed mean/std
# (NOTE(review): these look like the usual ImageNet statistics — confirm).
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# PyTorch datasets; these rebind the train/val/test_dataset names.
train_dataset, val_dataset, test_dataset = (
    FlowerDataset(paths, targets, transform=transform)
    for paths, targets in (
        (train_paths, train_labels),
        (val_paths, val_labels),
        (test_paths, test_labels),
    )
)

# PyTorch DataLoaders: only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=32, shuffle=False)
    for split_dataset in (val_dataset, test_dataset)
)

print(f"Number of training batches: {len(train_loader)}")

# Model definition
from torchvision.models import resnet18
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = resnet18(pretrained=True)
# Replace the output layer so that it produces 102 class scores.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, 102)
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Evaluation function definition
def evaluate_model(model, data_loader, device):
    """Evaluate ``model`` on every batch of ``data_loader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled during the pass. The loss is computed with the module-level
    ``criterion``.

    Args:
        model: Network to evaluate.
        data_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy, precision, recall)`` where ``avg_loss`` is the
        sum of per-batch losses divided by the number of batches, and
        precision/recall use ``average="weighted"``.
    """
    model.eval()
    predicted, expected = [], []
    loss_sum = 0.0

    with torch.no_grad():
        for batch_images, batch_labels in data_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Forward pass and loss for this batch
            logits = model(batch_images)
            loss_sum += criterion(logits, batch_labels).item()

            # Predicted class = index of the largest output along dim 1
            batch_preds = torch.max(logits, 1)[1]
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())

    # Metrics are evaluated left to right: loss, accuracy, precision, recall.
    return (
        loss_sum / len(data_loader),
        accuracy_score(expected, predicted),
        precision_score(expected, predicted, average="weighted"),
        recall_score(expected, predicted, average="weighted"),
    )

# FLOPs calculation function
def calculate_flops(model, input_size=(1, 3, 224, 224)):
    """Profile ``model`` with thop on a random input of shape ``input_size``.

    The random input is moved to the module-level ``device`` before profiling.

    Returns:
        The two values produced by ``thop.profile``, in the same order.
        NOTE(review): thop's own documentation calls the first value MACs
        rather than FLOPs — confirm which unit is meant before reporting it.
    """
    probe = torch.randn(*input_size)
    probe = probe.to(device)
    op_count, param_count = profile(model, inputs=(probe,))
    return op_count, param_count

# Model training loop: 10 passes over train_loader.
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss after every epoch
    print(f"Epoch [{epoch+1}/10], Loss: {running_loss / len(train_loader):.4f}")

# Measure performance on the test split
test_metrics = evaluate_model(model, test_loader, device)
test_loss, test_accuracy, test_precision, test_recall = test_metrics
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Precision: {test_precision * 100:.2f}%")
print(f"Test Recall: {test_recall * 100:.2f}%")

# Operation count and parameter count -> measure of computational cost
flops, params = calculate_flops(model)
print(f"FLOPs: {flops / 1e9:.2f} GFLOPs")
print(f"Parameters: {params / 1e6:.2f} M")