In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, Normalize, Compose
from transformers import ViTForImageClassification, ViTConfig, ViTImageProcessor
from torchvision.transforms import ToTensor, Resize, Normalize, Compose
from tqdm import tqdm  # tqdm import
from torch import nn
from sklearn.metrics import f1_score

2024-04-18 08:00:12.752207: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-18 08:00:12.752396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-18 08:00:12.928447: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [1]:
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

early_stopping = EarlyStopping(
    monitor='val_macro_f1',
    min_delta=0.01,
    patience=3,
    verbose=True,
    mode='max'
)

checkpoint_callback = ModelCheckpoint(
    monitor='val_macro_f1',
    dirpath='./kaggle/working/',
    filename='vit-model-{epoch:02d}-{val_macro_f1:.4f}',
    save_top_k=3,
    mode='max',
)

lr_reducer = ReduceLROnPlateau(
    monitor='val_macro_f1',
    factor=0.3,
    patience=3,
    verbose=True
)

callbacks = [early_stopping, checkpoint_callback, lr_reducer]

In [None]:
data_dir = '/kaggle/input/dataset/project (2)'
batch_size = 32
num_workers = 4

# Define the transforms
transform = Compose([
    Resize((224, 224)),
    ToTensor(),  # PIL 이미지를 Tensor로 변환
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

# Load the dataset
train_dataset = ImageFolder(os.path.join(data_dir, 'x_train'), transform=transform)
val_dataset = ImageFolder(os.path.join(data_dir, 'x_val'), transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# DataLoader로부터 이미지와 라벨을 가져오는 방법
for batch_images, batch_labels in train_dataset:
    # batch_labels는 각 이미지의 라벨을 나타냅니다.
    print(batch_labels)
    for label in batch_labels:
        # 라벨의 원래 이름을 가져오는 방법은 데이터셋 객체의 클래스를 이용하는 것입니다.
        original_label_name = train_loader.classes[label.item()]
        print("Original Label Name:", original_label_name)

In [2]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, Normalize, Compose
from transformers import ViTForImageClassification, ViTConfig, ViTImageProcessor
from torchvision.transforms import ToTensor, Resize, Normalize, Compose
from tqdm import tqdm  # tqdm import
from torch import nn
from sklearn.metrics import f1_score

# Set up the dataset and dataloader
data_dir = '/kaggle/input/dataset/project (2)'
batch_size = 32
num_workers = 4

# Define the transforms
transform = Compose([
    Resize((224, 224)),
    ToTensor(),  # PIL 이미지를 Tensor로 변환
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

# Load the dataset
train_dataset = ImageFolder(os.path.join(data_dir, 'x_train'), transform=transform)
val_dataset = ImageFolder(os.path.join(data_dir, 'x_val'), transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# Load the pre-trained ViT model
config = ViTConfig.from_pretrained('google/vit-base-patch16-224')
config.num_labels = len(train_dataset.classes)  # Set the number of output classes
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', config=config,ignore_mismatched_sizes=True)
model.classifier = nn.Linear(model.config.hidden_size, len(train_dataset.classes))

# Fine-tune the model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
print(device)

learning_rate = 2e-5
num_epochs = 10

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()

# Import f1_score from sklearn.metrics
from sklearn.metrics import f1_score

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    
    # train_loader 반복 시 tqdm 적용
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images = images.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)[0]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
        # Training 중 각 iteration마다 macro F1 점수 계산 및 출력
        preds = outputs.argmax(dim=1)
#         macro_f1 = f1_score(labels.cpu().numpy(), preds.cpu().numpy(), average='macro')
#         print(f"Training iteration: Macro F1: {macro_f1:.4f}")
    
    train_loss /= len(train_loader)
    
    model.eval()
    val_loss = 0
    y_true = []
    y_pred = []
    
    # val_loader 반복 시 tqdm 적용
    for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)[0]
        loss = criterion(outputs, labels)
        
        val_loss += loss.item()
        
        preds = outputs.argmax(dim=1)
        
        # 각 iteration마다 macro F1 점수 계산 및 출력
#         macro_f1 = f1_score(labels.cpu().numpy(), preds.cpu().numpy(), average='macro')
#         print(f"Validation iteration: Macro F1: {macro_f1:.4f}")
        
        # 실제 레이블과 예측된 레이블을 리스트에 추가
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())
    
    val_loss /= len(val_loader)
    
    # 전체 val_loader를 평가한 후 macro F1 점수 계산
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Macro F1: {macro_f1:.4f}')
    
    early_stopping.on_validation_epoch_end(epoch, {'val_loss': val_loss, 'val_macro_f1': macro_f1})
    checkpoint_callback.on_validation_epoch_end(epoch, {'val_loss': val_loss, 'val_macro_f1': macro_f1})
# Save the fine-tuned model
torch.save(model.state_dict(), 'fine_tuned_vit_model.pth')

2024-04-18 05:47:06.171337: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-18 05:47:06.171399: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-18 05:47:06.172987: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([25]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkp

cuda


Epoch 1/10 - Training: 100%|██████████| 396/396 [03:48<00:00,  1.73it/s]


Training iteration: Macro F1: 0.8626


Epoch 1/10 - Validation: 100%|██████████| 99/99 [00:21<00:00,  4.60it/s]


Validation iteration: Macro F1: 0.0187
Epoch [1/10], Train Loss: 0.7041, Train Macro F1: 0.8626, Val Loss: 0.1225, Val Macro F1: 0.0187


Epoch 2/10 - Training: 100%|██████████| 396/396 [03:48<00:00,  1.73it/s]


Training iteration: Macro F1: 0.9895


Epoch 2/10 - Validation: 100%|██████████| 99/99 [00:21<00:00,  4.65it/s]


Validation iteration: Macro F1: 0.0190
Epoch [2/10], Train Loss: 0.0522, Train Macro F1: 0.9895, Val Loss: 0.0772, Val Macro F1: 0.0190


Epoch 3/10 - Training:   5%|▍         | 19/396 [00:11<03:57,  1.59it/s]


KeyboardInterrupt: 

In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, Normalize, Compose, ToTensor

# 1. 테스트 데이터셋 준비
test_dir = '/kaggle/input/dataset/test'
test_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])
test_dataset = ImageFolder(test_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# 2. 모델 로드 및 예측 수행
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', config=config)
model.load_state_dict(torch.load('fine_tuned_vit_model.pth'))
model.to(device)
model.eval()

all_preds = []
for images, _ in test_loader:
    images = images.to(device)
    outputs = model(images)[0]
    preds = outputs.argmax(dim=1)
    all_preds.extend(preds.cpu().numpy())

# 3. 제출 파일 생성
test_filenames = [os.path.splitext(os.path.basename(path))[0] for path in test_dataset.imgs]
submission = pd.DataFrame({'id': test_filenames, 'label': all_preds})
submission.to_csv('submission.csv', index=False)

In [None]:
# batch_size = 64

# # 데이터로더를 생성합니다.
# train_dataloader = DataLoader(training_data, batch_size=batch_size)
# test_dataloader = DataLoader(test_data, batch_size=batch_size)

# for X, y in val_loader:
#     print(f"Shape of X [N, C, H, W]: {X.shape}")
#     print(f"Shape of y: {y.shape} {y.dtype}")
#     break

In [4]:
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from torch.utils.data import Dataset, DataLoader
from PIL import Image

test_dir = '/kaggle/input/dataset/test'
test_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

class TestDataset(Dataset):
    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        img_path = self.file_paths[idx]
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, img_path

# test_dir의 이미지 파일 리스트 생성
test_files = [os.path.join(test_dir, f) for f in os.listdir(test_dir)]

# 레이블이 없는 테스트 데이터셋 클래스 인스턴스화
test_dataset = TestDataset(test_files, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

In [7]:
import os
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, Normalize, Compose, ToTensor
from transformers import ViTForImageClassification, ViTConfig, ViTImageProcessor
from torch import nn
from torch.utils.data import Dataset
from tqdm import tqdm


# 2. 모델 로드 및 예측 수행
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = ViTConfig.from_pretrained('google/vit-base-patch16-224')
config.num_labels = 25 # 테스트 데이터셋의 클래스 수로 설정
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', config=config,ignore_mismatched_sizes=True)
model.classifier = nn.Linear(model.config.hidden_size, 25)
model.load_state_dict(torch.load('/kaggle/working/fine_tuned_vit_model.pth'))
model.to(device)
model.eval()

all_preds = []
all_paths = []
for images, names in tqdm(test_loader, '로딩'):
    images = images.to(device)
    outputs = model(images)[0]
    preds = outputs.argmax(dim=1).tolist()
    all_preds.extend(preds)
     all_paths.extend(names)

# 3. 제출 파일 생성
test_filenames = [os.path.splitext(os.path.basename(path))[0] for path in all_paths]
submission = pd.DataFrame({'id': test_filenames, 'label': all_preds})
submission.to_csv('submission.csv', index=False)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([25]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([25, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
로딩: 100%|██████████| 425/425 [00:44<00:00,  9.62it/s]


In [9]:
# DataLoader로부터 이미지 파일의 경로 추출
all_paths = []
for batch_images, batch_paths in test_loader:
    all_paths.extend(batch_paths)

# all_paths에는 모든 이미지 파일의 경로가 저장됩니다.

In [11]:
test_filenames = [os.path.splitext(os.path.basename(path))[0] for path in all_paths]

In [4]:
# 데이터셋 객체의 classes 속성을 사용하여 각 폴더의 이름을 가져옵니다.
class_names = train_dataset.classes

In [19]:
# 각 클래스 이름에 0부터 24까지의 정수 값을 할당하는 사전 생성
class_to_index = {class_name: index for index, class_name in enumerate(class_names)}

classes={}
# 결과를 출력
for class_name, index in class_to_index.items():
    classes[index]=class_name
    print(f"{index}: {class_name}")

0: Asian Green Bee-Eater
1: Brown-Headed Barbet
2: Cattle Egret
3: Common Kingfisher
4: Common Myna
5: Common Rosefinch
6: Common Tailorbird
7: Coppersmith Barbet
8: Forest Wagtail
9: Gray Wagtail
10: Hoopoe
11: House Crow
12: Indian Grey Hornbill
13: Indian Peacock
14: Indian Pitta
15: Indian Roller
16: Jungle Babbler
17: Northern Lapwing
18: Red-Wattled Lapwing
19: Ruddy Shelduck
20: Rufous Treepie
21: Sarus Crane
22: White Wagtail
23: White-Breasted Kingfisher
24: White-Breasted Waterhen


In [20]:
classes

{0: 'Asian Green Bee-Eater',
 1: 'Brown-Headed Barbet',
 2: 'Cattle Egret',
 3: 'Common Kingfisher',
 4: 'Common Myna',
 5: 'Common Rosefinch',
 6: 'Common Tailorbird',
 7: 'Coppersmith Barbet',
 8: 'Forest Wagtail',
 9: 'Gray Wagtail',
 10: 'Hoopoe',
 11: 'House Crow',
 12: 'Indian Grey Hornbill',
 13: 'Indian Peacock',
 14: 'Indian Pitta',
 15: 'Indian Roller',
 16: 'Jungle Babbler',
 17: 'Northern Lapwing',
 18: 'Red-Wattled Lapwing',
 19: 'Ruddy Shelduck',
 20: 'Rufous Treepie',
 21: 'Sarus Crane',
 22: 'White Wagtail',
 23: 'White-Breasted Kingfisher',
 24: 'White-Breasted Waterhen'}

In [23]:
import pandas as pd
all_pred =pd.read_csv('/kaggle/working/submission.csv')
all_pred.sort_values(by='id',inplace=True)
all_pred['label']=all_pred['label'].map(classes)

In [24]:
all_pred

Unnamed: 0,id,label
759,TEST_00000,Asian Green Bee-Eater
850,TEST_00001,Asian Green Bee-Eater
3621,TEST_00002,Jungle Babbler
754,TEST_00003,Sarus Crane
3748,TEST_00004,Northern Lapwing
...,...,...
4953,TEST_06781,Common Kingfisher
3202,TEST_06782,Gray Wagtail
3726,TEST_06783,Sarus Crane
2623,TEST_06784,Hoopoe


In [25]:
all_pred.to_csv('submission3.csv', index=False)

In [6]:
len(all_preds)

NameError: name 'all_preds' is not defined

In [26]:
submission = pd.DataFrame({'id': test_filenames, 'label': all_preds})
submission.to_csv('submission.csv', index=False)

TypeError: unhashable type: 'list'

In [22]:
submission.describe()

Unnamed: 0,label
count,6786.0
mean,11.818892
std,7.234369
min,0.0
25%,6.0
50%,12.0
75%,18.0
max,24.0


In [19]:
submission

Unnamed: 0,id,label
0,TEST_04038,16
1,TEST_04342,18
2,TEST_00733,13
3,TEST_03764,3
4,TEST_03144,17
...,...,...
6781,TEST_00659,5
6782,TEST_06036,7
6783,TEST_06657,20
6784,TEST_04091,16


In [None]:
test_filenames

In [None]:
all_preds

In [None]:
test_filenames = [os.path.splitext(os.path.basename(path))[0] for path in test_dataset.imgs]
submission = pd.DataFrame({'id': test_filenames, 'label': all_preds})
submission.to_csv('submission.csv', index=False)

In [8]:
import os
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, Normalize, Compose, ToTensor

# 1. 테스트 데이터셋 준비
test_dir = '/kaggle/input/dataset'
test_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])
test_dataset = ImageFolder(test_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# 2. 모델 로드 및 예측 수행
device = 'cuda' if torch.cuda.is_available() else 'cpu'

config = ViTConfig.from_pretrained('google/vit-base-patch16-224')
config.num_labels = len(test_dataset.classes)  # 테스트 데이터셋의 클래스 수로 설정
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', config=config)
model.classifier = nn.Linear(model.config.hidden_size, len(test_dataset.classes))
model.load_state_dict(torch.load('fine_tuned_vit_model.pth'))
model.to(device)
model.eval()


all_preds = []
for images, _ in test_loader:
    images = images.to(device)
    outputs = model(images)[0]
    preds = outputs.argmax(dim=1)
    all_preds.extend([test_dataset.classes[p] for p in preds.cpu().numpy()])

# 3. 제출 파일 생성
test_filenames = [os.path.splitext(os.path.basename(path))[0] for path in test_dataset.imgs]
submission = pd.DataFrame({'id': test_filenames, 'label': all_preds})
submission.to_csv('submission.csv', index=False)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([25]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([25, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


IndexError: list index out of range

In [9]:
print(len(test_dataset.classes))
print(config.num_labels)

2
25
