# 確認 GPU 類型
---

In [None]:
import torch

# Fail fast when no CUDA device is attached to the runtime: the training cells
# below are far too slow on CPU.
if not torch.cuda.is_available():
    raise RuntimeError("GPU not available. CPU training will be too slow.")
print("device name", torch.cuda.get_device_name(0))

device name Tesla P100-PCIE-16GB


# 是否要掛載 Google Drive
---

In [None]:
# Mount Google Drive at /content/drive (Colab-only API); later cells read and
# cache datasets under /content/drive/MyDrive/datasets.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 確認 TPU 規格

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
print("Tensorflow version " + tf.__version__)

try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError:
  raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# 安裝 transformers, datasets, ... 相依套件
---

In [None]:
%%capture
!pip install git+https://github.com/huggingface/datasets.git
!pip install git+https://github.com/huggingface/transformers.git
!pip install soundfile
!pip install jiwer
!git clone https://github.com/huggingface/transformers.git
!apt install git-lfs
!git config --global user.email "gary109@gmail.com"
!git config --global user.name "GARY"
!git config --global credential.helper store
!pip install wandb
!wandb login 2cf656515a3b158f4f603aeba63181236de2fc1b

In [None]:
 !pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-bndwutfm
  Running command git clone -q https://github.com/openai/CLIP.git /tmp/pip-req-build-bndwutfm
Collecting ftfy
  Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.5 MB/s 
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369387 sha256=7b4c8bcca55b466e88a74d0807f6b51958ee9bca67f072f3f09113a6f37d9595
  Stored in directory: /tmp/pip-ephem-wheel-cache-umhfy376/wheels/fd/b9/c3/5b4470e35ed76e174bff77c92f91da82098d5e35fd5bc8cdac
Successfully built clip
Installing collected packages: ftfy, clip
Successfully installed clip-1.0 ftfy-6.1.1


# 登入 huggingface 
---

In [None]:
! huggingface-cli login
# from huggingface_hub import notebook_login
# notebook_login()


        _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
        _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
        _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
        _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
        _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

        To login, `huggingface_hub` now requires a token generated from https://huggingface.co/settings/tokens.
        (Deprecated, will be removed in v0.3.0) To login with username and password instead, interrupt with Ctrl+C.
        
Token: 
Login successful
Your token has been saved to /root/.huggingface/token


# 安裝加速器
---

In [None]:
%%capture
!pip install accelerate deepspeed

In [None]:
!accelerate config

In which compute environment are you running? ([0] This machine, [1] AWS (Amazon SageMaker)): 0
Which type of machine are you using? ([0] No distributed training, [1] multi-CPU, [2] multi-GPU, [3] TPU): 3
What is the name of the function in your script that should be launched in all parallel scripts? [main]: 
How many TPU cores should be used for distributed training? [1]:


In [None]:
!accelerate test

# 下載 orchid219_classification 程式碼
--- 

In [None]:
! git clone https://gary109:Gygy844109109@gitlab.com/gary109/orchid219_classification.git

Cloning into 'orchid219_classification'...
remote: Enumerating objects: 87, done.[K
remote: Counting objects: 100% (87/87), done.[K
remote: Compressing objects: 100% (85/85), done.[K
remote: Total 87 (delta 42), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (87/87), done.


In [None]:
%cd orchid219_classification

/content/orchid219_classification


# 載入 orchid219 訓練資料集
---

In [None]:
from datasets import load_dataset
# Build the dataset through the `orchid219.py` loading script, resolved relative
# to the current directory (the cloned orchid219_classification repo).
# NOTE(review): use_auth_token=True presumably relies on the token saved by
# `huggingface-cli login` -- confirm.
dataset = load_dataset("orchid219.py", use_auth_token=True)
dataset

No config specified, defaulting to: orchid219/orchid219


Downloading and preparing dataset orchid219/orchid219 to /root/.cache/huggingface/datasets/orchid219/orchid219/1.0.0/8f8444a00f455cca182e267fafef70db843b3dd0d3ddb264f27c2accbf34d75e...


Downloading data:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset orchid219 downloaded and prepared to /root/.cache/huggingface/datasets/orchid219/orchid219/1.0.0/8f8444a00f455cca182e267fafef70db843b3dd0d3ddb264f27c2accbf34d75e. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['filename', 'image', 'category'],
        num_rows: 1971
    })
    validation: Dataset({
        features: ['filename', 'image', 'category'],
        num_rows: 219
    })
})

# 計算 Image 的 STD & MEAN

In [None]:
import cv2, os, argparse
import numpy as np
from tqdm import tqdm
import shutil
from pathlib import Path
import os

# Placeholders for the per-channel statistics; both names are rebound to the
# values returned by process_image_std_mean() further down in this cell.
orchid219_std, orchid219_mean = [], []
def delete_ipynb_checkpoints():
    """Remove notebook checkpoint directories below the current working directory.

    Every path whose name ends in ``.ipynb_checkpoints`` is searched for
    recursively and removed with ``shutil.rmtree``. A successful removal is
    reported on stdout; an ``OSError`` raised for one entry is printed and the
    scan continues with the next match.
    """
    search_root = Path(os.getcwd())
    for checkpoint_dir in search_root.rglob('*.ipynb_checkpoints'):
        try:
            shutil.rmtree(checkpoint_dir)
        except OSError as err:
            print(err)
            continue
        print("The %s is deleted successfully" % (checkpoint_dir))

def process_image_std_mean():
    """Estimate per-channel std and mean over every image of the global ``dataset``.

    Each image is read from its ``filename`` with OpenCV, scaled by 1/255, and
    reduced to a per-image channel mean and std with ``cv2.meanStdDev``. The
    per-image values are then averaged over all splits of ``dataset``.

    Note: the returned std is the *average of per-image stds*, not the std of
    the pooled pixels, and every split (including validation) contributes.

    Returns:
        tuple: ``(std, mean)`` -- in that order -- each a length-3 array with
        the channel order reversed relative to what ``cv2.imread`` returns
        (i.e. RGB, given OpenCV's default BGR layout).

    Raises:
        FileNotFoundError: if an image path cannot be read by OpenCV.
        ValueError: if the dataset contains no images at all.
    """
    # Kept from the original: clear notebook checkpoint directories under the
    # working directory before scanning.
    delete_ipynb_checkpoints()

    m_list, s_list = [], []
    for split in dataset:
        # Only the path column is needed here; iterating whole rows would also
        # materialise the `image` column for every example, which is never used.
        for path in tqdm(dataset[split]['filename']):
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError("cv2.imread could not read image: %s" % (path,))
            img = img / 255.0
            m, s = cv2.meanStdDev(img)
            m_list.append(m.reshape((3,)))
            s_list.append(s.reshape((3,)))

    if not m_list:
        raise ValueError("dataset contains no images; cannot compute mean/std")

    m = np.array(m_list).mean(axis=0, keepdims=True)
    s = np.array(s_list).mean(axis=0, keepdims=True)

    # Reverse the channel axis so the statistics come out in RGB order.
    return s[0][::-1], m[0][::-1]

# process_image_std_mean() returns (std, mean) -- std first -- so unpack in that order.
orchid219_std,orchid219_mean = process_image_std_mean()
print(orchid219_std,orchid219_mean)

100%|██████████| 1971/1971 [00:22<00:00, 88.65it/s]
100%|██████████| 219/219 [00:02<00:00, 86.43it/s]

[0.21403854 0.21571221 0.21655118] [0.48058045 0.42326896 0.36735169]





In [None]:
from transformers.utils.dummy_vision_objects import ImageGPTFeatureExtractor
import random
from PIL import ImageDraw, ImageFont, Image

def show_examples(ds, seed: int = 1234, examples_per_class: int = 3, size=(100, 100), max_labels: int = 9):
    """Build a contact sheet with a few training images per class.

    Args:
        ds: dataset dict with a ``'train'`` split whose rows carry an ``'image'``
            (PIL image) and a ``'category'`` class-label column.
        seed: seed used to shuffle the examples of each class.
        examples_per_class: number of thumbnails per row; classes with fewer
            examples simply leave the remaining cells black.
        size: ``(width, height)`` of each thumbnail in pixels.
        max_labels: only the first ``max_labels`` classes are drawn (default 9,
            the value that used to be hard-coded).

    Returns:
        PIL.Image.Image: an RGB grid with one row per class, each thumbnail
        annotated with its class name.
    """
    w, h = size
    labels = ds['train'].features['category'].names[:max_labels]
    grid = Image.new('RGB', size=(examples_per_class * w, len(labels) * h))
    draw = ImageDraw.Draw(grid)
    try:
        font = ImageFont.truetype("./fonts/LiberationMono-Bold.ttf", 24)
    except OSError:
        # The font ships with the cloned repo; fall back to Pillow's built-in
        # font when the notebook runs from a different working directory.
        font = ImageFont.load_default()

    for label_id, label in enumerate(labels):
        # Filter the dataset by a single label, shuffle it, and grab a few samples.
        matches = ds['train'].filter(lambda ex: ex['category'] == label_id)
        # Never request more rows than the class has, otherwise select() raises.
        n_shown = min(examples_per_class, len(matches))
        ds_slice = matches.shuffle(seed).select(range(n_shown))

        # Plot this label's examples along a row: column i, row label_id.
        for i, example in enumerate(ds_slice):
            box = (i * w, label_id * h)
            grid.paste(example['image'].resize(size), box=box)
            draw.text(box, str(label), (255, 255, 255), font=font)

    return grid

# Preview three images per class; the shuffle seed is drawn at random from
# [0, 1337], so each run of this cell shows a different sample.
show_examples(dataset, seed=random.randint(0, 1337), examples_per_class=3)
# dataset['train'][0]['image']

In [None]:
dataset.push_to_hub("gary109/orchid219")

# 產出 feature_extractor FOR Orchid219 ===> google/vit-base-patch16-224-in21k


In [None]:
import transformers
from transformers import AutoFeatureExtractor
# Load the preprocessing config published with the ViT checkpoint.
# NOTE(review): `feature_extractor_name` does not look like a documented
# `from_pretrained` argument and is presumably ignored -- confirm and drop it.
feature_extractor = AutoFeatureExtractor.from_pretrained(
        pretrained_model_name_or_path="google/vit-base-patch16-224-in21k",
        feature_extractor_name="google/vit-base-patch16-224-in21k",
        use_auth_token=True,
    )

# orchid219 statistics printed by the mean/std cell above:
# mean:[0.48058045 0.42326896 0.36735169] std:[0.21403854 0.21571221 0.21655118] 

ViTFeatureExtractor {
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "size": 224
}

In [None]:
# Replace the checkpoint's default normalisation (0.5 per channel) with the
# orchid219 mean/std values printed by the statistics cell earlier in this notebook.
feature_extractor.image_mean = [0.48058045,0.42326896,0.36735169]
feature_extractor.image_std = [0.21403854, 0.21571221, 0.21655118] 

In [None]:
feature_extractor

ViTFeatureExtractor {
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.48058045,
    0.42326896,
    0.36735169
  ],
  "image_std": [
    0.21403854,
    0.21571221,
    0.21655118
  ],
  "resample": 2,
  "size": 224
}

In [None]:
feature_extractor.push_to_hub('gary109/orchid219_feature_extractor',use_auth_token=True)

Cloning https://huggingface.co/gary109/orchid219_feature_extractor_vit-base-patch16-224-in21k into local empty directory.
remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/gary109/orchid219_feature_extractor_vit-base-patch16-224-in21k
   8c1b545..cc8872b  main -> main



'https://huggingface.co/gary109/orchid219_feature_extractor_vit-base-patch16-224-in21k/commit/cc8872b4873513c51281aa51489a86073d844210'

# 產出 feature_extractor FOR Orchid219 ===> facebook/data2vec-vision-base


In [None]:
import transformers
from transformers import AutoFeatureExtractor,AutoModelForImageClassification,AutoConfig
config = AutoConfig.from_pretrained("facebook/data2vec-vision-base",)
feature_extractor = AutoFeatureExtractor.from_pretrained(
        pretrained_model_name_or_path="facebook/data2vec-vision-base",
        feature_extractor_name="facebook/data2vec-vision-base",
        use_auth_token=True)
model = AutoModelForImageClassification.from_pretrained("facebook/data2vec-vision-base",use_auth_token=True)
feature_extractor
# mean:[0.48058045 0.42326896 0.36735169] std:[0.21403854 0.21571221 0.21655118] 

Some weights of Data2VecVisionForImageClassification were not initialized from the model checkpoint at facebook/data2vec-vision-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BeitFeatureExtractor {
  "crop_size": 224,
  "do_center_crop": false,
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "BeitFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "reduce_labels": false,
  "resample": 3,
  "size": 224
}

In [None]:
feature_extractor.image_mean = [0.48058045,0.42326896,0.36735169]
feature_extractor.image_std = [0.21403854, 0.21571221, 0.21655118] 

In [None]:
feature_extractor

BeitFeatureExtractor {
  "crop_size": 224,
  "do_center_crop": false,
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "BeitFeatureExtractor",
  "image_mean": [
    0.48058045,
    0.42326896,
    0.36735169
  ],
  "image_std": [
    0.21403854,
    0.21571221,
    0.21655118
  ],
  "reduce_labels": false,
  "resample": 3,
  "size": 224
}

In [None]:
config.push_to_hub('gary109/orchid219_data2vec-vision-base',use_auth_token=True)
model.push_to_hub('gary109/orchid219_data2vec-vision-base',use_auth_token=True)
feature_extractor.push_to_hub('gary109/orchid219_data2vec-vision-base',use_auth_token=True)

Upload file pytorch_model.bin:   0%|          | 3.34k/327M [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/gary109/orchid219_data2vec-vision-base
   eb27f63..a0eccdd  main -> main



# 產出 feature_extractor FOR Orchid219 ===> facebook/data2vec-vision-large


In [None]:
import transformers
from transformers import AutoFeatureExtractor,AutoModelForImageClassification,AutoConfig
config = AutoConfig.from_pretrained("facebook/data2vec-vision-large",)
feature_extractor = AutoFeatureExtractor.from_pretrained(
        pretrained_model_name_or_path="facebook/data2vec-vision-large",
        feature_extractor_name="facebook/data2vec-vision-large",
        use_auth_token=True)
model = AutoModelForImageClassification.from_pretrained("facebook/data2vec-vision-large",use_auth_token=True)
feature_extractor
# mean:[0.48058045 0.42326896 0.36735169] std:[0.21403854 0.21571221 0.21655118] 

Downloading:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/302 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.13G [00:00<?, ?B/s]

Some weights of Data2VecVisionForImageClassification were not initialized from the model checkpoint at facebook/data2vec-vision-large and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BeitFeatureExtractor {
  "crop_size": 224,
  "do_center_crop": false,
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "BeitFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "reduce_labels": false,
  "resample": 3,
  "size": 224
}

In [None]:
feature_extractor.image_mean = [0.48058045,0.42326896,0.36735169]
feature_extractor.image_std = [0.21403854, 0.21571221, 0.21655118] 

In [None]:
feature_extractor

BeitFeatureExtractor {
  "crop_size": 224,
  "do_center_crop": false,
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "BeitFeatureExtractor",
  "image_mean": [
    0.48058045,
    0.42326896,
    0.36735169
  ],
  "image_std": [
    0.21403854,
    0.21571221,
    0.21655118
  ],
  "reduce_labels": false,
  "resample": 3,
  "size": 224
}

In [None]:
config.push_to_hub('gary109/orchid219_data2vec-vision-large',use_auth_token=True)

Cloning https://huggingface.co/gary109/orchid219_data2vec-vision-large into local empty directory.


Download file pytorch_model.bin:   0%|          | 2.01k/1.13G [00:00<?, ?B/s]

Clean file pytorch_model.bin:   0%|          | 1.00k/1.13G [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/gary109/orchid219_data2vec-vision-large
   a9e2135..72862eb  main -> main



'https://huggingface.co/gary109/orchid219_data2vec-vision-large/commit/72862ebd5dc6c3419cd91a598617b9229a0beb5f'

In [None]:
model.push_to_hub('gary109/orchid219_data2vec-vision-large',use_auth_token=True)
feature_extractor.push_to_hub('gary109/orchid219_data2vec-vision-large',use_auth_token=True)

Cloning https://huggingface.co/gary109/orchid219_data2vec-vision-large into local empty directory.


Upload file pytorch_model.bin:   0%|          | 3.33k/1.13G [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/gary109/orchid219_data2vec-vision-large
   7029e10..46708a8  main -> main

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/gary109/orchid219_data2vec-vision-large
   46708a8..a9e2135  main -> main



'https://huggingface.co/gary109/orchid219_data2vec-vision-large/commit/a9e2135dd5adeccba2657b4a32a073b336e5fb77'

# FOR TPU needs
---

In [None]:
%%capture
!pip uninstall -y torch
!pip install torch==1.8.2+cpu torchvision==0.9.2+cpu -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl

# 開始訓練
---

## Fine-Tune Pre-Trained Model
---
- google
    - google/vit-base-patch16-224-in21k
    - google/vit-base-patch32-224-in21k
    - google/vit-large-patch16-224-in21k
    - google/vit-large-patch32-224-in21k
    - google/vit-huge-patch14-224-in21k
---
- gary109
    - gary109/orchid219_pretrain_vit-mae-base
    - gary109/orchid219_pretrain_vit-mae-large
    - gary109/orchid219_pretrain_vit-base-patch16-224-in21k-mae
    - gary109/orchid219_pretrain_data2vec-vision-base-mae
    - gary109/orchid219_pretrain_vit-mae-large
    - gary109/orchid219_pretrain_vit-base-mim
---


## google/vit-base-patch16-224-in21k
---

In [None]:
!accelerate launch run_image_classification.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path="google/vit-base-patch16-224-in21k" \
    --feature_extractor_name='gary109/orchid219_feature_extractor' \
    --output_dir="./orchid219_ft_vit-base-patch16-224-in21k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id orchid219_vit-base-patch16-224-in21k \
    --learning_rate 2e-5 \
    --num_train_epochs 5 \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 8 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337 \
    --cache_dir="./cache_orchid219_vit-base-patch16-224-in21k"
    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

## google/vit-base-patch32-224-in21k
---

In [None]:
! accelerate launch run_image_classification.py \
    --dataset_name "orchid219" \
    --model_name_or_path "google/vit-base-patch32-224-in21k" \
    --output_dir ./orchid219_vit-base-patch32-224-in21k/ \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id orchid219_vit-base-patch32-224-in21k \
    --learning_rate 2e-5 \
    --num_train_epochs 20 \
    --per_device_train_batch_size 80 \
    --per_device_eval_batch_size 16 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337
    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

## google/vit-large-patch16-224-in21k
---

In [None]:
! accelerate launch run_image_classification.py \
    --dataset_name "orchid219" \
    --model_name_or_path "google/vit-large-patch16-224-in21k" \
    --output_dir "./orchid219_vit-large-patch16-224-in21k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id "orchid219_vit-large-patch16-224-in21k" \
    --learning_rate 2e-5 \
    --num_train_epochs 20 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 16 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337
    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

## google/vit-large-patch32-224-in21k
---

In [None]:
! accelerate launch run_image_classification.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "google/vit-large-patch32-224-in21k" \
    --output_dir "./orchid219_vit-large-patch32-224-in21k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id "orchid219_vit-large-patch32-224-in21k" \
    --learning_rate 2e-5 \
    --num_train_epochs 20 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 16 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 1 \
    --use_auth_token \
    --seed 1337
    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

## google/vit-huge-patch14-224-in21k
---

In [None]:
! accelerate launch run_image_classification.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "google/vit-huge-patch14-224-in21k" \
    --output_dir "./orchid219_vit-huge-patch14-224-in21k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id "orchid219_vit-huge-patch14-224-in21k" \
    --learning_rate 2e-5 \
    --num_train_epochs 20 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --logging_strategy steps \
    --logging_steps 5 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 1 \
    --use_auth_token \
    --seed 1337 \
    --gradient_accumulation_steps 8 \
    --gradient_checkpointing

## gary109/orchid219_pretrain_vit-mae-large
---



In [None]:
!accelerate launch run_image_classification_ViT-MAE.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "gary109/orchid219_pretrain_vit-mae-large" \
    --output_dir="./orchid219_vit-mae-large_ft/" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id="orchid219_vit-mae-large" \
    --learning_rate 2e-5 \
    --num_train_epochs 100 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 16 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337 
    # --cache_dir="./cache_test/"
    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

# --model_name_or_path "gary109/orchid219_pretrain_vit-base-patch16-224-in21k-mae" \
    #orchid219_pretrain_vit-base-patch16-224-in21k-mae
    # --model_name_or_path "gary109/orchid219_vit-base-patch16-224-in21k" 

## gary109/orchid219_ft_pretrain_vit-base-mim
---

In [None]:
!accelerate launch run_image_classification.py \
    --model_name_or_path "gary109/orchid219_pretrain_vit-base-mim" \
    --dataset_name "gary109/orchid219" \
    --output_dir="orchid219_ft_pretrain_vit-base-mim/" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_pretrain_vit-base-mim" \
    --hub_token="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" \
    --learning_rate 2e-5 \
    --num_train_epochs 400 \
    --per_device_train_batch_size 64 \
    --per_device_eval_batch_size 64 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 1 \
    --use_auth_token="True" \
    --seed 1337 

    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

## [暫時未成功] openai/clip-vit-base-patch16
---

In [None]:
!accelerate launch run_image_classification.py \
    --model_name_or_path "google/vit-base-patch16-224-in21k" \
    --dataset_name "gary109/orchid219" \
    --output_dir="orchid219_ft_clip-vit-base-patch16-224-in21k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_clip-vit-base-patch16-224-in21k" \
    --hub_token="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" \
    --learning_rate 2e-5 \
    --num_train_epochs 400 \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 8 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 1 \
    --use_auth_token="True" \
    --seed 1337 

    # --gradient_accumulation_steps 8 \
    # --gradient_checkpointing

## gary109/orchid219_ft_data2vec-vision-base ===> facebook/data2vec-vision-base
---

In [None]:
!accelerate launch run_image_classification.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "gary109/orchid219_data2vec-vision-base" \
    --output_dir="orchid219_ft_data2vec-vision-base" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train --do_eval --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_data2vec-vision-base" \
    --learning_rate 2e-5 \
    --num_train_epochs 50 \
    --per_device_train_batch_size 64 \
    --per_device_eval_batch_size 64 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --use_auth_token="True" \
    --seed 1337 

# --gradient_accumulation_steps 8 \
# --gradient_checkpointing

## gary109/orchid219_ft_pretrain_data2vec-vision-base-mae 
---

In [None]:
!accelerate launch run_image_classification_Data2Vec-MAE.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "gary109/orchid219_pretrain_data2vec-vision-base-mae" \
    --output_dir="orchid219_ft_pretrain_data2vec-vision-base-mae" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train --do_eval --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_pretrain_data2vec-vision-base-mae" \
    --learning_rate 2e-4 \
    --num_train_epochs 200 \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 32 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token="True" \
    --seed 1337 

# --gradient_accumulation_steps 8 \
# --gradient_checkpointing

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
                                 [A[INFO|trainer.py:2345] 2022-05-19 04:19:48,969 >> Saving model checkpoint to orchid219_ft_pretrain_data2vec-vision-base-mae/checkpoint-1302
[INFO|configuration_utils.py:446] 2022-05-19 04:19:48,973 >> Configuration saved in orchid219_ft_pretrain_data2vec-vision-base-mae/checkpoint-1302/config.json
[INFO|modeling_utils.py:1546] 2022-05-19 04:19:50,647 >> Model weights saved in orchid219_ft_pretrain_data2vec-vision-base-mae/checkpoint-1302/pytorch_model.bin
[INFO|feature_extraction_utils.py:351] 2022-05-19 04:19:50,648 >> Feature extractor saved in orchid219_ft_pretrain_data2vec-vision-base-mae/checkpoint-1302/preprocessor_config.json
[INFO|trainer.py:2453] 2022-05-19 04:19:54,268 >> Deleting older checkpoint [orchid219_ft_pretrain_data2vec-vision-base-mae/checkpoint-1116] due to args.save_total_limit
 11% 1310/12400 [12:47<2:06:01,  1.47it/s]{'loss': 1.1568, 'learning_rate': 0.0001788709677419355, 'epoch': 21.13}
   

## gary109/orchid219_ft_data2vec-vision-large
---

In [None]:
!accelerate launch run_image_classification.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "facebook/data2vec-vision-large" \
    --output_dir="orchid219_ft_data2vec-vision-large" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train --do_eval --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_data2vec-vision-large" \
    --learning_rate 2e-5 \
    --num_train_epochs 400 \
    --per_device_train_batch_size 64 \
    --per_device_eval_batch_size 64 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337 

# --gradient_accumulation_steps 8 \
# --gradient_checkpointing

## gary109/orchid219_ft_data2vec-vision-base-ft1k 
---

In [None]:
!accelerate launch run_image_classification_data2vec.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "facebook/data2vec-vision-base-ft1k" \
    --output_dir="orchid219_ft_data2vec-vision-base-ft1k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train --do_eval --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_data2vec-vision-base-ft1k" \
    --learning_rate 2e-5 \
    --num_train_epochs 400 \
    --per_device_train_batch_size 64 \
    --per_device_eval_batch_size 64 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337 

# --gradient_accumulation_steps 8 \
# --gradient_checkpointing

 58% 7250/12400 [2:37:41<1:12:32,  1.18it/s]{'loss': 0.9712, 'learning_rate': 8.306451612903227e-06, 'epoch': 233.87}
 58% 7254/12400 [2:37:44<1:10:37,  1.21it/s][INFO|trainer.py:2625] 2022-05-19 13:34:43,816 >> ***** Running Evaluation *****
[INFO|trainer.py:2627] 2022-05-19 13:34:43,817 >>   Num examples = 219
[INFO|trainer.py:2630] 2022-05-19 13:34:43,817 >>   Batch size = 64

  0% 0/4 [00:00<?, ?it/s][A
 50% 2/4 [00:00<00:00,  9.02it/s][A
 75% 3/4 [00:00<00:00,  6.38it/s][A
                                            
{'eval_loss': 1.2681431770324707, 'eval_accuracy': 0.7671232876712328, 'eval_runtime': 4.2472, 'eval_samples_per_second': 51.563, 'eval_steps_per_second': 0.942, 'epoch': 234.0}
 58% 7254/12400 [2:37:49<1:10:37,  1.21it/s]
100% 4/4 [00:00<00:00,  7.04it/s][A
                                 [A[INFO|trainer.py:2345] 2022-05-19 13:34:48,066 >> Saving model checkpoint to orchid219_ft_data2vec-vision-base-ft1k/checkpoint-7254
[INFO|configuration_utils.py:446] 2022-05

## gary109/orchid219_ft_data2vec-vision-large-ft1k 
---

In [None]:
!accelerate launch run_image_classification_data2vec.py \
    --dataset_name "gary109/orchid219" \
    --model_name_or_path "facebook/data2vec-vision-large-ft1k" \
    --output_dir="orchid219_ft_data2vec-vision-large-ft1k" \
    --remove_unused_columns False \
    --overwrite_output_dir \
    --do_train --do_eval --push_to_hub \
    --push_to_hub_model_id="orchid219_ft_data2vec-vision-large-ft1k" \
    --learning_rate 2e-5 \
    --num_train_epochs 400 \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 32 \
    --logging_strategy steps \
    --logging_steps 10 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --use_auth_token \
    --seed 1337 

# --gradient_accumulation_steps 8 \
# --gradient_checkpointing


  0% 0/4 [00:00<?, ?it/s][A
 50% 2/4 [00:00<00:00,  9.25it/s][A
 75% 3/4 [00:00<00:00,  6.51it/s][A
                                        
[A{'eval_loss': 5.367639064788818, 'eval_accuracy': 0.0136986301369863, 'eval_runtime': 4.1237, 'eval_samples_per_second': 53.107, 'eval_steps_per_second': 0.97, 'epoch': 2.0}
  0% 62/12400 [03:02<2:44:24,  1.25it/s]
100% 4/4 [00:00<00:00,  7.19it/s][A
                                 [A[INFO|trainer.py:2345] 2022-05-19 11:00:00,368 >> Saving model checkpoint to orchid219_ft_data2vec-vision-base-ft1k/checkpoint-62
[INFO|configuration_utils.py:446] 2022-05-19 11:00:00,371 >> Configuration saved in orchid219_ft_data2vec-vision-base-ft1k/checkpoint-62/config.json
[INFO|modeling_utils.py:1546] 2022-05-19 11:00:02,091 >> Model weights saved in orchid219_ft_data2vec-vision-base-ft1k/checkpoint-62/pytorch_model.bin
[INFO|feature_extraction_utils.py:351] 2022-05-19 11:00:02,092 >> Feature extractor saved in orchid219_ft_data2vec-vision-base-ft1k/ch

# Test Model
---



## 製作 Public Test Datasets with Datasets

In [None]:
import csv
import pandas as pd
import os

In [None]:
# !wget -O /content/drive/MyDrive/datasets/Orchid219/Public_Test/Public_Test.zip http://images.cocodataset.org/zips/val2017.zip
!unzip /content/drive/MyDrive/datasets/Orchid219/Public_Test/Public_Test.zip -d /content/drive/MyDrive/datasets/Orchid219/Public_Test/

In [None]:
from datasets import load_dataset

# Build the "public-test" configuration from the orchid219.py loading script.
# The prepared dataset is cached on Google Drive so later sessions can reuse it.
dataset = load_dataset(
    "orchid219.py",
    "public-test",
    use_auth_token=True,
    cache_dir='/content/drive/MyDrive/datasets/cache_orchid219_public_test',
)
dataset

Reusing dataset orchid219 (/content/drive/MyDrive/datasets/cache_orchid219_public_test/orchid219/public-test/1.0.0/8f8444a00f455cca182e267fafef70db843b3dd0d3ddb264f27c2accbf34d75e)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    test: Dataset({
        features: ['filename', 'image'],
        num_rows: 19
    })
})

In [None]:
# Upload the DatasetDict loaded above to the Hub repo "gary109/orchid219_public-test".
dataset.push_to_hub("gary109/orchid219_public-test")

Pushing split test to the Hub.


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

## 預測 Public Test Dataset Results using Fine-Tuned Models
---
- gary109/orchid219_ft_vit-base-patch16-224-in21k
- gary109/orchid219_ft_vit-large-patch16-224-in21k
- gary109/orchid219_ft_vit-huge-patch14-224-in21k
- gary109/orchid219_ft_data2vec-vision-base
- gary109/orchid219_ft_vit-base-patch32-224-in21k
- gary109/orchid219_ft_vit-large-patch32-224-in21k
- gary109/orchid219_ft_vit-base-mae
- gary109/orchid219_ft_vit-mae-large

In [None]:
import torch
from transformers import ViTFeatureExtractor, ViTForImageClassification, BeitFeatureExtractor, Data2VecVisionForImageClassification
from PIL import Image
import requests
from pathlib import Path
from datetime import datetime
from tqdm import tqdm

# Fine-tuned checkpoints to evaluate on the public test set.
# Uncomment an entry to include it; every entry ends with a comma so that
# toggling one line can never merge two adjacent string literals.
ft_models = [
            #  'gary109/orchid219_ft_vit-base-patch16-224-in21k',
            #  'gary109/orchid219_ft_vit-large-patch16-224-in21k',
            #  'gary109/orchid219_ft_vit-huge-patch14-224-in21k',
             'gary109/orchid219_ft_data2vec-vision-base',
            #  'gary109/orchid219_ft_vit-base-patch32-224-in21k',
            #  'gary109/orchid219_ft_vit-large-patch32-224-in21k',
             'gary109/orchid219_ft_vit-base-mae',
            #  'gary109/orchid219_ft_vit-mae-large',
]

# One CSV per model is written into this directory.
root_results = './results/'
Path(root_results).mkdir(parents=True, exist_ok=True)

for m in ft_models:
    print(f'Using {m} ===> Predicting ...')
    # Any Data2Vec-Vision checkpoint (base, large, ...) needs the BEiT feature
    # extractor and its own model class; every other checkpoint is loaded as ViT.
    if 'data2vec-vision' in m:
        feature_extractor = BeitFeatureExtractor.from_pretrained(m)
        model = Data2VecVisionForImageClassification.from_pretrained(m)
    else:
        feature_extractor = ViTFeatureExtractor.from_pretrained(m, use_auth_token=True)
        model = ViTForImageClassification.from_pretrained(m, use_auth_token=True)

    file_list = []
    label_list = []
    # Inference only: with autograd disabled no graph is kept per forward pass.
    with torch.no_grad():
        for ds in tqdm(dataset['test']):
            file_list.append(os.path.basename(ds['filename']))

            inputs = feature_extractor(images=ds['image'], return_tensors="pt")
            outputs = model(**inputs)
            logits = outputs.logits
            # model predicts one of the 219 Orchid classes
            predicted_class_idx = logits.argmax(-1).item()
            label_list.append(model.config.id2label[predicted_class_idx])

    # Generate CSV file: <model name>_<timestamp>.csv with columns file,label
    modelName = os.path.basename(m)
    dateStr = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    csv_path = os.path.join(root_results, f'{modelName}_{dateStr}.csv')
    df = pd.DataFrame({'file': file_list, 'label': label_list})
    df.to_csv(csv_path, index=False, encoding="utf8")
    # Drop the reference so the weights can be freed before the next model loads.
    del model

Using gary109/orchid219_ft_data2vec-vision-base ===> Predicting ...


Downloading:   0%|          | 0.00/302 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/8.05k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/328M [00:00<?, ?B/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 19/19 [00:09<00:00,  1.99it/s]


Using gary109/orchid219_ft_vit-base-mae ===> Predicting ...


Downloading:   0%|          | 0.00/228 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/7.71k [00:00<?, ?B/s]

You are using a model of type vit_mae to instantiate a model of type vit. This is not supported for all configurations of models and can yield errors.


Downloading:   0%|          | 0.00/328M [00:00<?, ?B/s]

100%|██████████| 19/19 [00:08<00:00,  2.22it/s]


# Git Push

In [None]:
# Stage everything under the current directory, commit it, and push to the
# repository's configured remote.
! git add .
! git commit -am "submit function ready"
! git push

[main 845c131] submit function ready
 4 files changed, 80 insertions(+)
 create mode 100644 results/orchid219_ft_data2vec-vision-base_2022-05-10-04-40-44.csv
 create mode 100644 results/orchid219_ft_vit-base-mae_2022-05-10-04-41-02.csv
 create mode 100644 results/orchid219_ft_vit-base-patch16-224-in21k_2022-05-10-04-35-31.csv
 create mode 100644 results/orchid219_ft_vit-large-patch16-224-in21k_2022-05-10-04-36-36.csv
Counting objects: 6, done.
Delta compression using up to 4 threads.
Compressing objects: 100% (6/6), done.
Writing objects: 100% (6/6), 962 bytes | 962.00 KiB/s, done.
Total 6 (delta 2), reused 0 (delta 0)
To https://gitlab.com/gary109/orchid219_classification.git
   d3f3779..845c131  main -> main
