In [1]:
!ls ../input/stratified-nwpu-resisc45-500-trainval7015/STRATIFIED_NWPU-RESISC45-500-TRAINVAL7015

train  val


In [2]:
!nvidia-smi

Fri Jun 13 03:51:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   31C    P0             25W /  250W |       0MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
import tensorflow as tf
import numpy as np

import joblib
import os

2025-06-13 03:51:58.860496: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749786719.047983      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749786719.102147      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
tf.config.list_logical_devices()

I0000 00:00:1749786732.185786      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


[LogicalDevice(name='/device:CPU:0', device_type='CPU'),
 LogicalDevice(name='/device:GPU:0', device_type='GPU')]

# Data Preparation

In [5]:
path_ds = "../input/stratified-nwpu-resisc45-500-trainval7015/STRATIFIED_NWPU-RESISC45-500-TRAINVAL7015"

In [6]:
import datasets
 
def create_image_folder_dataset(root_path):
  """creates `Dataset` from image folder structure"""
 
  # get class names by folders names
  _CLASS_NAMES= os.listdir(root_path)
  # defines `datasets` features`
  features=datasets.Features({
                      "img": datasets.Image(),
                      "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
                  })
  # temp list holding datapoints for creation
  img_data_files=[]
  label_data_files=[]
  # load images into list for creation
  for img_class in os.listdir(root_path):
    for img in os.listdir(os.path.join(root_path,img_class)):
      path_=os.path.join(root_path,img_class,img)
      img_data_files.append(path_)
      label_data_files.append(img_class)
  # create dataset
  ds = datasets.Dataset.from_dict({"img":img_data_files,"label":label_data_files},features=features)
  return ds
 

## Load Data

In [7]:
train_ds = create_image_folder_dataset(f"{path_ds}/train")
train_ds

Dataset({
    features: ['img', 'label'],
    num_rows: 15750
})

In [8]:
val_ds = create_image_folder_dataset(f"{path_ds}/val")
val_ds

Dataset({
    features: ['img', 'label'],
    num_rows: 3375
})

In [9]:
classes = train_ds.features["label"].names
classes

['airplane',
 'cloud',
 'mountain',
 'medium_residential',
 'thermal_power_station',
 'terrace',
 'commercial_area',
 'dense_residential',
 'baseball_diamond',
 'mobile_home_park',
 'ship',
 'airport',
 'river',
 'golf_course',
 'roundabout',
 'church',
 'circular_farmland',
 'overpass',
 'railway',
 'wetland',
 'lake',
 'parking_lot',
 'intersection',
 'tennis_court',
 'runway',
 'industrial_area',
 'chaparral',
 'bridge',
 'sparse_residential',
 'freeway',
 'sea_ice',
 'beach',
 'palace',
 'snowberg',
 'meadow',
 'ground_track_field',
 'harbor',
 'rectangular_farmland',
 'island',
 'basketball_court',
 'desert',
 'stadium',
 'forest',
 'storage_tank',
 'railway_station']

## Preprocess

In [10]:
from transformers import ViTFeatureExtractor
from tensorflow import keras
from tensorflow.keras import layers
 
model_id = "google/vit-base-patch16-224"

feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# basic processing (only resizing)
def process(examples):
    examples.update(feature_extractor(examples['img'], ))
    return examples

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



In [11]:
train_ds = train_ds.rename_column("label", "labels")
val_ds = val_ds.rename_column("label", "labels")

In [12]:
train_ds_preprocessed = train_ds.map(process, batched=True)
train_ds_preprocessed

Map:   0%|          | 0/15750 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 15750
})

In [13]:
val_ds_preprocessed = val_ds.map(process, batched=True) 
val_ds_preprocessed

Map:   0%|          | 0/3375 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 3375
})

## Convert DS to TF Datasets

In [14]:
id2label = {str(i): label for i, label in enumerate(classes)}
label2id = {v: k for k, v in id2label.items()}
 
num_train_epochs = 15
batch_size = 10
learning_rate = 1e-5
weight_decay_rate = 1e-2
num_warmup_steps = 0

In [15]:
from transformers import DefaultDataCollator
 
# Data collator that will dynamically pad the inputs received, as well as the labels.
data_collator = DefaultDataCollator(return_tensors="tf")
 
# converting our train dataset to tf.data.Dataset
train_data_no_augmentation = train_ds_preprocessed.to_tf_dataset(
   columns=["pixel_values"],
   label_cols=["labels"],
   shuffle=True,
   batch_size=batch_size,
   collate_fn=data_collator)

 
# converting our val dataset to tf.data.Dataset
val_data = val_ds_preprocessed.to_tf_dataset(
   columns=["pixel_values"],
   label_cols=["labels"],
   shuffle=True,
   batch_size=batch_size,
   collate_fn=data_collator)

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


## Apply Data Prefetch

In [17]:
AUTOTUNE = tf.data.AUTOTUNE
train_data_no_augmentation = train_data_no_augmentation.prefetch(buffer_size=AUTOTUNE)
val_data = val_data.prefetch(buffer_size=AUTOTUNE)

# Modelling

In [18]:
from transformers import TFViTForImageClassification, create_optimizer
import tensorflow as tf
 
# create optimizer wight weigh decay
num_train_steps = len(train_data_with_augmentation) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

def vit():
    # load pre-trained ViT model
    model = TFViTForImageClassification.from_pretrained(
        model_id,
        num_labels=len(classes),
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True
    ) 
     
    # compile model
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"]
    )
    
    model.summary()
    print(f"\n##### Optimizer Func Information\n{model.optimizer.get_config()}")
    print(f"\n##### Loss Func Information\n{model.loss.get_config()}")
    
    return model

## 15 Epochs No Augmentation

In [19]:
base_vit_p16 = vit()

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFViTForImageClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.
Some weights of TFViTForImageClassification were not initialized from the model checkpoint are newly initialize

Model: "tf_vi_t_for_image_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  34605     
                                                                 
Total params: 85833261 (327.43 MB)
Trainable params: 85833261 (327.43 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

##### Optimizer Func Information
{'name': 'AdamWeightDecay', 'learning_rate': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 1e-05, 'decay_steps': 23625, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgra

In [20]:
%%time

history_base_vit_p16 = base_vit_p16.fit(
    train_data_no_augmentation,
    validation_data=val_data,
    epochs=num_train_epochs,
)

Epoch 1/15


I0000 00:00:1749786983.723728      96 cuda_dnn.cc:529] Loaded cuDNN version 90300


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
CPU times: user 2h 28min 47s, sys: 8min 47s, total: 2h 37min 35s
Wall time: 2h 11min 3s


In [21]:
# base_vit_p16.save("base_vit_p16_epoch0010.keras")
base_vit_p16.save_pretrained("./base_vit_p16_in1k")
joblib.dump(history_base_vit_p16.history, "history_base_vit_p16_epoch0015.joblib")

['history_base_vit_p16_epoch0015.joblib']