# ViT classification pipeline


In [None]:
from google.colab import drive
from keras.activations import sigmoid
from keras.layers import Dense, Input
from keras.losses import BinaryCrossentropy
from keras.optimizers import Adam
from keras.models import Sequential, Model

import numpy as np
import pathlib
from PIL import Image

from sklearn.metrics import accuracy_score

from transformers import ViTImageProcessor, ViTModel, ViTConfig, TFAutoModel, ViTForImageClassification
from transformers import TrainingArguments, Trainer, TFViTForImageClassification
import tensorflow as tf
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt


## Loading and Preprocessing Data

In [None]:
batch_size=32

In [None]:
drive.mount('/content/drive')
N_TRAIN = 1250
N_VAL = 250

#paths to LR and HR images
real_bicubic_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Real/Bicubic')
real_sr3_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Real/SR3')
real_srgan_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Real/SRGAN')
real_srcnn_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Real/SRCNN')

baseline_bicubic_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Baseline/Bicubic')
baseline_sr3_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Baseline/SR3')
baseline_srgan_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Baseline/SRGAN')
baseline_srcnn_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Baseline/SRCNN')

adversarial_bicubic_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Adversarial Attack/Bicubic')
adversarial_sr3_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Adversarial Attack/SR3')
adversarial_srgan_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Adversarial Attack/SRGAN')
adversarial_srcnn_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Adversarial Attack/SRCNN')

faceshift_bicubic_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Faceshift/Bicubic')
faceshift_sr3_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Faceshift/SR3')
faceshift_srgan_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Faceshift/SRGAN')
faceshift_srcnn_dir = pathlib.Path('/content/drive/MyDrive/ST456_project_team_afi/DFGC/Faceshift/SRCNN')

real_bicubic_paths = sorted([str(path) for path in real_bicubic_dir.glob('*.png')])
real_sr3_paths = sorted([str(path) for path in real_sr3_dir.glob('*.png')])
real_srgan_paths = sorted([str(path) for path in real_srgan_dir.glob('*.png')])
real_srcnn_paths = sorted([str(path) for path in real_srcnn_dir.glob('*.png')])

baseline_bicubic_paths = sorted([str(path) for path in baseline_bicubic_dir.glob('*.png')])
baseline_sr3_paths = sorted([str(path) for path in baseline_sr3_dir.glob('*.png')])
baseline_srgan_paths = sorted([str(path) for path in baseline_srgan_dir.glob('*.png')])
baseline_srcnn_paths = sorted([str(path) for path in baseline_srcnn_dir.glob('*.png')])

adversarial_bicubic_paths = sorted([str(path) for path in adversarial_bicubic_dir.glob('*.png')])
adversarial_sr3_paths = sorted([str(path) for path in adversarial_sr3_dir.glob('*.png')])
adversarial_srgan_paths = sorted([str(path) for path in adversarial_srgan_dir.glob('*.png')])
adversarial_srcnn_paths = sorted([str(path) for path in adversarial_srcnn_dir.glob('*.png')])

faceshift_bicubic_paths = sorted([str(path) for path in faceshift_bicubic_dir.glob('*.png')])
faceshift_sr3_paths = sorted([str(path) for path in faceshift_sr3_dir.glob('*.png')])
faceshift_srgan_paths = sorted([str(path) for path in faceshift_srgan_dir.glob('*.png')])
faceshift_srcnn_paths = sorted([str(path) for path in faceshift_srcnn_dir.glob('*.png')])


Mounted at /content/drive


In [None]:
def train_val_test_split(real_paths, fake_paths, n_train=N_TRAIN, n_val=N_VAL):
  """
  Split the dataset into training, validation, and test sets.

  Args:
    real_paths (list): List of real images paths.
    fake_paths (list): List of fake images paths.
    n_train (int): Number of training images.
    n_val (int): Number of validation images.
  Returns:
    real_train (list): List of real training images paths.
    fake_train (list): List of fake training images paths.
    real_val (list): List of real validation images paths.
    fake_val (list): List of fake validation images paths.
    real_test (list): List of real test images paths.
    fake_test (list): List of fake test images paths.
  """
  length = min([len(real_paths), len(fake_paths)])
  real_train = real_paths[:n_train//2]
  fake_train = fake_paths[:n_train//2]
  real_val = real_paths[n_train//2:n_train//2+n_val//2]
  fake_val = fake_paths[n_train//2:n_train//2+n_val//2]
  real_test = real_paths[n_train//2+n_val//2:length]
  fake_test = fake_paths[n_train//2+n_val//2:length]
  return real_train, fake_train, real_val, fake_val, real_test, fake_test
#
(bicubic_real_train,
 bicubic_baseline_train,
 bicubic_real_val,
 bicubic_baseline_val,
 bicubic_real_test,
 bicubic_baseline_test) =  train_val_test_split(real_bicubic_paths, baseline_bicubic_paths)

(bicubic_real_train,
 bicubic_adversarial_train,
 bicubic_real_val,
 bicubic_adversarial_val,
 bicubic_real_test,
 bicubic_adversarial_test) =  train_val_test_split(real_bicubic_paths, adversarial_bicubic_paths)

(bicubic_real_train,
 bicubic_faceshift_train,
 bicubic_real_val,
 bicubic_faceshift_val,
 bicubic_real_test,
 bicubic_faceshift_test) =  train_val_test_split(real_bicubic_paths, faceshift_bicubic_paths)
#
(srgan_real_train,
 srgan_baseline_train,
 srgan_real_val,
 srgan_baseline_val,
 srgan_real_test,
 srgan_baseline_test) =  train_val_test_split(real_srgan_paths, baseline_srgan_paths)

(srgan_real_train,
 srgan_adversarial_train,
 srgan_real_val,
 srgan_adversarial_val,
 srgan_real_test,
 srgan_adversarial_test) =  train_val_test_split(real_srgan_paths, adversarial_srgan_paths)

(srgan_real_train,
 srgan_faceshift_train,
 srgan_real_val,
 srgan_faceshift_val,
 srgan_real_test,
 srgan_faceshift_test) =  train_val_test_split(real_srgan_paths, faceshift_srgan_paths)
#
(srcnn_real_train,
 srcnn_baseline_train,
 srcnn_real_val,
 srcnn_baseline_val,
 srcnn_real_test,
 srcnn_baseline_test) =  train_val_test_split(real_srcnn_paths, baseline_srcnn_paths)

(srcnn_real_train,
 srcnn_adversarial_train,
 srcnn_real_val,
 srcnn_adversarial_val,
 srcnn_real_test,
 srcnn_adversarial_test) =  train_val_test_split(real_srcnn_paths, adversarial_srcnn_paths)

(srcnn_real_train,
 srcnn_faceshift_train,
 srcnn_real_val,
 srcnn_faceshift_val,
 srcnn_real_test,
 srcnn_faceshift_test) =  train_val_test_split(real_srcnn_paths, faceshift_srcnn_paths)

#
(sr3_real_train,
 sr3_baseline_train,
 sr3_real_val,
 sr3_baseline_val,
 sr3_real_test,
 sr3_baseline_test) =  train_val_test_split(real_sr3_paths, baseline_sr3_paths, n_train=150, n_val=0)

(sr3_real_train,
 sr3_adversarial_train,
 sr3_real_val,
 sr3_adversarial_val,
 sr3_real_test,
 sr3_adversarial_test) =  train_val_test_split(real_sr3_paths, adversarial_sr3_paths, n_train=150, n_val=0)

(sr3_real_train,
 sr3_faceshift_train,
 sr3_real_val,
 sr3_faceshift_val,
 sr3_real_test,
 sr3_faceshift_test) =  train_val_test_split(real_sr3_paths, faceshift_sr3_paths, n_train=150, n_val=0)




In [None]:
# load image processor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

In [1]:
# @tf.function
def create_x_y(real_paths, fake_paths):
  """
  Preprocess images and labels for training.

  Args:
    real_paths: list of paths to real images.
    fake_paths: list of paths to deepfaked images.
  Returns:
    train_x: list of preprocessed images.
    train_y: list of labels.
  """
  train_x = []
  train_y = []
  for path in tqdm(real_paths):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)
    img = processor(images=img, return_tensors="pt")

    train_x.append(img["pixel_values"][0])
    train_y.append(0)

  for path in tqdm(fake_paths):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)
    img = processor(images=img, return_tensors="pt")
    train_x.append(img["pixel_values"][0])
    train_y.append(1)

  train_x = np.array(train_x)
  train_y = np.array(train_y)
  return train_x, train_y



## SRGAN

In [None]:
train_x, train_y = create_x_y(srgan_real_train, srgan_faceshift_train)
val_x, val_y = create_x_y(srgan_real_val, srgan_faceshift_val)
test_x, test_y = create_x_y(srgan_real_test, srgan_faceshift_test)

# load ViT model from huggingface
vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
# freeze the ViT layer but while making the final classification layer trainable
# this is crucial as fine-tuning the entire model would take very long
# instead we use transfer learning to classify based of the features extracted by the ViT
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
# compile, train and evaluate the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.12604999542236328, 0.9678714871406555]

In [None]:
train_x, train_y = create_x_y(srgan_real_train, srgan_baseline_train)
val_x, val_y = create_x_y(srgan_real_val, srgan_baseline_val)
test_x, test_y = create_x_y(srgan_real_test, srgan_baseline_test)
# reload model to reset weights
vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)


  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.750725269317627, 0.6432865858078003]

In [None]:
train_x, train_y = create_x_y(srgan_real_train, srgan_adversarial_train)
val_x, val_y = create_x_y(srgan_real_val, srgan_adversarial_val)
test_x, test_y = create_x_y(srgan_real_test, srgan_adversarial_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)


  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.10832899063825607, 0.9639278650283813]

## SRCNN

In [None]:
train_x, train_y = create_x_y(srcnn_real_train, srcnn_adversarial_train)
val_x, val_y = create_x_y(srcnn_real_val, srcnn_adversarial_val)
test_x, test_y = create_x_y(srcnn_real_test, srcnn_adversarial_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.11582652479410172, 0.959919810295105]

In [None]:
train_x, train_y = create_x_y(srcnn_real_train, srcnn_baseline_train)
val_x, val_y = create_x_y(srcnn_real_val, srcnn_baseline_val)
test_x, test_y = create_x_y(srcnn_real_test, srcnn_baseline_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.754755973815918, 0.6352705359458923]

In [None]:
train_x, train_y = create_x_y(srcnn_real_train, srcnn_faceshift_train)
val_x, val_y = create_x_y(srcnn_real_val, srcnn_faceshift_val)
test_x, test_y = create_x_y(srcnn_real_test, srcnn_faceshift_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.1880931556224823, 0.9437751173973083]

## Bicubic Interpolation

In [None]:
train_x, train_y = create_x_y(bicubic_real_train, bicubic_faceshift_train)
val_x, val_y = create_x_y(bicubic_real_val, bicubic_faceshift_val)
test_x, test_y = create_x_y(bicubic_real_test, bicubic_faceshift_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.07240816950798035, 0.9819276928901672]

In [None]:
train_x, train_y = create_x_y(bicubic_real_train, bicubic_baseline_train)
val_x, val_y = create_x_y(bicubic_real_val, bicubic_baseline_val)
test_x, test_y = create_x_y(bicubic_real_test, bicubic_baseline_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.7335337996482849, 0.6452905535697937]

In [None]:
train_x, train_y = create_x_y(bicubic_real_train, bicubic_adversarial_train)
val_x, val_y = create_x_y(bicubic_real_val, bicubic_adversarial_val)
test_x, test_y = create_x_y(bicubic_real_test, bicubic_adversarial_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.11155572533607483, 0.9619238376617432]

## SR3

In [None]:
train_x, train_y = create_x_y(sr3_real_train, sr3_faceshift_train)
test_x, test_y = create_x_y(sr3_real_test, sr3_faceshift_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30


Cause: for/else statement not yet supported


Cause: for/else statement not yet supported
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.3436296582221985, 0.8399999737739563]

In [None]:
train_x, train_y = create_x_y(sr3_real_train, sr3_baseline_train)
test_x, test_y = create_x_y(sr3_real_test, sr3_baseline_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.5998238921165466, 0.7599999904632568]

In [None]:
train_x, train_y = create_x_y(sr3_real_train, sr3_adversarial_train)
test_x, test_y = create_x_y(sr3_real_test, sr3_adversarial_test)

vit = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=1, ignore_mismatched_sizes=True)
vit.trainable = True
vit.layers[0].trainable = False
vit.layers[-1].kernel_regularizer = tf.keras.regularizers.l1_l2(0.01, 0.01)
vit.layers[-1].activation = tf.keras.activations.sigmoid
vit.summary()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vit.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
vit.fit(train_x, train_y, epochs=30)
vit.evaluate(test_x, test_y)

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_vi_t_for_image_classification_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 85799425 (327.30 MB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 85798656 (327.30 MB)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.3724135458469391, 0.8799999952316284]