In [1]:
import sklearn
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from scipy.spatial import procrustes
from sklearn.manifold import TSNE
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.manifold import MDS
import numpy as np
import warnings
import tensorflow as tf
warnings.filterwarnings('ignore')

plt.rc('font', size=14)
plt.rc('axes', labelsize=14, titlesize=14)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)

In [2]:
from pathlib import Path

IMAGES_PATH = Path() / "images" / "dim_reduction"
IMAGES_PATH.mkdir(parents=True, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = IMAGES_PATH / f"{fig_id}.{fig_extension}"
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [3]:
import cv2
import os

def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder,filename))
        grayscale_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if img is not None:
            images.append(cv2.resize(grayscale_image,(800,800)))
    return np.array(images)

def load_images_from_folder_rgb(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder,filename))
        if img is not None:
            images.append(cv2.resize(img,(224,224)))
    return np.array(images)

def load_labels_from_folder(folder):
    labels = []
    for filename in os.listdir(folder):
        label = filename.split('_')[0]
        labels.append(label)
    return labels

In [4]:
img_360 = load_images_from_folder("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\360 Rocks\\")
img_360_labels = load_labels_from_folder("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\360 Rocks\\")
img_360_rgb = load_images_from_folder_rgb("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\360 Rocks\\")
img_120 = load_images_from_folder("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\120 Rocks\\")
img_120_rgb = load_images_from_folder_rgb("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\120 Rocks\\")
img_120_labels = load_labels_from_folder("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\120 Rocks\\")

In [5]:
flattened_image = img_360.reshape(360, -1)

In [6]:
flattened_image.shape

(360, 640000)

In [7]:
flattened_image_rgb = img_360_rgb.reshape(360, -1)

In [8]:
flattened_image_rgb.shape

(360, 150528)

In [9]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(img_360_labels)
val_labels = label_encoder.fit_transform(img_120_labels)

In [10]:
with open('mds_360.txt','r') as f:
    arr = f.read().strip().split('\n')

In [11]:
mds_features = []
for i in arr:
    mds_features.append(i.strip().split('  '))

In [12]:
mds_features = np.array(mds_features)
mds_features.shape

(360, 8)

In [13]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, InputLayer, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Dropout
from keras.regularizers import Regularizer
from keras.callbacks import EarlyStopping
import gc

In [14]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
import os
import datasets

def create_image_folder_dataset(root_path):
    """creates `Dataset` from image folder structure"""

    # get class names by folders names
    _CLASS_NAMES= os.listdir(root_path)
    # defines `datasets` features`
    features=datasets.Features({
                      "img": datasets.Image(),
                      "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
                  })
    # temp list holding datapoints for creation
    img_data_files=[]
    label_data_files=[]
    # load images into list for creation
    for img_class in os.listdir(root_path):
        for img in os.listdir(os.path.join(root_path,img_class)):
            path_=os.path.join(root_path,img_class,img)
            img_data_files.append(path_)
            label_data_files.append(img_class)
    # create dataset
    ds = datasets.Dataset.from_dict({"img":img_data_files,"label":label_data_files},features=features)
    return ds



In [16]:
rocks360_ds = create_image_folder_dataset("D:\\IU\\Classes\\Fall 23\\AML\\Assignment 4\\360 Split\\")

In [17]:
img_class_labels = rocks360_ds.features["label"].names

In [18]:
rocks360_ds

Dataset({
    features: ['img', 'label'],
    num_rows: 360
})

In [19]:
rocks360_labels = rocks360_ds.features["label"].names

In [20]:
rocks360_labels

['I', 'M', 'S']

In [21]:
from transformers import ViTFeatureExtractor
from tensorflow import keras
from tensorflow.keras import layers

model_id = "google/vit-base-patch16-224-in21k"

feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    [
        layers.Resizing(feature_extractor.size, feature_extractor.size),
        layers.Rescaling(1./255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)
# use keras image data augementation processing
def augmentation(examples):
    # print(examples["img"])
    examples["pixel_values"] = [data_augmentation(image) for image in examples["img"]]
    return examples


# basic processing (only resizing)
def process(examples):
    examples.update(feature_extractor(examples['img'], ))
    return examples

# we are also renaming our label col to labels to use `.to_tf_dataset` later
rocks360_ds = rocks360_ds.rename_column("label", "labels")


In [22]:
processed_dataset = rocks360_ds.map(process, batched=True)
processed_dataset

# # augmenting dataset takes a lot of time
# processed_dataset = eurosat_ds.map(augmentation, batched=True)


Map:   0%|          | 0/360 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 360
})

In [23]:
test_size=.3333333333333333333333

processed_dataset = processed_dataset.shuffle().train_test_split(test_size=test_size)

In [24]:
processed_dataset["train"]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 240
})

In [25]:
from huggingface_hub import HfFolder
import tensorflow as tf

id2label = {str(i): label for i, label in enumerate(img_class_labels)}
label2id = {v: k for k, v in id2label.items()}

num_train_epochs = 20
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0
output_dir=model_id.split("/")[1]
hub_token = HfFolder.get_token() # or your token directly "hf_xxx"
hub_model_id = f'{model_id.split("/")[1]}-rock360'
fp16=True

# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
if fp16:
    tf.keras.mixed_precision.set_global_policy("mixed_float16")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3070 Laptop GPU, compute capability 8.6


In [26]:
from transformers import DefaultDataCollator

# Data collator that will dynamically pad the inputs received, as well as the labels.
data_collator = DefaultDataCollator(return_tensors="tf")

# converting our train dataset to tf.data.Dataset
tf_train_dataset = processed_dataset["train"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=train_batch_size,
   collate_fn=data_collator)

# converting our test dataset to tf.data.Dataset
tf_eval_dataset = processed_dataset["test"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=eval_batch_size,
   collate_fn=data_collator)


In [27]:
from transformers import TFViTForImageClassification, create_optimizer
import tensorflow as tf

# create optimizer wight weigh decay
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

# load pre-trained ViT model
model = TFViTForImageClassification.from_pretrained(
    model_id,
    num_labels=len(img_class_labels),
    id2label=id2label,
    label2id=label2id,
)

# define loss
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# define metrics
metrics=[
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
]

# compile model
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=metrics
              )


Some layers from the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing TFViTForImageClassification: ['vit/pooler/dense/bias:0', 'vit/pooler/dense/kernel:0']
- This IS expected if you are initializing TFViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
# alternatively create Image Classification model using Keras Layer and ViTModel
# here you can also add the processing layers of keras

import tensorflow as tf
from transformers import TFViTModel
from tensorflow.keras.models import Model


base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')


# inputs
pixel_values = tf.keras.layers.Input(shape=(3,224,224), name='pixel_values', dtype='float32')

# model layer
vit = base_model.vit(pixel_values)[0]
# dense1 = tf.keras.layers.Dense(128, activation='relu', name='dense1')(vit[:, 0, :])
# dense1 = tf.keras.layers.Dense(64, activation='relu', name='dense1')(vit[:, 0, :])
# dropout = tf.keras.layers.Dropout(0.5, name='dropout')(dense1)
mds = tf.keras.layers.Dense(8, activation='tanh', name='mds')(vit[:, 0, :])
mds_model = tf.keras.Model(inputs=pixel_values, outputs=mds)
classifier = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(mds)

# model
keras_model = tf.keras.Model(inputs=pixel_values, outputs=classifier)


All model checkpoint layers were used when initializing TFViTModel.

All the layers of TFViTModel were initialized from the model checkpoint at google/vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [29]:
import os
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard as TensorboardCallback, EarlyStopping

callbacks=[]

callbacks.append(TensorboardCallback(log_dir=os.path.join(output_dir,"logs")))
callbacks.append(EarlyStopping(monitor="val_accuracy",patience=5,restore_best_weights=True,verbose=1))
if hub_token:
    callbacks.append(PushToHubCallback(output_dir=output_dir,
                                     hub_model_id=hub_model_id,
                                     hub_token=hub_token))



Cloning https://huggingface.co/jayashprasad/vit-base-patch16-224-in21k-rock360 into local empty directory.


Download file tf_model.h5:   0%|          | 8.00k/328M [00:00<?, ?B/s]

Download file logs/train/events.out.tfevents.1702244529.ASUS.12924.0.v2:   0%|          | 8.00k/3.12M [00:00<?…

Download file logs/train/events.out.tfevents.1702243059.ASUS.11396.0.v2:   1%|1         | 32.0k/3.12M [00:00<?…

Download file logs/train/events.out.tfevents.1702084309.ASUS.10920.0.v2:   1%|1         | 32.0k/3.11M [00:00<?…

Download file logs/train/events.out.tfevents.1702247943.ASUS.3892.0.v2:   1%|1         | 32.0k/3.12M [00:00<?,…

Download file logs/train/events.out.tfevents.1702249761.ASUS.11444.0.v2:   1%|          | 24.0k/3.12M [00:00<?…

Download file logs/train/events.out.tfevents.1702085955.ASUS.17556.0.v2:   1%|          | 16.0k/3.11M [00:00<?…

Download file logs/train/events.out.tfevents.1702241941.ASUS.22960.0.v2:   1%|1         | 32.0k/3.12M [00:00<?…

Clean file logs/train/events.out.tfevents.1702085955.ASUS.17556.0.v2:   0%|          | 1.00k/3.11M [00:00<?, ?…

Clean file logs/train/events.out.tfevents.1702241941.ASUS.22960.0.v2:   0%|          | 1.00k/3.12M [00:00<?, ?…

Clean file logs/train/events.out.tfevents.1702243059.ASUS.11396.0.v2:   0%|          | 1.00k/3.12M [00:00<?, ?…

Clean file logs/train/events.out.tfevents.1702244529.ASUS.12924.0.v2:   0%|          | 1.00k/3.12M [00:00<?, ?…

Clean file logs/train/events.out.tfevents.1702247943.ASUS.3892.0.v2:   0%|          | 1.00k/3.12M [00:00<?, ?B…

Clean file logs/train/events.out.tfevents.1702249761.ASUS.11444.0.v2:   0%|          | 1.00k/3.12M [00:00<?, ?…

Download file logs/train/events.out.tfevents.1702067930.ASUS.27144.0.v2:   1%|1         | 32.0k/3.11M [00:00<?…

Clean file logs/train/events.out.tfevents.1702084309.ASUS.10920.0.v2:   0%|          | 1.00k/3.11M [00:00<?, ?…

Download file logs/train/events.out.tfevents.1702248998.ASUS.29184.0.v2:   0%|          | 8.00k/3.11M [00:00<?…

Download file logs/train/events.out.tfevents.1702249404.ASUS.26472.0.v2:   1%|          | 18.5k/3.11M [00:00<?…

Download file logs/validation/events.out.tfevents.1702243102.ASUS.11396.1.v2: 100%|##########| 7.82k/7.82k [00…

Download file logs/train/events.out.tfevents.1702083642.ASUS.20804.0.v2:   1%|1         | 32.0k/3.11M [00:00<?…

Clean file logs/train/events.out.tfevents.1702067930.ASUS.27144.0.v2:   0%|          | 1.00k/3.11M [00:00<?, ?…

Clean file logs/train/events.out.tfevents.1702248998.ASUS.29184.0.v2:   0%|          | 1.00k/3.11M [00:00<?, ?…

Download file logs/validation/events.out.tfevents.1702249803.ASUS.11444.1.v2: 100%|##########| 7.82k/7.82k [00…

Download file logs/validation/events.out.tfevents.1702241984.ASUS.22960.1.v2: 100%|##########| 5.32k/5.32k [00…

Clean file logs/validation/events.out.tfevents.1702243102.ASUS.11396.1.v2:  13%|#2        | 1.00k/7.82k [00:00…

Clean file logs/train/events.out.tfevents.1702083642.ASUS.20804.0.v2:   0%|          | 1.00k/3.11M [00:00<?, ?…

Clean file logs/validation/events.out.tfevents.1702249803.ASUS.11444.1.v2:  13%|#2        | 1.00k/7.82k [00:00…

Clean file logs/validation/events.out.tfevents.1702241984.ASUS.22960.1.v2:  19%|#8        | 1.00k/5.32k [00:00…

Download file logs/validation/events.out.tfevents.1702244570.ASUS.12924.1.v2: 100%|##########| 5.32k/5.32k [00…

Clean file logs/train/events.out.tfevents.1702249404.ASUS.26472.0.v2:   0%|          | 1.00k/3.11M [00:00<?, ?…

Download file logs/validation/events.out.tfevents.1702085995.ASUS.17556.1.v2: 100%|##########| 3.45k/3.45k [00…

Download file logs/validation/events.out.tfevents.1702247984.ASUS.3892.1.v2: 100%|##########| 5.32k/5.32k [00:…

Download file logs/validation/events.out.tfevents.1702084350.ASUS.10920.1.v2: 100%|##########| 3.14k/3.14k [00…

Download file logs/validation/events.out.tfevents.1702249446.ASUS.26472.1.v2: 100%|##########| 672/672 [00:00<…

Download file logs/validation/events.out.tfevents.1702067972.ASUS.27144.1.v2: 100%|##########| 2.52k/2.52k [00…

Clean file logs/validation/events.out.tfevents.1702244570.ASUS.12924.1.v2:  19%|#8        | 1.00k/5.32k [00:00…

Clean file logs/validation/events.out.tfevents.1702085995.ASUS.17556.1.v2:  29%|##8       | 1.00k/3.45k [00:00…

Clean file logs/validation/events.out.tfevents.1702247984.ASUS.3892.1.v2:  19%|#8        | 1.00k/5.32k [00:00<…

Clean file logs/validation/events.out.tfevents.1702084350.ASUS.10920.1.v2:  32%|###1      | 1.00k/3.14k [00:00…

Clean file logs/validation/events.out.tfevents.1702249446.ASUS.26472.1.v2: 100%|##########| 672/672 [00:00<?, …

Clean file logs/validation/events.out.tfevents.1702067972.ASUS.27144.1.v2:  40%|###9      | 1.00k/2.52k [00:00…

Download file logs/validation/events.out.tfevents.1702249040.ASUS.29184.1.v2: 100%|##########| 354/354 [00:00<…

Clean file logs/validation/events.out.tfevents.1702249040.ASUS.29184.1.v2: 100%|##########| 354/354 [00:00<?, …

Download file logs/validation/events.out.tfevents.1702083682.ASUS.20804.1.v2: 100%|##########| 354/354 [00:00<…

Clean file logs/validation/events.out.tfevents.1702083682.ASUS.20804.1.v2: 100%|##########| 354/354 [00:00<?, …

Clean file tf_model.h5:   0%|          | 1.00k/328M [00:00<?, ?B/s]

In [30]:
train_results = model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    epochs=num_train_epochs,
    shuffle=True
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 14: early stopping


In [31]:
from huggingface_hub import HfApi

api = HfApi()

user = api.whoami(hub_token)


feature_extractor.save_pretrained(output_dir)

api.upload_file(
    token=hub_token,
    repo_id=f"{user['name']}/{hub_model_id}",
    path_or_fileobj=os.path.join(output_dir,"preprocessor_config.json"),
    path_in_repo="preprocessor_config.json",
)


'https://huggingface.co/jayashprasad/vit-base-patch16-224-in21k-rock360/blob/main/preprocessor_config.json'

In [32]:
img_360_rgb = tf.transpose(img_360_rgb, perm=[0, 3, 1, 2])
img_120_rgb = tf.transpose(img_120_rgb, perm=[0, 3, 1, 2])

In [33]:
activations_train = mds_model.predict(img_360_rgb)
activations_val = mds_model.predict(img_120_rgb)



In [34]:
activations_train.shape

(360, 8)

In [35]:
with open('mds_360.txt','r') as f:
    arr = f.read().strip().split('\n')

mds_360 = []
for i in arr:
    mds_360.append(i.strip().split('  '))

mds_360 = np.array(mds_360)
mds_360.shape

(360, 8)

In [36]:
with open('mds_120.txt','r') as f:
    arr = f.read().strip().split('\n')

mds_120 = []
for i in arr:
    mds_120.append(i.strip().split(' '))

mds_120 = np.array(mds_120)
mds_120.shape

(120, 8)

In [37]:
mtx1, mtx2, disparity = procrustes(mds_360, activations_train)
print("Disparity from MDS 360: "+str(disparity))

Disparity from MDS 360: 0.8187035941238407


In [38]:
cnn_human_comp = pd.DataFrame()
cnn_human_comp['Dimension'] = range(1, mtx1.shape[1] + 1)

In [39]:
corr_coeff = np.zeros(mtx1.shape[1])
avg_train_coeff = 0
for i in range(mtx1.shape[1]):
    corr_coeff[i] = np.corrcoef(mtx1[:, i], mtx2[:, i])[0, 1]
    avg_train_coeff += corr_coeff[i]
cnn_human_comp["Train"] = corr_coeff
avg_train_coeff = avg_train_coeff/mtx1.shape[1]

In [40]:
mtx1, mtx2, disparity = procrustes(mds_120, activations_val)
print("Disparity from MDS 120: "+str(disparity))

Disparity from MDS 120: 0.803113422312189


In [41]:
corr_coeff = np.zeros(mtx1.shape[1])
avg_test_coeff = 0
for i in range(mtx1.shape[1]):
    corr_coeff[i] = np.corrcoef(mtx1[:, i], mtx2[:, i])[0, 1]
    avg_test_coeff+= corr_coeff[i]
cnn_human_comp["Test"] = corr_coeff
avg_test_coeff = avg_test_coeff/mtx1.shape[1]

In [42]:
cnn_human_comp.set_index('Dimension')

Unnamed: 0_level_0,Train,Test
Dimension,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.635095,0.659669
2,0.496616,0.426088
3,0.218076,0.402673
4,0.33845,0.343896
5,0.195416,0.363707
6,0.500461,0.628203
7,0.353426,0.22317
8,0.421611,0.314117


In [43]:
print(avg_train_coeff)

0.39489388512995816


### The Average Correlation Coefficient for Train Dataset of 360 Images is 0.39489388512995816

In [44]:
print(avg_test_coeff)

0.4201902482186779


### The Average Correlation Coefficient for Test Dataset of 120 Images is 0.4201902482186779