In [1]:
import numpy as np 
import pandas as pd 
import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

In [2]:
import tensorflow as tf
import tensorflow_hub as hub

import requests
from PIL import Image
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np

In [None]:


# Cache used by load_image(): maps the image URL/path string to the
# preprocessed image tensor so repeated requests skip the download/decoding.
original_image_cache = {}

def preprocess_image(image):
  """Converts an image into a batched float32 tensor.

  Args:
    image: A PIL image or array-like of shape [height, width] (single
      channel) or [height, width, num_channels].

  Returns:
    A tf.float32 tensor of shape [1, height, width, num_channels]. Integer
    inputs are rescaled to the [0, 1] range by `convert_image_dtype`.
  """
  image = np.asarray(image)
  if image.ndim == 2:
    # Single-channel images decode without a channel axis; add one so the
    # result always has rank 4 instead of failing on image.shape[2].
    image = image[..., np.newaxis]
  # Prepend the batch dimension: [height, width, c] -> [1, height, width, c].
  batched = image[np.newaxis, ...]
  # Use `convert_image_dtype` to convert to floats in the [0,1] range.
  return tf.image.convert_image_dtype(batched, tf.float32)

def load_image_from_url(img_url, timeout=30):
  """Downloads an image and returns it as a batched float tensor.

  Args:
    img_url: URL of the image to download.
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook indefinitely.

  Returns:
    An image with shape [1, height, width, num_channels].

  Raises:
    requests.HTTPError: If the server answers with an error status code.
  """
  user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}
  response = requests.get(img_url, headers=user_agent, timeout=timeout)
  # Fail here with a clear HTTP error rather than letting PIL try to decode
  # an error page as image data.
  response.raise_for_status()
  image = Image.open(BytesIO(response.content))
  return preprocess_image(image)

def load_image(image_url, image_size=256, dynamic_size=False, max_dynamic_size=512):
  """Loads an image (using the module-level cache) and resizes it.

  Args:
    image_url: An http(s) URL, or a path readable through `tf.io.gfile`.
    image_size: Target height and width used when `dynamic_size` is False.
    dynamic_size: If True, the image keeps its size unless its height or
      width exceeds `max_dynamic_size`.
    max_dynamic_size: Height/width the image is padded-resized to when
      `dynamic_size` is True and the image is larger than this value.

  Returns:
    A tuple `(img, img_raw)`: the resized image and the preprocessed image
    as it was before any rescaling/resizing done in this function.
  """
  # Reuse the preprocessed tensor if this URL/path was loaded before.
  if image_url in original_image_cache:
    img = original_image_cache[image_url]
  elif image_url.startswith(('http://', 'https://')):
    img = load_image_from_url(image_url)
  else:
    # preprocess_image() reads the pixel data while the file is still open;
    # the context manager then releases the handle.
    with tf.io.gfile.GFile(image_url, 'rb') as fd:
      img = preprocess_image(Image.open(fd))
  original_image_cache[image_url] = img
  img_raw = img
  # Safety net for inputs that were not already scaled to [0, 1].
  if tf.reduce_max(img) > 1.0:
    img = img / 255.
  # Only applies to rank-3 inputs; tensors from preprocess_image are rank 4.
  if len(img.shape) == 3:
    img = tf.stack([img, img, img], axis=-1)
  if not dynamic_size:
    img = tf.image.resize_with_pad(img, image_size, image_size)
  elif img.shape[1] > max_dynamic_size or img.shape[2] > max_dynamic_size:
    img = tf.image.resize_with_pad(img, max_dynamic_size, max_dynamic_size)
  return img, img_raw

def show_image(image, title=''):
  """Displays the first image of a batch without axes.

  Args:
    image: Batched image indexed as [batch, height, width, channels]; only
      `image[0]` is drawn.
    title: Optional title shown above the image.
  """
  image_size = image.shape[1]
  # Scale the figure at 6 inches per 320 pixels of height. Clamp to 1 inch
  # because the integer division yields 0 for images under 54 pixels, and
  # matplotlib rejects a zero-sized figure.
  w = max(1, (image_size * 6) // 320)
  plt.figure(figsize=(w, w))
  plt.imshow(image[0], aspect='equal')
  plt.axis('off')
  plt.title(title)
  plt.show()

# Default input settings; a later cell overwrites both once the model has
# been selected (based on model_image_size_map).
image_size = 224
dynamic_size = False

In [None]:
# Choose the image-classification checkpoint to run.

# Key used to look the model up in the two maps below.
model_name = "vit-b8"

# Model name -> location of the classification SavedModel on Kaggle.
model_handle_map = {"vit-b8": "/kaggle/input/vision-transformer/tensorflow2/vit-b8-classification/1"}

# Model name -> square input resolution (pixels) the model expects.
model_image_size_map = {"vit-b8": 224}

# Indexing (not .get) so an unknown model name fails right here.
model_handle = model_handle_map[model_name]

print(f"Selected model: {model_name} : {model_handle}")

In [None]:
# Largest height/width allowed for models without a fixed input size.
max_dynamic_size = 512

# Models listed in model_image_size_map need a fixed resolution; any other
# model keeps the image size, capped at max_dynamic_size.
dynamic_size = model_name not in model_image_size_map
if dynamic_size:
  print(f"Images will be capped to a max size of {max_dynamic_size}x{max_dynamic_size}")
else:
  image_size = model_image_size_map[model_name]
  print(f"Images will be converted to {image_size}x{image_size}")

labels_file = "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt"

# Download the ImageNet label file and build the list of class names.
downloaded_file = tf.keras.utils.get_file("labels.txt", origin=labels_file)

classes = []

with open(downloaded_file) as f:
  labels = f.readlines()
# One label per line; strip the trailing newline/whitespace.
classes = [line.strip() for line in labels]


Select an Input Image

Everything is ready for inference. Here you can see the top 5 results from the model for the selected image.

# Using Kaggle Models for Transfer Learning

Select a model

In [None]:
# Feature-extractor variant of the model used for transfer learning.
model_name = "vit-b8"

# Model name -> location of the feature-extractor SavedModel on Kaggle.
model_handle_map = {"vit-b8": "/kaggle/input/vision-transformer/tensorflow2/vit-b8-fe/1"}

# Model name -> square input resolution in pixels.
model_image_size_map = {"vit-b8": 224}

# .get() lookups: the handle is None for an unknown name, and the input
# resolution falls back to 224 pixels.
pixels = model_image_size_map.get(model_name, 224)
model_handle = model_handle_map.get(model_name)

print(f"Selected model: {model_name} : {model_handle}")

# (height, width) fed to the dataset loader and the model input layer.
IMAGE_SIZE = (pixels,) * 2
print(f"Input size {IMAGE_SIZE}")

# Number of images per training/validation batch.
BATCH_SIZE = 16  #@param {type:"integer"}

In [None]:
# Root directory of the fine-tuning dataset; build_dataset() passes it to
# image_dataset_from_directory.
data_dir = '/kaggle/input/blind-app-ds-300/blind_app_dataset_300'
print(data_dir)

Select a dataset to fine-tune the model against

In [None]:


def build_dataset(subset):
  """Creates the `subset` split of the image dataset stored under `data_dir`.

  Args:
    subset: Which side of the 90/10 split to return, "training" or
      "validation".

  Returns:
    A dataset of (image, one-hot label) pairs with batch size 1, so that its
    cardinality equals the number of images in the split.
  """
  loader_options = dict(
      validation_split=.10,
      subset=subset,
      label_mode="categorical",
      # The same seed must be used for both subsets to keep them disjoint.
      seed=123,
      image_size=IMAGE_SIZE,
      batch_size=1,
  )
  return tf.keras.preprocessing.image_dataset_from_directory(
      data_dir, **loader_options)

# --- Training split ---------------------------------------------------------
train_ds = build_dataset("training")
class_names = tuple(train_ds.class_names)
# build_dataset() uses batch_size=1, so the cardinality is the image count.
train_size = train_ds.cardinality().numpy()
train_ds = train_ds.unbatch().batch(BATCH_SIZE)
# Repeat forever; model.fit() bounds each epoch with steps_per_epoch.
train_ds = train_ds.repeat()

# Pixel values arrive in [0, 255]; the model is fed values in [0, 1].
normalization_layer = tf.keras.layers.Rescaling(1. / 255)
preprocessing_model = tf.keras.Sequential([normalization_layer])
do_data_augmentation = False #@param {type:"boolean"}
if do_data_augmentation:
  # RandomRotation takes its factor as a fraction of a full turn (2*pi), not
  # degrees. 40 / 360 gives rotations of up to +/-40 degrees; the previous
  # value of 40 meant up to 40 full turns, i.e. an arbitrary rotation.
  preprocessing_model.add(
      tf.keras.layers.RandomRotation(40 / 360))
  preprocessing_model.add(
      tf.keras.layers.RandomTranslation(0, 0.2))
  preprocessing_model.add(
      tf.keras.layers.RandomTranslation(0.2, 0))
  # Like the old tf.keras.preprocessing.image.ImageDataGenerator(),
  # image sizes are fixed when reading, and then a random zoom is applied.
  # If all training inputs are larger than image_size, one could also use
  # RandomCrop with a batch size of 1 and rebatch later.
  preprocessing_model.add(
      tf.keras.layers.RandomZoom(0.2, 0.2))
  preprocessing_model.add(
      tf.keras.layers.RandomFlip(mode="horizontal"))
train_ds = train_ds.map(lambda images, labels:
                        (preprocessing_model(images), labels))

# --- Validation split -------------------------------------------------------
# Only rescaled, never augmented, and not repeated (finite dataset).
val_ds = build_dataset("validation")
valid_size = val_ds.cardinality().numpy()
val_ds = val_ds.unbatch().batch(BATCH_SIZE)
val_ds = val_ds.map(lambda images, labels:
                    (normalization_layer(images), labels))

Found 300 files belonging to 14 classes.
Using 270 files for training.
Found 300 files belonging to 14 classes.
Using 30 files for validation.


Defining the model.

All it takes is to put a linear classifier on top of the `feature_extractor_layer` with the Hub module.

For speed, we start out with a non-trainable `feature_extractor_layer`, but you can also enable fine-tuning for greater accuracy.

In [14]:
# False keeps the hub feature extractor frozen so only the new classification
# head is trained; True also updates the extractor weights.
do_fine_tuning = False

print("Building model with", model_handle)

# Height and width from IMAGE_SIZE plus three colour channels.
input_shape = IMAGE_SIZE + (3,)
model = tf.keras.Sequential([
    # Explicitly define the input shape so the model can be properly
    # loaded by the TFLiteConverter
    tf.keras.layers.InputLayer(input_shape=input_shape),
    hub.KerasLayer(model_handle, trainable=do_fine_tuning),
    tf.keras.layers.Dropout(rate=0.2),
    # Linear head emitting one logit per class (no softmax here; the loss is
    # configured with from_logits=True).
    tf.keras.layers.Dense(
        len(class_names),
        kernel_regularizer=tf.keras.regularizers.l2(0.0001)),
])
model.build((None,) + input_shape)
model.summary()

Building model with /kaggle/input/vision-transformer/tensorflow2/vit-b8-fe/1
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 768)               85807872  
_________________________________________________________________
dropout (Dropout)            (None, 768)               0         
_________________________________________________________________
dense (Dense)                (None, 14)                10766     
Total params: 85,818,638
Trainable params: 10,766
Non-trainable params: 85,807,872
_________________________________________________________________


Training the model

In [None]:
# The Dense head outputs raw logits, hence from_logits=True.
model.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.004, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
  metrics=['accuracy'])

# train_ds repeats indefinitely, so an epoch needs an explicit step count.
steps_per_epoch = train_size // BATCH_SIZE
# val_ds is finite. Round up so the final partial batch is evaluated too;
# floor division validated only the full batches (16 of 30 images), which
# skewed the val_loss that both callbacks monitor.
validation_steps = -(-valid_size // BATCH_SIZE)


from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Callbacks: halve the learning rate after 2 epochs without val_loss
# improvement, and stop (restoring the best weights) after 3.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0001, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)

# Fit the model; `hist` keeps only the per-epoch metrics dictionary.
hist = model.fit(
    train_ds,
    epochs=15,  # Upper bound; early stopping will terminate sooner if needed
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=[reduce_lr, early_stopping]
).history


In [None]:
# Evaluate the model on the validation dataset.
# evaluate() returns the loss followed by the compiled metrics (accuracy).
val_loss, val_accuracy = model.evaluate(val_ds, verbose=1)

print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")


In [None]:
# Per-sample results over the whole validation set:
#   y_true_indices - ground-truth class index of each image
#   y_pred_indices - class index predicted by the model
#   y_score_list   - softmax probabilities, one array of shape (1, num_classes) per image
y_true_indices = []
y_pred_indices = []
y_score_list = []

# Predict one batch at a time instead of issuing one model.predict() call per
# image; the results are the same but the per-call overhead is paid once per batch.
for x_batch, y_batch in val_ds:
    # Raw logits of shape (batch, num_classes).
    batch_logits = model.predict(x_batch, verbose=0)

    # tf.nn.softmax is numerically stable, unlike exp(x) / sum(exp(x)), which
    # can overflow for large logits.
    batch_probs = tf.nn.softmax(batch_logits, axis=-1).numpy()

    # Labels are one-hot, so argmax recovers the class index.
    y_true_indices.extend(np.argmax(y_batch.numpy(), axis=-1).tolist())
    y_pred_indices.extend(np.argmax(batch_logits, axis=-1).tolist())
    # Keep one (1, num_classes) row per image, as the per-image loop did.
    y_score_list.extend(batch_probs[:, np.newaxis, :])

# Print the collected indices
print("y_true_indices:", y_true_indices[:15])  # Show first 15 for brevity
print("y_pred_indices:", y_pred_indices[:15])  # Show first 15 for brevity


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Start every class at zero so classes that are absent from the validation
# split still get a (zero-height) bar.
class_counts = dict.fromkeys(class_names, 0)

# unbatch() yields one (image, one-hot label) pair at a time.
for _, labels in val_ds.unbatch():
    class_indices = np.argmax(labels.numpy(), axis=-1)  # index of the hot entry
    class_name = class_names[class_indices]
    class_counts[class_name] += 1

# Parallel lists for the bar chart, in class_names order.
classes = list(class_counts)
counts = [class_counts[name] for name in classes]

# Bar chart of the per-class sample counts.
plt.figure(figsize=(10, 6))
plt.bar(classes, counts, color="skyblue", edgecolor="black", alpha=0.7)
plt.title("Frequency of Each Class in Validation Dataset", fontsize=14, fontweight="bold")
plt.xlabel("Class Name", fontsize=12)
plt.ylabel("Number of Images", fontsize=12)
plt.xticks(rotation=45, ha="right", fontsize=10)  # slanted so long names stay legible
plt.grid(axis="y", linestyle="--", alpha=0.7)
# Optional: write each count above its bar.
# for i, count in enumerate(counts):
#     plt.text(i, count + 0.5, str(count), ha='center', fontsize=10, fontweight="bold")
plt.tight_layout()
plt.savefig("class_frequency_plot.png", dpi=300)  # high-resolution copy for publication
plt.show()


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# y_true_indices / y_pred_indices are flat lists holding one class index per
# validation image, as collected by the prediction loop.

# Overall fraction of correctly classified images.
accuracy = accuracy_score(y_true_indices, y_pred_indices)
print(f"Accuracy: {accuracy:.6f}")

# Multi-class precision / recall / F1. 'weighted' averages the per-class
# scores by class support, which accounts for class imbalance.
precision = precision_score(
    y_true_indices, y_pred_indices, average='weighted')
recall = recall_score(
    y_true_indices, y_pred_indices, average='weighted')
f1 = f1_score(
    y_true_indices, y_pred_indices, average='weighted')

print(f"Precision: {precision:.6f}")
print(f"Recall: {recall:.6f}")
print(f"F1-Score: {f1:.6f}")


In [None]:
# Spot check: take the second image of the first validation batch.
x, y = next(iter(val_ds))
image = x[1]
true_index = np.argmax(y[1])  # labels are one-hot

plt.imshow(image)
plt.axis('off')
plt.show()

# The model expects a batch, so add a leading axis: (224, 224, 3) -> (1, 224, 224, 3).
prediction_scores = model.predict(np.expand_dims(image, axis=0))
predicted_index = np.argmax(prediction_scores)
print("True label: " + class_names[true_index])
print("Predicted label: " + class_names[predicted_index])


In [None]:

# Debug aid: show which tf.data dataset class val_ds ended up as.
print(type(val_ds))


In [None]:
import time

# Per-sample results over the validation set:
#   true_labels / predicted_labels - class names (ground truth / model output)
#   tot_times                      - seconds spent on each single-image prediction
true_labels = []
predicted_labels = []
tot_times = []
# Iterate over all batches in val_ds
for x, y in val_ds:
    batch_size = x.shape[0]  # The last batch may be smaller than BATCH_SIZE

    for i in range(batch_size):  # Iterate over each sample in the batch
        true_index = np.argmax(y[i])  # Ground-truth class index (labels are one-hot)

        # Debug: Print values to check if they are valid
        print(f"Sample {len(true_labels)}: true_index={true_index}, class_names length={len(class_names)}")

        # Check if index is valid before accessing class_names
        if true_index >= len(class_names):
            print(f"Warning: true_index {true_index} is out of bounds!")
            continue  # Skip this entry to avoid crashing

        # Time only the inference path (slice, predict, argmax), not the debug
        # print above. perf_counter() is the monotonic, high-resolution clock
        # meant for measuring short intervals; time.time() can jump.
        st = time.perf_counter()
        image = x[i, :, :, :]

        # Expand the validation image to a batch of one before predicting
        prediction_scores = model.predict(np.expand_dims(image, axis=0))

        predicted_index = np.argmax(prediction_scores)
        et = time.perf_counter()

        # Store the elapsed time and the labels
        tot_times.append(et - st)
        true_labels.append(class_names[true_index])
        predicted_labels.append(class_names[predicted_index])
        


In [None]:
# Sanity check: both lists should have one entry per evaluated sample.
print(len(true_labels))
print(len(predicted_labels))

# Full lists of ground-truth and predicted class names.
print(true_labels)
print(predicted_labels)

# Mean of the per-sample prediction times collected in tot_times (seconds).
# NOTE(review): "ART" presumably stands for average response time - confirm.
print("ART:", sum(tot_times)/len(tot_times))

In [None]:
def time_to_minutes(time_str):
    """Converts a clock-time label into minutes since midnight.

    Args:
        time_str: Either a bare hour such as "12", or hour and minute joined
            by an underscore such as "11_30".

    Returns:
        The time as an integer number of minutes. Hours 1, 2 and 3 are
        treated as afternoon hours (13, 14, 15); every other hour is used
        as given.
    """
    if "_" in time_str:
        # "H_MM" form: exactly one hour field and one minute field.
        hour, minute = (int(part) for part in time_str.split("_"))
    else:
        # Bare hour, on the hour.
        hour, minute = int(time_str), 0

    # Only 1, 2 and 3 o'clock are shifted into the afternoon.
    if hour in (1, 2, 3):
        hour += 12

    return 60 * hour + minute

def calculate_time_difference(gt_val, out_val):
    """Returns the absolute gap in minutes between two clock-time labels.

    Args:
        gt_val: Ground-truth label in the format accepted by time_to_minutes.
        out_val: Predicted label in the same format.

    Returns:
        The non-negative difference between both times, in minutes.
    """
    gt_minutes, out_minutes = (
        time_to_minutes(label) for label in (gt_val, out_val))
    return abs(gt_minutes - out_minutes)

# print(len(gt_value), len(out_pred))
# out_pred = ['0' if val is None else val for val in out_pred]
# print(out_pred)
# Score every collected prediction. The sample count is taken from the data
# instead of a hard-coded 30, so a different validation-set size neither
# raises IndexError nor silently ignores samples.
size = len(true_labels)
if size == 0:
    raise ValueError("No predictions collected; run the inference loop before this cell.")

tot_dig = 0      # accumulated error, in degrees
similar_cnt = 0  # number of exact matches
for gt_label, out_label in zip(true_labels, predicted_labels):
    # Normalise the labels by dropping any spaces before parsing.
    gt_val = str(gt_label).replace(" ", "")
    out_val = str(out_label).replace(" ", "")
    time_diff = calculate_time_difference(gt_val, out_val)
    if time_diff == 0:
        similar_cnt += 1
    # Each minute of error is weighted as 0.5 degree.
    # NOTE(review): presumably the hour hand's 0.5 degree/minute - confirm.
    tot_dig += time_diff * 0.5

print("MAE:", tot_dig / size, "degree")
print("Accuracy:", (similar_cnt / size) * 100, "%")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Class indices collected by the prediction loop, one entry per validation image.
y_true = np.array(y_true_indices)
y_pred = np.array(y_pred_indices)

# Use every class index of the dataset for the matrix axes. Deriving the
# label set from np.unique(y_true) breaks when a class is missing from the
# validation split or appears only in the predictions: the matrix size then
# differs from the number of display labels. The real class names are used
# for the ticks, and the global `class_names` is left untouched so earlier
# cells still work when re-run.
label_indices = np.arange(len(class_names))

# Compute the confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_true, y_pred, labels=label_indices)

# Visualize the confusion matrix
plt.figure(figsize=(14, 10))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(class_names))
disp.plot(cmap=plt.cm.Blues, values_format='d', ax=plt.gca())

# Customize the plot
plt.title("Confusion Matrix", fontsize=16, fontweight="bold")
plt.xlabel("Predicted Label", fontsize=14)
plt.ylabel("True Label", fontsize=14)
plt.xticks(rotation=90)  # Rotate x-axis labels vertically
plt.grid(False)  # Disable grid for better visibility
plt.tight_layout()
plt.savefig("confusion_matrix.png")  # Save the plot
plt.show()
