In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential

from tensorflow.keras.regularizers import l2


In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/


In [None]:
# Download the competition archive with the Kaggle CLI
# (the captured log below shows it being written to /content).
!kaggle competitions download -c vlg-recruitment-24-challenge

Downloading vlg-recruitment-24-challenge.zip to /content
100% 4.18G/4.18G [03:27<00:00, 9.52MB/s]
100% 4.18G/4.18G [03:27<00:00, 21.6MB/s]


In [None]:
import zipfile

# Unpack the competition archive into /content. The context manager closes the
# archive even if extraction fails part-way through.
with zipfile.ZipFile('/content/vlg-recruitment-24-challenge.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
# Both splits are read from the same directory with the same split fraction and
# seed; only `subset` differs between the two calls.
_train_dir = '/content/vlg-dataset/vlg-dataset/train'
_split_kwargs = dict(
    validation_split=0.2,
    seed=42,
    image_size=(224, 224),
    batch_size=32,
)

train_dataset = tf.keras.utils.image_dataset_from_directory(
    _train_dir, subset="training", **_split_kwargs
)
val_dataset = tf.keras.utils.image_dataset_from_directory(
    _train_dir, subset="validation", **_split_kwargs
)




Found 9544 files belonging to 40 classes.
Using 7636 files for training.
Found 9544 files belonging to 40 classes.
Using 1908 files for validation.


In [None]:
# Rescaling layer that multiplies its input by 1/255.
# NOTE(review): nothing in the visible notebook applies this layer to the
# datasets or inserts it into the model — confirm whether it is still needed.
normalization_layer = layers.Rescaling(1./255)  # Normalize pixel values to [0,1]


In [None]:
# Number of target classes, taken from the class names of the training split
num_classes = len(train_dataset.class_names)

# ResNet50 backbone with ImageNet weights and without its classification top.
# NOTE(review): the Keras docs pair these weights with
# `tf.keras.applications.resnet50.preprocess_input`; no such preprocessing is
# applied in the visible cells — confirm this is intentional.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Partial fine-tuning: mark the whole backbone trainable, then freeze only its
# first 100 layers so that the remaining layers are updated during training.
base_model.trainable = True


for layer in base_model.layers[:100]:
    layer.trainable = False

# Classification head on top of the backbone. The last layer already applies
# softmax, so the model outputs class probabilities rather than logits.
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(1024, activation='relu', kernel_regularizer=l2(0.001)),  # L2 regularization
    layers.Dropout(0.5),  # Dropout to reduce overfitting
    layers.Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.001))
])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau

# Define a learning rate schedule
def scheduler(epoch, lr):
    """Per-epoch learning-rate schedule for ``LearningRateScheduler``.

    The rate passed in is returned unchanged for epochs 0-9; from epoch 10
    onwards it is multiplied by exp(-0.1). The result is always a Python float.
    """
    if epoch >= 10:
        lr = lr * tf.math.exp(-0.1)
    return float(lr)

# Compile with Adam at a small initial learning rate
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Learning-rate control: the per-epoch schedule defined above, plus halving the
# rate (down to 1e-6) when val_loss has not improved for 3 epochs.
lr_scheduler = LearningRateScheduler(scheduler)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
callbacks = [lr_scheduler, lr_reducer]

# Train for 15 epochs, validating on the held-out split after each one
history = model.fit(train_dataset, validation_data=val_dataset, epochs=15, callbacks=callbacks)


Epoch 1/15
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 578ms/step - accuracy: 0.0641 - loss: 5.3562 - val_accuracy: 0.5026 - val_loss: 3.7223 - learning_rate: 1.0000e-05
Epoch 2/15
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 380ms/step - accuracy: 0.4108 - loss: 3.7399 - val_accuracy: 0.6840 - val_loss: 2.7102 - learning_rate: 1.0000e-05
Epoch 3/15
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 384ms/step - accuracy: 0.6449 - loss: 2.8056 - val_accuracy: 0.7542 - val_loss: 2.3661 - learning_rate: 1.0000e-05
Epoch 4/15
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 377ms/step - accuracy: 0.7497 - loss: 2.3671 - val_accuracy: 0.7914 - val_loss: 2.1866 - learning_rate: 1.0000e-05
Epoch 5/15
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 381ms/step - accuracy: 0.8047 - loss: 2.1207 - val_accuracy: 0.8166 - val_loss: 2.0723 - learning_rate: 1.0000e-05
Epoch 6/15
[1m239/239[0m [3

In [None]:
def classify_images(image_path):
    """Classify one image file with the global ``model``.

    Args:
        image_path: path of the image to load (resized to 224x224).

    Returns:
        ``(label, confidence)`` where ``label`` is looked up in the global
        ``data_cat`` list and ``confidence`` is the top-class probability as a
        Python float.
    """
    input_image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
    input_image_array = tf.keras.utils.img_to_array(input_image)
    input_image_exp_dim = tf.expand_dims(input_image_array, 0)  # Batch dimension for model input

    # The model's last layer is already a softmax, so the prediction holds class
    # probabilities; a second softmax would flatten them toward 1/num_classes.
    # verbose=0 avoids one progress bar per image when called in a loop.
    probabilities = model.predict(input_image_exp_dim, verbose=0)[0]
    best_index = int(np.argmax(probabilities))
    outcome = data_cat[best_index]  # Predicted class label
    confidence = float(probabilities[best_index])  # Confidence score
    return outcome, confidence

import csv
import os

# Label lookup (class index -> name) read by classify_images(). It is assigned
# here so this cell also runs on a fresh kernel; a later cell assigns the same
# value again.
data_cat = val_dataset.class_names

# Directory containing the images
image_dir = "/content/vlg-dataset/vlg-dataset/test/"

# Path to save the CSV file
csv_file = "predict_with_confidence.csv"

# Create the CSV file
with open(csv_file, mode="w", newline="") as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(["image_id", "class", "confidence"])

    # Process each image in the directory; sorted so the row order is reproducible
    for image_name in sorted(os.listdir(image_dir)):
        if image_name.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):  # Check for image file extensions
            image_path = os.path.join(image_dir, image_name)
            class_label, confidence_score = classify_images(image_path)
            # Write the image ID, class, and confidence score to the CSV
            writer.writerow([image_name, class_label, confidence_score])

print(f"CSV file '{csv_file}' created successfully.")

In [None]:
import os
import csv
import shutil  # For moving files

# Label lookup (class index -> name); assigned here so the cell also runs on a
# fresh kernel.
data_cat = val_dataset.class_names

# Images whose top-class probability is below this value are set aside.
# The model already ends in a softmax layer, so its output is used directly as
# the confidence. A cutoff of 0.04 only makes sense on a score that has been
# passed through softmax a second time: for the 40 classes of this dataset such
# a score is about 0.040 exactly when the top probability is about 0.5
# (e^0.5 / sum_i e^{q_i} with the q_i summing to 1), hence the value below.
# NOTE(review): re-tune this cutoff on the validation split if needed.
LOW_CONFIDENCE_THRESHOLD = 0.5

def classify_images(image_path):
    """Classify one image file with the global ``model``.

    Returns ``(label, confidence)``: the class name from ``data_cat`` and the
    top-class probability as a Python float.
    """
    input_image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
    input_image_array = tf.keras.utils.img_to_array(input_image)
    input_image_exp_dim = tf.expand_dims(input_image_array, 0)  # Batch dimension for model input

    # verbose=0 avoids one progress bar per image
    probabilities = model.predict(input_image_exp_dim, verbose=0)[0]
    best_index = int(np.argmax(probabilities))
    return data_cat[best_index], float(probabilities[best_index])

# Directory containing the images
image_dir = "/content/vlg-dataset/vlg-dataset/test/"

# Directory to save low-confidence images
new_folder = "/content/vlg-dataset/vlg-dataset/test_new/"
os.makedirs(new_folder, exist_ok=True)

# Path to save the CSV file
csv_file = "predict_above_threshold.csv"

# Create the CSV file
with open(csv_file, mode="w", newline="") as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(["image_id", "class", "confidence"])

    # Process each image in the directory (the listing is taken once, up front,
    # so moving files out of the directory below does not disturb the loop)
    for image_name in sorted(os.listdir(image_dir)):
        if image_name.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):  # Check for image file extensions
            image_path = os.path.join(image_dir, image_name)
            class_label, confidence_score = classify_images(image_path)

            if confidence_score < LOW_CONFIDENCE_THRESHOLD:
                # Move low-confidence images to the new folder
                shutil.move(image_path, os.path.join(new_folder, image_name))
            else:
                # Log the remaining images in the CSV
                writer.writerow([image_name, class_label, confidence_score])

print(f"CSV file '{csv_file}' created for images with confidence >= {LOW_CONFIDENCE_THRESHOLD}.")
print(f"Low-confidence images moved to '{new_folder}'.")

In [None]:
# Class-name list of the validation split; the prediction cells index it with
# the arg-max of the model output to turn a class index into a label.
data_cat=val_dataset.class_names

In [None]:
# Sanity-check the model on a single test image.
sample_path = '/content/vlg-dataset/vlg-dataset/test/00010.jpg'
image = tf.keras.utils.load_img(sample_path, target_size=(224, 224))
img_bat = tf.keras.utils.img_to_array(image)
img_bat = tf.expand_dims(img_bat, 0)  # add the batch dimension
predict = model.predict(img_bat)
# The model's last layer is already a softmax, so `predict` holds probabilities;
# applying softmax again would flatten them toward 1/num_classes.
score = predict[0]
print('Animal in image is {} with accuracy of {:0.2f}'.format(data_cat[np.argmax(score)],np.max(score)*100))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Animal in image is lion with accuracy of 6.34


In [None]:
import os
import shutil  # For moving files

# Images whose top-class probability is below this value are moved away.
# The model already ends in a softmax layer, so its output is used directly as
# the confidence. A cutoff of 0.04 only makes sense on a score that has been
# passed through softmax a second time: for the 40 classes of this dataset such
# a score is about 0.040 exactly when the top probability is about 0.5, hence
# the value below.
# NOTE(review): re-tune this cutoff on the validation split if needed.
LOW_CONFIDENCE_THRESHOLD = 0.5

def classify_images(image_path):
    """Return the model's top-class probability (a Python float) for one image file."""
    input_image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
    input_image_array = tf.keras.utils.img_to_array(input_image)
    input_image_exp_dim = tf.expand_dims(input_image_array, 0)  # Batch dimension for model input

    # verbose=0 avoids one progress bar per image
    probabilities = model.predict(input_image_exp_dim, verbose=0)[0]
    return float(np.max(probabilities))

# Directory containing the images
image_dir = "/content/vlg-dataset/vlg-dataset/test/"

# Directory to save low-confidence images
new_folder = "/content/vlg-dataset/vlg-dataset/TESTXYZ/"
os.makedirs(new_folder, exist_ok=True)

# Process each image in the directory (the listing is taken once, up front, so
# moving files out of the directory below does not disturb the loop)
for image_name in sorted(os.listdir(image_dir)):
    if image_name.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):  # Check for image file extensions
        image_path = os.path.join(image_dir, image_name)
        confidence_score = classify_images(image_path)

        # Move low-confidence images to the new folder
        if confidence_score < LOW_CONFIDENCE_THRESHOLD:
            shutil.move(image_path, os.path.join(new_folder, image_name))

print(f"Low-confidence images moved to '{new_folder}'.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26

In [None]:
import numpy as np
import os
from glob import glob
from PIL import Image
import torch
from torch.utils import data
import cv2

class AnimalDataset(data.dataset.Dataset):
  """Image dataset that pairs every picture with its class's predicate row.

  Files read (all relative to the working directory):
    * ``data/predicate-matrix-binary.txt`` - one row of integers per class
    * ``data/classes.txt``                 - class names, one per line
    * ``data/<classes_file>``              - the classes this split contains
    * ``data/JPEGImages/<class>/*.jpg``    - the images themselves
  """

  def __init__(self, classes_file, transform):
    """Index every ``.jpg`` of the classes listed in ``data/<classes_file>``.

    Args:
      classes_file: file name (inside ``data/``) with one class name per line.
      transform: callable applied to each PIL image, or a falsy value to
        return the PIL image unchanged.
    """
    self.predicate_binary_mat = np.array(np.genfromtxt('data/predicate-matrix-binary.txt', dtype='int'))
    self.transform = transform

    # Map class name -> line number in classes.txt (used as the predicate row)
    class_to_index = dict()
    with open('data/classes.txt') as f:
      for index, line in enumerate(f):
        class_name = line.split('\t')[0].strip()
        class_to_index[class_name] = index
    self.class_to_index = class_to_index

    img_names = []
    img_index = []
    with open('data/{}'.format(classes_file)) as f:
      for line in f:
        class_name = line.strip()
        if not class_name:
          # A blank or trailing line is not a class; skip it instead of
          # failing on the dictionary lookup below.
          continue
        FOLDER_DIR = os.path.join('data/JPEGImages', class_name)
        files = glob(os.path.join(FOLDER_DIR, '*.jpg'))

        class_index = class_to_index[class_name]
        img_names.extend(files)
        img_index.extend([class_index] * len(files))
    self.img_names = img_names
    self.img_index = img_index

  def __getitem__(self, index):
    """Return ``(image, predicate_row, file_name, class_index)`` for one sample."""
    im = Image.open(self.img_names[index])
    # Convert every non-RGB mode (grayscale, palette, RGBA, CMYK, ...) so each
    # sample ends up with three channels, not only single-band 'L' images.
    if im.mode != 'RGB':
      im = im.convert('RGB')
    if self.transform:
      im = self.transform(im)
    # Report samples whose transformed shape is unexpected. PIL images (no
    # transform given) have no `.shape`, so they are skipped by this check.
    shape = getattr(im, 'shape', None)
    if shape is not None and tuple(shape) != (3,224,224):
      print(self.img_names[index])

    im_index = self.img_index[index]
    im_predicate = self.predicate_binary_mat[im_index,:]
    return im, im_predicate, self.img_names[index], im_index

  def __len__(self):
    """Number of indexed image files."""
    return len(self.img_names)



In [None]:
import os
import shutil

# Path to the folder you want to compress
folder_path = '/content/vlg-dataset/vlg-dataset/TESTXYZ'

# Path where you want to save the zip file
zip_file_path = '/content/folder_name.zip'

# make_archive() appends the extension itself, so it is given the path without
# ".zip". splitext() strips only the final suffix, whereas str.replace() would
# also rewrite a ".zip" occurring earlier in the path.
archive_base, _ = os.path.splitext(zip_file_path)
created_archive = shutil.make_archive(archive_base, 'zip', folder_path)

# Report the path make_archive() actually wrote
print(f"Folder compressed to: {created_archive}")


In [None]:
import argparse
import numpy as np
import torch
import torch.nn as nn
from torch.utils import data
import torchvision
import torchvision.transforms as transforms
import os
import sys

def build_model(num_labels, is_pretrained, is_parallel):
  """Create a ResNet-50 whose ``fc`` layer is replaced by a ``num_labels``-way head.

  Args:
    num_labels: number of outputs of the new head.
    is_pretrained: forwarded to torchvision as ``pretrained``; when true, all
      backbone parameters have ``requires_grad`` switched off before the new
      head is attached.
    is_parallel: when true, the network is returned wrapped in ``nn.DataParallel``.

  Returns:
    The network (moved to the global ``device`` before the head is attached).
  """
  net = torchvision.models.resnet50(pretrained=is_pretrained).to(device)
  if is_pretrained:
    for weight in net.parameters():
      weight.requires_grad = False

  print('Using DataParallel:' if is_parallel else 'Not using DataParallel:')

  # New head: BatchNorm -> ReLU -> Dropout -> Linear on the pooled features
  in_dim = net.fc.in_features
  net.fc = nn.Sequential(nn.BatchNorm1d(in_dim), nn.ReLU(), nn.Dropout(0.25), nn.Linear(in_dim, num_labels))

  return nn.DataParallel(net) if is_parallel else net

def train(num_epochs, eval_interval, learning_rate, output_filename, model_name, optimizer_name, batch_size):
  """Train the predicate classifier, save it, and write final predictions.

  Args:
    num_epochs: number of passes over the training set.
    eval_interval: run ``evaluate`` on the test loader every this many epochs.
    learning_rate: Adam learning rate.
    output_filename: file that ``make_predictions`` writes to.
    model_name: file name (inside ``models/``) for the model state dict.
    optimizer_name: file name (inside ``models/``) for the optimizer state dict.
    batch_size: training batch size.

  Relies on the module-level ``device`` and ``num_labels``.
  """
  train_params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 3}
  test_params = {'batch_size': 1, 'shuffle': True, 'num_workers': 3}
  train_process_steps = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.Resize((224,224)), # ImageNet standard
    transforms.ToTensor()
  ])
  test_process_steps = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
  ])
  train_dataset = AnimalDataset('trainclasses.txt', train_process_steps)
  test_dataset = AnimalDataset('testclasses.txt', test_process_steps)
  train_loader = data.DataLoader(train_dataset, **train_params)
  test_loader = data.DataLoader(test_dataset, **test_params)
  # BCEWithLogitsLoss applies the sigmoid internally and is numerically more
  # stable than a separate sigmoid followed by BCELoss.
  criterion = nn.BCEWithLogitsLoss()

  total_steps = len(train_loader)
  use_parallel = torch.cuda.device_count() > 1
  model = build_model(num_labels, False, use_parallel).to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  for epoch in range(num_epochs):
    # (Re-)enter training mode at the start of every epoch
    model.train()
    for i, (images, features, img_names, indexes) in enumerate(train_loader):
      # Batchnorm1D can't handle batch size of 1
      if images.shape[0] < 2:
        break
      images = images.to(device)
      features = features.to(device).float()

      outputs = model(images)
      loss = criterion(outputs, features)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if i % 50 == 0:
        print ('Epoch [{}/{}], Step [{}/{}], Batch Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_steps, loss.item()))
        sys.stdout.flush()

    # Do some evaluations (note: this scores the test loader, whatever the
    # wording of the message below)
    if (epoch + 1) % eval_interval == 0:
      print('Evaluating:')
      curr_acc = evaluate(model, test_loader)
      print('Epoch [{}/{}] Approx. training accuracy: {}'.format(epoch+1, num_epochs, curr_acc))

  # Make final predictions
  print('Making predictions:')
  os.makedirs('models', exist_ok=True)
  torch.save(model.state_dict(), 'models/{}'.format(model_name))
  torch.save(optimizer.state_dict(), 'models/{}'.format(optimizer_name))
  make_predictions(model, test_loader, output_filename)

def get_hamming_dist(curr_labels, class_labels):
  """Count the positions at which the two label vectors differ."""
  mismatch_mask = curr_labels != class_labels
  return np.sum(mismatch_mask)

def get_cosine_dist(curr_labels, class_labels):
  """Cosine similarity between two label vectors.

  Despite the name, a larger value means the vectors are more alike. The norms
  are computed from the squared entries, so the result is also correct for
  real-valued vectors; for 0/1 vectors it equals dot / sqrt(sum) / sqrt(sum).

  Returns 0.0 when either vector has zero norm (instead of dividing by zero).
  """
  a = np.asarray(curr_labels, dtype=float)
  b = np.asarray(class_labels, dtype=float)
  denom = np.sqrt(np.sum(a * a)) * np.sqrt(np.sum(b * b))
  if denom == 0:
    return 0.0
  return np.sum(a * b) / denom

def get_euclidean_dist(curr_labels, class_labels):
  """Euclidean (L2) distance between two label vectors."""
  delta = curr_labels - class_labels
  return np.sqrt(np.sum(delta**2))

def labels_to_class(pred_labels):
  """Map each predicted predicate vector to the nearest non-training class.

  Args:
    pred_labels: 2-D tensor with one predicted predicate vector per row.

  Returns:
    List with one class name per row: the class (among those not listed in the
    global ``train_classes``) whose row of ``predicate_binary_mat`` has the
    smallest Euclidean distance to the prediction. Ties go to the lowest index.

  Raises:
    ValueError: if no candidate class can be selected for a row.
  """
  # The set of admissible classes does not depend on the sample; compute it once.
  candidate_indices = [j for j in range(predicate_binary_mat.shape[0]) if classes[j] not in train_classes]

  predictions = []
  for i in range(pred_labels.shape[0]):
    curr_labels = pred_labels[i,:].cpu().detach().numpy()
    best_dist = float('inf')
    best_index = None
    for j in candidate_indices:
      dist = get_euclidean_dist(curr_labels, predicate_binary_mat[j,:])
      if dist < best_dist:
        best_index = j
        best_dist = dist
    if best_index is None:
      # An index of -1 would silently select the last class instead.
      raise ValueError('no candidate class available for prediction row {}'.format(i))
    predictions.append(classes[best_index])
  return predictions

def evaluate(model, dataloader):
  """Return the fraction of samples whose predicted class equals the true class.

  Args:
    model: network whose outputs are passed through a sigmoid and matched to a
      class by ``labels_to_class``.
    dataloader: yields ``(images, features, img_names, indexes)`` batches; only
      ``images`` and ``indexes`` are used here.

  The model is switched to eval mode for the duration of the call and put back
  into the mode it was in before.
  """
  was_training = model.training
  model.eval()

  pred_classes = []
  truth_classes = []
  try:
    with torch.no_grad():
      for images, _features, _img_names, indexes in dataloader:
        images = images.to(device)
        pred_labels = torch.sigmoid(model(images))
        pred_classes.extend(labels_to_class(pred_labels))
        # `indexes` holds each sample's class index; look up its name
        truth_classes.extend(classes[index] for index in indexes)
  finally:
    # Restore the caller's mode even if evaluation fails
    model.train(was_training)

  return np.mean(np.array(pred_classes) == np.array(truth_classes))

def make_predictions(model, dataloader, output_filename):
  """Predict a class for every sample and write ``<image> <class>`` lines.

  Args:
    model: network whose outputs are passed through a sigmoid and matched to a
      class by ``labels_to_class``. It is left in eval mode.
    dataloader: yields ``(images, features, img_names, indexes)`` batches; only
      ``images`` and ``img_names`` are used here.
    output_filename: text file to (over)write, one line per image, with the
      ``data/JPEGImages/`` prefix removed from the image path.
  """
  # Toggle flag
  model.eval()

  pred_classes = []
  output_img_names = []
  with torch.no_grad():
    for i, (images, _features, img_names, _indexes) in enumerate(dataloader):
      images = images.to(device)
      pred_labels = torch.sigmoid(model(images))
      pred_classes.extend(labels_to_class(pred_labels))
      output_img_names.extend(img_names)

      if i % 1000 == 0:
        print('Prediction iter: {}'.format(i))

  with open(output_filename, 'w') as f:
    for img_name, pred_class in zip(output_img_names, pred_classes):
      f.write(img_name.replace('data/JPEGImages/', '') + ' ' + pred_class + '\n')

def load_model(model_file):
  """Rebuild the network and load the weights stored in ``model_file``.

  Accepts state dicts saved from a plain model as well as from an
  ``nn.DataParallel`` wrapper (whose keys carry a ``module.`` prefix).

  Args:
    model_file: path of a state dict written by ``torch.save``.

  Returns:
    The model on the global ``device``; wrapped in ``nn.DataParallel`` when
    more than one CUDA device is available.
  """
  # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
  state_dict = torch.load(model_file, map_location=device)

  # Strip the prefix added by DataParallel so the keys match a bare model
  prefix = 'module.'
  state_dict = {
    (key[len(prefix):] if key.startswith(prefix) else key): value
    for key, value in state_dict.items()
  }

  # Build the bare network: build_model() already wraps when asked to, so
  # wrapping again here would nest two DataParallel layers.
  model = build_model(num_labels, False, False).to(device)
  model.load_state_dict(state_dict)

  if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
  return model

def debug(model_file, mode):
  """Load a saved model and either score it or write its predictions.

  Args:
    model_file: path of the saved model state dict.
    mode: ``'evaluate'`` prints the accuracy on the test classes;
      ``'predict'`` writes predictions to the module-level ``output_filename``.

  Raises:
    ValueError: if ``mode`` is neither ``'evaluate'`` nor ``'predict'``.
  """
  # Reject a mistyped mode before loading the model and the data
  if mode not in ('evaluate', 'predict'):
    raise ValueError("mode must be 'evaluate' or 'predict', got {!r}".format(mode))

  model = load_model(model_file)
  test_params = {'batch_size': 1, 'shuffle': True, 'num_workers': 3}
  process_steps = transforms.Compose([
    transforms.Resize((224,224)), # ImageNet standard
    transforms.ToTensor()
  ])
  test_dataset = AnimalDataset('testclasses.txt', process_steps)
  test_loader = data.DataLoader(test_dataset, **test_params)
  if mode == 'evaluate':
    print(evaluate(model, test_loader))
  else:
    # make_predictions() requires the destination file as its third argument
    make_predictions(model, test_loader, output_filename)

# Driver for the training script. The settings are kept in a plain dict; the
# original command-line form was `python train.py -n 25 -et 5 -lr 0.000025 -bs 24`.
# Everything lives under the guard so that importing this code does not fail on
# an undefined `args` or start a training run.
if __name__ == '__main__':
  args = {
    'num_epochs': 2,
    # NOTE(review): with eval_interval > num_epochs no intermediate evaluation runs
    'eval_interval': 5,
    'learning_rate': 0.00001,
    'model_name': 'model.bin',
    'optimizer_name': 'optimizer.bin',
    'output_file': 'predictions.txt',
    'batch_size': 24,
  }

  num_epochs = args['num_epochs']
  eval_interval = args['eval_interval']
  learning_rate = args['learning_rate']
  model_name = args['model_name']
  optimizer_name = args['optimizer_name']
  output_filename = args['output_file']
  batch_size = args['batch_size']

  # Globals read by build_model / labels_to_class / evaluate / make_predictions
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  train_classes = np.array(np.genfromtxt('data/trainclasses.txt', dtype='str'))
  classes = np.array(np.genfromtxt('data/classes.txt', dtype='str'))
  predicates = np.array(np.genfromtxt('data/predicates.txt', dtype='str'))
  predicate_binary_mat = np.array(np.genfromtxt('data/predicate-matrix-binary.txt', dtype='int'))
  predicate_continuous_mat = np.array(np.genfromtxt('data/predicate-matrix-continuous.txt', dtype='float'))
  num_labels = len(predicates)

  train(num_epochs, eval_interval, learning_rate, output_filename, model_name, optimizer_name, batch_size)

  #debug('models/model.bin', 'evaluate')