In [None]:
import os, json, math, random
import numpy as np
import tensorflow as tf

In [None]:
# Mount Google Drive so the dataset stored under MyDrive is reachable (Colab-only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# List the dataset root to confirm the expected sub-folders are present.
!ls "/content/drive/MyDrive/Colab Notebooks/SIH/data"

buffalo  cattle  class_names.json


In [None]:
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.19.0


In [None]:
# Seed every RNG in play (TensorFlow, NumPy, Python's random) so runs are repeatable.
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)

In [None]:
# Root folder of the cattle images; it holds one sub-folder per breed.
DATA_DIR = "/content/drive/MyDrive/Colab Notebooks/SIH/data/cattle"

In [None]:
# Report whether the dataset folder is reachable before any loading is attempted.
folder_status = "Folder found" if os.path.isdir(DATA_DIR) else "Folder not found"
print(f"{folder_status}: {DATA_DIR}")

Folder found: /content/drive/MyDrive/Colab Notebooks/SIH/data/cattle


In [None]:
import os

# Print "<class folder> <number of entries>" for each sub-directory of DATA_DIR,
# in alphabetical order; plain files at the top level are ignored.
for cls in sorted(os.listdir(DATA_DIR)):
    cls_path = os.path.join(DATA_DIR, cls)
    if not os.path.isdir(cls_path):
        continue
    n_entries = len(os.listdir(cls_path))
    print(cls, n_entries)


Ayrshire cattle 260
Brown Swiss cattle 238
Holstein Friesian cattle 254
Jersey cattle 252
Red Dane cattle 204


In [None]:
# Shared settings for the image_dataset_from_directory calls below.
IMG_SIZE = (224, 224)  # ResNet50 default input resolution
BATCH_SIZE = 32
VAL_SPLIT = 0.20       # 20% held out for val+test (split in half by batch count later)
SEED = 42              # passed to both loader calls so they use the same split
COLOR_MODE = "rgb"

In [None]:
# Load the training subset (the remaining VAL_SPLIT fraction is loaded separately as val_temp).
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",       # label each image from the name of its sub-folder
    label_mode="int",        # integer labels, matching the sparse cross-entropy loss used at compile time
    class_names=None,        # infer class order from the sub-folder names
    color_mode=COLOR_MODE,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,     # images are resized to this size while loading
    shuffle=True,
    seed=SEED,
    validation_split=VAL_SPLIT,
    subset="training",
)


Found 1208 files belonging to 5 classes.
Using 967 files for training.


In [None]:
# Load the held-out subset. seed and validation_split are the same constants
# used for the training call, so the two subsets come from the same split.
# This subset is later halved into validation and test sets.
val_temp_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="int",
    color_mode=COLOR_MODE,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    shuffle=True,
    seed=SEED,
    validation_split=VAL_SPLIT,
    subset="validation",
)

Found 1208 files belonging to 5 classes.
Using 241 files for validation.


In [None]:
# Keep the class names (sub-folder names) and their count for later use.
# The position of a name in this list is its integer label.
class_names = train_ds.class_names
num_classes = len(class_names)
print("Classes:", class_names)

Classes: ['Ayrshire cattle', 'Brown Swiss cattle', 'Holstein Friesian cattle', 'Jersey cattle', 'Red Dane cattle']


In [None]:
# Human-readable note of the label mapping; it must mirror `class_names` as printed above.
''' so, mapping will be
'Ayrshire cattle' → 0
'Brown Swiss cattle' → 1
'Holstein Friesian cattle' → 2
'Jersey cattle' → 3
'Red Dane cattle' → 4
'''

" so, mapping will be\n'Ayrshire cattle' → 0\n'Brown Swiss cattle' → 1\n'Guernsey cattle' → 2\n'Holstein cattle' → 3\n'Jersey cattle' → 4\n"

In [None]:
# Persist the index → class-name mapping as JSON so inference code can reload it.
mapping_path = "/content/drive/MyDrive/Colab Notebooks/SIH/data/class_names.json"
label_map = dict(enumerate(class_names))
with open(mapping_path, "w") as f:
    json.dump(label_map, f, indent=2)
print(mapping_path)

/content/drive/MyDrive/Colab Notebooks/SIH/data/class_names.json


In [None]:
# Show the integer labels of one training batch alongside their class names.
for images, labels in train_ds.take(1):
    batch_labels = labels.numpy()
    print("Labels: ", batch_labels)
    print("Class names: ", [class_names[idx] for idx in batch_labels])

Labels:  [0 1 1 2 4 3 0 2 2 3 4 4 3 0 0 2 2 4 0 0 3 0 4 2 3 0 4 1 3 4 0 1]
Class names:  ['Ayrshire cattle', 'Brown Swiss cattle', 'Brown Swiss cattle', 'Holstein Friesian cattle', 'Red Dane cattle', 'Jersey cattle', 'Ayrshire cattle', 'Holstein Friesian cattle', 'Holstein Friesian cattle', 'Jersey cattle', 'Red Dane cattle', 'Red Dane cattle', 'Jersey cattle', 'Ayrshire cattle', 'Ayrshire cattle', 'Holstein Friesian cattle', 'Holstein Friesian cattle', 'Red Dane cattle', 'Ayrshire cattle', 'Ayrshire cattle', 'Jersey cattle', 'Ayrshire cattle', 'Red Dane cattle', 'Holstein Friesian cattle', 'Jersey cattle', 'Ayrshire cattle', 'Red Dane cattle', 'Brown Swiss cattle', 'Jersey cattle', 'Red Dane cattle', 'Ayrshire cattle', 'Brown Swiss cattle']


In [None]:
# Reverse mapping: class name → integer index.
class_to_idx = dict(zip(class_names, range(len(class_names))))
print(class_to_idx)


{'Ayrshire cattle': 0, 'Brown Swiss cattle': 1, 'Holstein Friesian cattle': 2, 'Jersey cattle': 3, 'Red Dane cattle': 4}


In [None]:
# Split val_temp into real val and test (50/50)

In [None]:
# A tf.data.Dataset is a lazy pipeline of operations (read images → resize → batch → shuffle),
# not an in-memory collection, so its length is queried with tf.data.experimental.cardinality(dataset).
# The result is a tf.Tensor holding the number of batches; .numpy() converts it to a plain number.
val_temp_batches = tf.data.experimental.cardinality(val_temp_ds).numpy()
print(val_temp_batches)

8


In [None]:
# Split val_temp into test (first half of the batches) and validation (the rest).
#
# val_temp_ds was loaded with shuffle=True, so every new pass over it yields the
# images in a different order. Calling take()/skip() on it directly would build
# test_ds and val_ds from two *different* orderings: the same image could end up
# in both subsets while others appear in neither. Freeze one ordering first.
val_temp_fixed = val_temp_ds.cache()   # in-memory cache of the held-out batches
for _ in val_temp_fixed:               # one complete pass fills and finalises the cache
    pass

test_ds = val_temp_fixed.take(val_temp_batches // 2)
val_ds  = val_temp_fixed.skip(val_temp_batches // 2)

In [None]:
# Report the number of batches in each of the three subsets.
print("Batches -> train:", tf.data.experimental.cardinality(train_ds).numpy(),
      "| val:", tf.data.experimental.cardinality(val_ds).numpy(),
      "| test:", tf.data.experimental.cardinality(test_ds).numpy())

Batches -> train: 31 | val: 4 | test: 4


In [None]:
# Sanity arithmetic: batch capacity (batch size × total batches) vs. actual image count.
print(32*(31+4+4))
print(260+238+254+252+204) # smaller than the capacity because the last batches are not full

1248
1208


Data augmentation (training only)

In [None]:
#    Keep augmentations conservative to avoid distorting breed characteristics.
#    NOTE: These layers are meant to run before preprocess_input, i.e. on normal images in the 0–255 pixel range.

In [None]:
# Light geometric augmentation only: flip, small rotation, zoom and shift.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(factor=0.05),  # factor is a fraction of a full turn: 0.05 × 360° = ±18°
    tf.keras.layers.RandomZoom(height_factor=0.05, width_factor=0.05), # 5% zoom in/out
    tf.keras.layers.RandomTranslation(height_factor=0.05, width_factor=0.05), # shift up to 5%
], name="data_augmentation")

Preprocess for ResNet50

In [None]:
#    Use tf.keras.applications.resnet50.preprocess_input (Caffe-style: BGR, mean subtraction).
#    Do NOT also rescale 1/255.

In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input

# Let tf.data choose the parallelism / prefetch buffer sizes at runtime.
AUTOTUNE = tf.data.AUTOTUNE

In [None]:
def add_augmentation(image, label):
    """Run ``data_augmentation`` on ``image`` in training mode; ``label`` is passed through.

    Intended for the training pipeline only, since ``training=True`` forces the
    random layers to be active.
    """
    augmented = data_augmentation(image, training=True)
    return augmented, label

In [None]:
def preprocess_for_resnet(image, label):
    """Cast ``image`` to float32 and apply ResNet50's ``preprocess_input``.

    ``preprocess_input`` expects float pixel values in the 0..255 range.
    ``label`` is returned unchanged.
    """
    as_float = tf.cast(image, tf.float32)
    return preprocess_input(as_float), label

In [None]:
# Preprocess the training batches for ResNet50.
#
# Random augmentation is deliberately NOT mapped here. This pipeline is cached
# in a later cell, and caching after a random transform records one fixed set
# of "random" augmentations during the first epoch and replays it on every
# following epoch. The model itself already contains the `data_augmentation`
# layers, which draw fresh transforms on every training step, so mapping them
# here as well would also augment each image twice.
train_ds = train_ds.map(preprocess_for_resnet, num_parallel_calls=AUTOTUNE)

In [None]:
# Validation and test data get the ResNet50 preprocessing only (no augmentation).
val_ds   = val_ds.map(preprocess_for_resnet, num_parallel_calls=AUTOTUNE)
test_ds  = test_ds.map(preprocess_for_resnet, num_parallel_calls=AUTOTUNE)

Pipeline optimizations: caching, shuffling, prefetching (performance)
Cache to disk to avoid RAM spikes on large datasets in Colab.

In [None]:
# Cache each pipeline to a file under /tmp, then prefetch so loading overlaps with training.
# The datasets are already batched (batch_size was set at load time), so the shuffle
# below reorders whole batches; buffer_size is therefore counted in batches.
# NOTE(review): an on-disk cache file that already exists is reused as-is, so delete
# /tmp/cattle_*.cache* after changing any step upstream of .cache().
train_ds = (train_ds
            .cache("/tmp/cattle_train.cache")
            .shuffle(buffer_size= BATCH_SIZE * 8, seed=SEED, reshuffle_each_iteration=True)
            .prefetch(AUTOTUNE))

val_ds   = val_ds.cache("/tmp/cattle_val.cache").prefetch(AUTOTUNE)
test_ds  = test_ds.cache("/tmp/cattle_test.cache").prefetch(AUTOTUNE)

Quick sanity checks

In [None]:
# Inspect one training batch: tensor shapes, label dtype and the pixel range
# produced by preprocess_input (no longer 0..255 after mean subtraction).
for images, labels in train_ds.take(1):
    print("Image batch shape:", images.shape, "Label batch shape:", labels.shape)
    print("Label dtype:", labels.dtype)
    print("Min/Max pixel after preprocess_input:", tf.reduce_min(images).numpy(), tf.reduce_max(images).numpy())

Image batch shape: (32, 224, 224, 3) Label batch shape: (32,)
Label dtype: <dtype: 'int32'>
Min/Max pixel after preprocess_input: -123.68 151.061


Class distribution — helpful later for class weight

In [None]:
import numpy as np

# Count how many training images carry each class index.
# unbatch() flattens the batched pipeline so every label is visited exactly once.
label_stream = (int(lbl.numpy()) for _, lbl in train_ds.unbatch())
train_class_counts = np.bincount(
    np.fromiter(label_stream, dtype=np.int64),
    minlength=len(class_names),
).astype(float)  # float array, same as the np.zeros-based accumulator

print("Class counts:", train_class_counts)


Class counts: [210. 195. 203. 203. 156.]


In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Flatten the training pipeline into a single 1-D array of integer labels.
y_labels = np.array([lbl.numpy() for _, lbl in train_ds.unbatch()])

# "balanced" gives rarer classes proportionally larger weights.
class_ids = np.arange(len(class_names))
balanced_weights = compute_class_weight(class_weight="balanced", classes=class_ids, y=y_labels)

# Store as {class_index: weight}.
class_weights = {idx: weight for idx, weight in enumerate(balanced_weights)}
print("Class Weights:", class_weights)


Class Weights: {0: np.float64(0.920952380952381), 1: np.float64(0.9917948717948718), 2: np.float64(0.9527093596059113), 3: np.float64(0.9527093596059113), 4: np.float64(1.2397435897435898)}


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Size of the classifier's output layer (one unit per class).
num_classes = len(class_names)

In [None]:

# 1) Load base ResNet50 (pretrained on ImageNet) as a feature extractor
base_model = tf.keras.applications.ResNet50(
    include_top=False,  # drop the original ImageNet classifier head
    weights="imagenet",
    input_shape=(224, 224, 3)
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# 2) Freeze base model so only the new head is trained in the first phase
base_model.trainable = False

In [None]:
# 3) Build new model
#
# Inputs are expected to be ALREADY passed through resnet50.preprocess_input:
# the train/val/test pipelines map `preprocess_for_resnet`, and the inference
# helper `preprocess_image` calls preprocess_input as well. It must therefore
# not be applied again inside the model, which would swap the channels back
# and subtract the ImageNet means a second time.
inputs = tf.keras.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)                # random augmentation (active only while training)
x = base_model(x, training=False)            # frozen ResNet50; training=False keeps BatchNorm in inference mode
x = layers.GlobalAveragePooling2D()(x)       # convert feature maps → vector
x = layers.Dropout(0.3)(x)                   # dropout for regularization
outputs = layers.Dense(num_classes, activation="softmax")(x)  # final classifier
model = tf.keras.Model(inputs, outputs)

In [None]:
# Print the layer table and the trainable / non-trainable parameter counts.
model.summary()

In [None]:
# Compile model for the head-training phase.
# Labels are integers and the model ends in softmax, hence sparse
# cross-entropy with from_logits=False.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"]
)

In [None]:
# Stop after 3 epochs without improvement of the monitored metric (Keras default:
# val_loss) and roll back to the best weights; also keep the best model on disk.
# This same list is reused by both fit() calls.
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint("best_resnet50.h5", save_best_only=True)
]

In [None]:
# Phase 1: train only the new classification head (the base model is frozen).
# NOTE(review): `class_weights` computed earlier is not passed here (there is no
# class_weight= argument), so training is unweighted — confirm this is intended.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,     # start small (you can increase later)
    callbacks=callbacks
)

Epoch 1/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step - accuracy: 0.3464 - loss: 1.9121



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 2s/step - accuracy: 0.3474 - loss: 1.9057 - val_accuracy: 0.6018 - val_loss: 1.0429
Epoch 2/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.5188 - loss: 1.1457



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 173ms/step - accuracy: 0.5201 - loss: 1.1436 - val_accuracy: 0.6637 - val_loss: 0.9079
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step - accuracy: 0.6275 - loss: 0.9531



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 171ms/step - accuracy: 0.6283 - loss: 0.9514 - val_accuracy: 0.6903 - val_loss: 0.7950
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step - accuracy: 0.6252 - loss: 0.9235



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 173ms/step - accuracy: 0.6262 - loss: 0.9216 - val_accuracy: 0.6903 - val_loss: 0.7857
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - accuracy: 0.7260 - loss: 0.7365



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 170ms/step - accuracy: 0.7259 - loss: 0.7369 - val_accuracy: 0.6814 - val_loss: 0.7752
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - accuracy: 0.7526 - loss: 0.6482



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 175ms/step - accuracy: 0.7528 - loss: 0.6482 - val_accuracy: 0.7257 - val_loss: 0.7263
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - accuracy: 0.7663 - loss: 0.6002



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 180ms/step - accuracy: 0.7657 - loss: 0.6023 - val_accuracy: 0.7434 - val_loss: 0.6961
Epoch 8/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 149ms/step - accuracy: 0.7628 - loss: 0.6155 - val_accuracy: 0.7434 - val_loss: 0.7007
Epoch 9/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - accuracy: 0.8259 - loss: 0.5254



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 180ms/step - accuracy: 0.8255 - loss: 0.5262 - val_accuracy: 0.7168 - val_loss: 0.6509
Epoch 10/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 147ms/step - accuracy: 0.8042 - loss: 0.5416 - val_accuracy: 0.7257 - val_loss: 0.6787


In [None]:
# Held-out test performance after the head-training phase.
test_loss, test_acc = model.evaluate(test_ds)
print(f"Test Accuracy: {test_acc:.2f}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 140ms/step - accuracy: 0.7365 - loss: 0.5998
Test Accuracy: 0.73


In [None]:
# Fine-tuning setup: make the base model trainable, then re-freeze every layer
# except the last 30 so only the top of the network is updated.
base_model.trainable = True  # unfreeze
for layer in base_model.layers[:-30]:  # keep first layers frozen
    layer.trainable = False

In [None]:
# Re-compile after changing which layers are trainable, with a much lower
# learning rate than the head-training phase (1e-5 vs 1e-3).
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),  # smaller LR
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"]
)

In [None]:

# Phase 2: fine-tune the unfrozen top layers together with the head.
# Reuses the same `callbacks` list as the first fit() call.
history_fine = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=callbacks
)

Epoch 1/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - accuracy: 0.7224 - loss: 0.7212



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 304ms/step - accuracy: 0.7226 - loss: 0.7201 - val_accuracy: 0.7788 - val_loss: 0.5948
Epoch 2/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - accuracy: 0.8138 - loss: 0.5577



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 227ms/step - accuracy: 0.8141 - loss: 0.5568 - val_accuracy: 0.8053 - val_loss: 0.5699
Epoch 3/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step - accuracy: 0.8532 - loss: 0.4231



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 230ms/step - accuracy: 0.8531 - loss: 0.4231 - val_accuracy: 0.8230 - val_loss: 0.5248
Epoch 4/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step - accuracy: 0.8709 - loss: 0.3883



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 229ms/step - accuracy: 0.8711 - loss: 0.3882 - val_accuracy: 0.8407 - val_loss: 0.4934
Epoch 5/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step - accuracy: 0.8793 - loss: 0.3794



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 231ms/step - accuracy: 0.8795 - loss: 0.3791 - val_accuracy: 0.8850 - val_loss: 0.4660


In [None]:
# Held-out test performance after fine-tuning.
test_loss, test_acc = model.evaluate(test_ds)
print(f"Test Accuracy: {test_acc:.2f}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 136ms/step - accuracy: 0.8510 - loss: 0.5006
Test Accuracy: 0.84


prediction

In [None]:
import tensorflow as tf
import numpy as np
import json
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image

In [None]:
# Reload the saved index → class-name mapping.
# JSON object keys are always strings, so they are converted back to int.
with open("/content/drive/MyDrive/Colab Notebooks/SIH/data/class_names.json", "r") as f:
    class_indices = {int(idx): name for idx, name in json.load(f).items()}
print(class_indices)

{0: 'Ayrshire cattle', 1: 'Brown Swiss cattle', 2: 'Holstein Friesian cattle', 3: 'Jersey cattle', 4: 'Red Dane cattle'}


In [None]:
IMG_SIZE = (224, 224)

def preprocess_image(img_path):
    """Load one image file and return it as a ResNet50-ready batch of size 1.

    The image is resized to ``IMG_SIZE``, converted to an array, given a
    leading batch axis (resulting shape ``(1, 224, 224, 3)``) and passed
    through ``resnet50.preprocess_input``.
    """
    pil_img = image.load_img(img_path, target_size=IMG_SIZE)
    single_batch = image.img_to_array(pil_img)[np.newaxis, ...]
    return tf.keras.applications.resnet50.preprocess_input(single_batch)


In [None]:
def predict_breed(img_path):
    """Classify one image file and return ``(breed_name, confidence)``.

    The image is prepared with ``preprocess_image`` and scored by the
    notebook-level ``model``. The winning class index is translated to a
    breed name through ``class_indices``; ``confidence`` is the highest
    predicted probability.
    """
    probabilities = model.predict(preprocess_image(img_path))

    # Single-image batch: row 0 of the argmax is the predicted class.
    best_idx = np.argmax(probabilities, axis=1)[0]
    top_score = np.max(probabilities)

    return class_indices[best_idx], top_score


In [None]:
# Run the classifier on one sample image and report the result.
img_path = "/content/Jerseycattle1_c.jpg"   # Example image path
breed, conf = predict_breed(img_path)

print(f"Predicted Breed: {breed} (Confidence: {conf:.2f})")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
Predicted Breed: Jersey cattle (Confidence: 0.77)
