<a href="https://colab.research.google.com/github/bhuiyanmobasshir94/Cow-weight-and-Breed-Prediction/blob/main/notebooks/027_dec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import sys
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import pathlib
from sklearn.model_selection import train_test_split

In [2]:
images_dataset_url = "https://cv-datasets-2021.s3.amazonaws.com/images.tar.gz"
images_data_dir = tf.keras.utils.get_file(origin=images_dataset_url,
                                   fname='images',
                                   untar=True)
images_data_dir = pathlib.Path(images_data_dir)

Downloading data from https://cv-datasets-2021.s3.amazonaws.com/images.tar.gz


In [3]:
yt_images_dataset_url = "https://cv-datasets-2021.s3.amazonaws.com/yt_images.tar.gz"
yt_images_data_dir = tf.keras.utils.get_file(origin=yt_images_dataset_url,
                                   fname='yt_images',
                                   untar=True)
yt_images_data_dir = pathlib.Path(yt_images_data_dir)

Downloading data from https://cv-datasets-2021.s3.amazonaws.com/yt_images.tar.gz


In [4]:
if sys.platform == 'darwin':
  os.system(f"dot_clean {images_data_dir}")
  os.system(f"dot_clean {yt_images_data_dir}")
elif sys.platform.startswith("lin"):
  os.system(f"cd {images_data_dir} && find . -type f -name '._*' -delete")
  os.system(f"cd {yt_images_data_dir} && find . -type f -name '._*' -delete")


In [5]:
image_count = len(list(images_data_dir.glob('*/*.jpg')))
print(image_count)

2056


In [6]:
yt_image_count = len(list(yt_images_data_dir.glob('*/*.jpg')))
print(yt_image_count)

15843


In [7]:
df = pd.read_csv("https://cv-datasets-2021.s3.amazonaws.com/dataset.csv")
df.shape

(513, 14)

In [8]:
df.head(2)

Unnamed: 0,sku,sex,color,breed,feed,age_in_year,teeth,height_in_inch,weight_in_kg,price,size,images_count,yt_images_count,total_images
0,BLF 2340,MALE_BULL,RED,LOCAL,"['JUMBOO', 'LUCERNE', 'NAPIER', 'SILAGE', 'STR...",2.0,2,50.9,270.0,19000.0,MEDIUM,4,0,4
1,BLF 2342,MALE_BULL,NON_RED,LOCAL,"['JUMBOO', 'LUCERNE', 'NAPIER', 'SILAGE', 'STR...",2.0,2,52.0,256.0,18000.0,LARGE,4,31,35


In [9]:
images = list(images_data_dir.glob('*/*.jpg'))
yt_images = list(yt_images_data_dir.glob('*/*.jpg'))

In [10]:
min_height = 0
max_height = 0
min_width = 0
max_width = 0
for i, image in enumerate(images):
  w, h = PIL.Image.open(str(image)).size
  if i == 0:
    min_height = h
    max_height = h
    min_width = w
    max_width = w
  
  if h <= min_height:
    min_height = h
  if h >= max_height:
    max_height = h

  if w <= min_width:
    min_width = w
  if w >= max_width:
    max_width = w

print(f"min_height: {min_height}")
print(f"min_width: {min_width}")
print(f"max_height: {max_height}")
print(f"max_width: {max_width}")

min_height: 450
min_width: 800
max_height: 675
max_width: 1200


In [11]:
min_height = 0
max_height = 0
min_width = 0
max_width = 0
for i, image in enumerate(yt_images):
  w, h = PIL.Image.open(str(image)).size
  if i == 0:
    min_height = h
    max_height = h
    min_width = w
    max_width = w

  if h <= min_height:
    min_height = h
  if h >= max_height:
    max_height = h

  if w <= min_width:
    min_width = w
  if w >= max_width:
    max_width = w

print(f"min_height: {min_height}")
print(f"min_width: {min_width}")
print(f"max_height: {max_height}")
print(f"max_width: {max_width}")

min_height: 720
min_width: 1280
max_height: 720
max_width: 1280


In [12]:
f_df = pd.DataFrame(columns = ['file_path', 'teeth', 'age_in_year', 'breed', 'height_in_inch', 'weight_in_kg'])
for index, row in df.iterrows():
  images = list(images_data_dir.glob(f"{row['sku']}/*.jpg"))
  yt_images = list(yt_images_data_dir.glob(f"{row['sku']}/*.jpg"))

  for image in images:
    f_df = f_df.append({'file_path' : image, 'teeth' : row['teeth'], 'age_in_year' : row['age_in_year'], 'breed': row['breed'], 'height_in_inch': row['height_in_inch'], 'weight_in_kg': row['weight_in_kg']}, 
                ignore_index = True)
    
  for image in yt_images:
    f_df = f_df.append({'file_path' : image, 'teeth' : row['teeth'], 'age_in_year' : row['age_in_year'], 'breed': row['breed'], 'height_in_inch': row['height_in_inch'], 'weight_in_kg': row['weight_in_kg']}, 
                ignore_index = True)


In [13]:
f_df.shape

(17864, 6)

In [14]:
f_df.head(1)

Unnamed: 0,file_path,teeth,age_in_year,breed,height_in_inch,weight_in_kg
0,/root/.keras/datasets/images/BLF 2340/BLF 2340...,2,2.0,LOCAL,50.9,270.0


In [15]:
train_df, valid_test_df = train_test_split(f_df, test_size=0.3)
validation_df, test_df = train_test_split(valid_test_df, test_size=0.3)
print(f"train_df: {train_df.shape}")
print(f"validation_df: {validation_df.shape}")
print(f"test_df: {test_df.shape}")

train_df: (12504, 6)
validation_df: (3752, 6)
test_df: (1608, 6)


In [18]:
# min_height: 450
# min_width: 800

# input: [image, teeth]
# outpur: [age_in_year, breed, height_in_inch, weight_in_kg]

# class CustomDataGen(tf.keras.utils.Sequence):
    
#     def __init__(self, df, X_col, y_col,
#                  batch_size,
#                  input_size=(450, 800, 3), # (input_height, input_width, input_channel)
#                  shuffle=True):
        
#         self.df = df.copy()
#         self.X_col = X_col
#         self.y_col = y_col
#         self.batch_size = batch_size
#         self.input_size = input_size
#         self.shuffle = shuffle
        
#         self.n = len(self.df)
#         # self.n_teeth = df[X_col['teeth']].max()
#         # self.n_breed = df[y_col['breed']].nunique()
    
#     def on_epoch_end(self):
#         if self.shuffle:
#             self.df = self.df.sample(frac=1).reset_index(drop=True)
    
#     def __get_input(self, path, target_size):
#         image = tf.keras.preprocessing.image.load_img(path)
#         image_arr = tf.keras.preprocessing.image.img_to_array(image)

#         # image_arr = image_arr[ymin:ymin+h, xmin:xmin+w]
#         image_arr = tf.image.resize(image_arr,(target_size[0], target_size[1])).numpy()

#         return image_arr/255.
    
#     def __get_output(self, label, num_classes):
#         return tf.keras.utils.to_categorical(label, num_classes=num_classes)
    
#     def __get_data(self, batches):
#         # Generates data containing batch_size samples

#         path_batch = batches[self.X_col['file_path']]       
#         # teeth_batch = batches[self.X_col['teeth']]

#         # breed_batch = batches[self.y_col['breed']]
#         weight_in_kg_batch = batches[self.y_col['weight_in_kg']]
#         height_in_inch_batch = batches[self.y_col['height_in_inch']]
#         age_in_year_batch = batches[self.y_col['age_in_year']]

#         X0 = np.asarray([self.__get_input(x, self.input_size) for x in path_batch])

#         # y0_batch = np.asarray([self.__get_output(y, self.n_teeth) for y in teeth_batch])
#         # y1_batch = np.asarray([self.__get_output(y, self.n_breed) for y in breed_batch])

#         y0 = np.asarray([tf.cast(y, tf.float32) for y in weight_in_kg_batch])
#         y1 = np.asarray([tf.cast(y, tf.float32) for y in height_in_inch_batch])
#         y2 = np.asarray([tf.cast(y, tf.float32) for y in age_in_year_batch])

#         return X0, tuple([y0, y1, y2])
    
#     def __getitem__(self, index):
        
#         batches = self.df[index * self.batch_size:(index + 1) * self.batch_size]
#         X, y = self.__get_data(batches)        
#         return X, y
    
#     def __len__(self):
#         return self.n // self.batch_size

In [19]:
# traingen = CustomDataGen(train_df,
#                          X_col={'file_path':'file_path', 'teeth': 'teeth'},
#                          y_col={'breed': 'breed', 'weight_in_kg': 'weight_in_kg', 'height_in_inch': 'height_in_inch', 'age_in_year': 'age_in_year'},
#                          batch_size=128, input_size=(450, 800, 3))

In [20]:
# testgen = CustomDataGen(test_df,
#                          X_col={'file_path':'file_path', 'teeth': 'teeth'},
#                          y_col={'breed': 'breed', 'weight_in_kg': 'weight_in_kg', 'height_in_inch': 'height_in_inch', 'age_in_year': 'age_in_year'},
#                          batch_size=128, input_size=(450, 800, 3))

In [21]:
# validgen = CustomDataGen(validation_df,
#                          X_col={'file_path':'file_path', 'teeth': 'teeth'},
#                          y_col={'breed': 'breed', 'weight_in_kg': 'weight_in_kg', 'height_in_inch': 'height_in_inch', 'age_in_year': 'age_in_year'},
#                          batch_size=128, input_size=(450, 800, 3))

In [16]:
def __get_input(path, target_size):
  image = tf.keras.preprocessing.image.load_img(path)
  image_arr = tf.keras.preprocessing.image.img_to_array(image)
  image_arr = tf.image.resize(image_arr,(target_size[0], target_size[1])).numpy()
  return image_arr/255.

def data_loader(df, image_size=(450, 800, 3)):
  y0 = tf.cast(df.weight_in_kg, tf.float32)
  print(y0.shape)
  y1 = tf.cast(df.height_in_inch, tf.float32)
  print(y1.shape)
  y2 = tf.cast(df.age_in_year, tf.float32)
  print(y2.shape)
  path_batch = df.file_path
  X0 = tf.cast([__get_input(x, image_size) for x in path_batch], tf.float32)
  print(X0.shape)

  return X0, (y0, y1, y2)

In [17]:
X0, (y0, y1, y2) = data_loader(f_df, (128, 128, 3))

(17864,)
(17864,)
(17864,)
(17864, 128, 128, 3)


In [18]:
input = keras.Input(shape=(128, 128, 3), name="original_img")
x = layers.Conv2D(64, 3, activation="relu")(input)
x = layers.Conv2D(128, 3, activation="relu")(x)
x = layers.MaxPooling2D(3)(x)
x = layers.Conv2D(128, 3, activation="relu")(x)
x = layers.Conv2D(64, 3, activation="relu")(x)
x = layers.GlobalMaxPooling2D()(x)

out_a = keras.layers.Dense(1, activation='linear', name='wt_rg')(x)
out_b = keras.layers.Dense(1, activation='linear', name='ht_rg')(x)
out_c = keras.layers.Dense(1, activation='linear', name='ag_rg')(x)

encoder = keras.Model( inputs = input, outputs = [out_a, out_b, out_c], name="encoder")

In [19]:
encoder.compile(
    loss = {
        "wt_rg": tf.keras.losses.MeanSquaredError(),
        "ht_rg": tf.keras.losses.MeanSquaredError(),
        "ag_rg": tf.keras.losses.MeanSquaredError()
    },

    metrics = {
        "wt_rg": 'mse',
        "ht_rg": 'mse',
        "ag_rg": 'mse'
    },

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
)

In [20]:
encoder.fit(X0, [y0, y1, y2], epochs=30, verbose=2, batch_size=32, validation_split=0.2)

Epoch 1/30
447/447 - 79s - loss: 7784.1914 - wt_rg_loss: 7623.8691 - ht_rg_loss: 82.7961 - ag_rg_loss: 77.5288 - wt_rg_mse: 7623.8691 - ht_rg_mse: 82.7961 - ag_rg_mse: 77.5288 - val_loss: 5579.6699 - val_wt_rg_loss: 5524.0474 - val_ht_rg_loss: 54.3921 - val_ag_rg_loss: 1.2297 - val_wt_rg_mse: 5524.0474 - val_ht_rg_mse: 54.3921 - val_ag_rg_mse: 1.2297 - 79s/epoch - 176ms/step
Epoch 2/30
447/447 - 65s - loss: 5805.7778 - wt_rg_loss: 5775.2319 - ht_rg_loss: 29.9432 - ag_rg_loss: 0.5985 - wt_rg_mse: 5775.2319 - ht_rg_mse: 29.9432 - ag_rg_mse: 0.5985 - val_loss: 5210.5176 - val_wt_rg_loss: 5171.5747 - val_ht_rg_loss: 37.6445 - val_ag_rg_loss: 1.2976 - val_wt_rg_mse: 5171.5747 - val_ht_rg_mse: 37.6445 - val_ag_rg_mse: 1.2976 - 65s/epoch - 145ms/step
Epoch 3/30
447/447 - 64s - loss: 5352.9272 - wt_rg_loss: 5325.6318 - ht_rg_loss: 26.6129 - ag_rg_loss: 0.6875 - wt_rg_mse: 5325.6318 - ht_rg_mse: 26.6129 - ag_rg_mse: 0.6875 - val_loss: 5195.7939 - val_wt_rg_loss: 5167.7700 - val_ht_rg_loss: 27.4

<keras.callbacks.History at 0x7f5fa5326bd0>

In [21]:
encoder.output

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'wt_rg')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'ht_rg')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'ag_rg')>]

In [22]:
pred0, pred1, pred2 = encoder.predict(tf.expand_dims(X0[0], 0))

In [23]:
pred0

array([[179.49106]], dtype=float32)

In [24]:
pred1

array([[41.392338]], dtype=float32)

In [25]:
pred2

array([[1.1926677]], dtype=float32)

In [26]:
y0[0]

<tf.Tensor: shape=(), dtype=float32, numpy=270.0>

In [27]:
y1[0]

<tf.Tensor: shape=(), dtype=float32, numpy=50.9>

In [28]:
y2[0]

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>