<a href="https://colab.research.google.com/github/baroodb/code/blob/main/videos2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from google.colab import auth
auth.authenticate_user()

In [5]:
# import 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os


In [6]:
!git clone https://github.com/jurjsorinliviu/Sports-Type-Classifier.git clone

Cloning into 'clone'...
remote: Enumerating objects: 14521, done.[K
remote: Total 14521 (delta 0), reused 0 (delta 0), pack-reused 14521[K
Receiving objects: 100% (14521/14521), 592.88 MiB | 38.54 MiB/s, done.
Resolving deltas: 100% (6/6), done.
Checking out files: 100% (14619/14619), done.


In [7]:
%%writefile train.py
import argparse
from imutils import paths
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import AveragePooling2D, Flatten, Dense, Dropout, Input
import pickle

LABELS = ["weight_lifting", "tennis", "football"]


parser = argparse.ArgumentParser()
parser.add_argument('-d', '--dataset', help='This is the path to the dataset', required=True)
parser.add_argument('-m', '--model', help='Path to save the model to', required=True)
parser.add_argument('-e', '--epochs', type=int,  help='The number of epochs', required=True)
parser.add_argument('-l', '--label-bin', help='Path to save the label binarizer to...')
parser.add_argument('-p', '--plot', type=str,  help='Path to output the plot..')

args = vars(parser.parse_args())

# create the plot path if it does not exists

print('INFO: Creating the plot path ...')
os.makedirs(os.path.dirname(args['plot']))

list_files = list(paths.list_images(args['dataset']))

images = []
labels = []

for path in list_files:
  label = path.split(os.path.sep)[-2]


  if label not in LABELS:
    continue
  
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = cv2.resize(img, (224, 224))

  images.append(img)
  labels.append(label)

images = np.array(images)
labels = np.array(labels)


print(images.shape)
print(labels.shape)

lb = LabelBinarizer()
labels = lb.fit_transform(labels)
print('INFO: Printing some elements from the label binarizer')
print(labels)

# split the data
train_ds, val_ds, train_labels, val_labels = train_test_split(images, 
                                                              labels, 
                                                              test_size=0.2, 
                                                              stratify=labels, 
                                                              random_state=42)
assert train_ds.shape[0] == train_labels.shape[0]
assert val_ds.shape[0] == val_labels.shape[0]

# Create the data augmentation 
train_augmentation = tf.keras.preprocessing.image.ImageDataGenerator(
    	rotation_range=30,
      zoom_range=0.15,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.15,
      horizontal_flip=True,
      fill_mode="nearest"
)

val_augmentation = tf.keras.preprocessing.image.ImageDataGenerator()

mean = np.array([123.68, 116.779, 103.939], dtype="float32")

train_augmentation.mean = mean
val_augmentation.mean = mean


#builing the model

base = ResNet50(include_top=False)
for layer in base.layers:
  layer.trainable = False

inputs = Input(shape=(224, 224, 3))
x = base(inputs, training=False)
x = AveragePooling2D((7, 7))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(len(LABELS), activation='softmax')(x)

model = tf.keras.models.Model(inputs=inputs, outputs=outputs)

print('INFO: Printing the model summary')
print(model.summary())

print('INFO: Compiling the model')
model.compile(loss='categorical_crossentropy', 
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, decay=1e-4/args['epochs']), 
              metrics=['accuracy'])

#
print('INFO: Launching the training job')

H = model.fit(x=train_augmentation.flow(train_ds, train_labels, batch_size=32), 
              steps_per_epoch=len(train_ds) // 32, 
              validation_data=val_augmentation.flow(val_ds, val_labels), 
              validation_steps=len(val_ds) // 32, 
              epochs=args['epochs'])

print(f"INFO: Saving the model to the path  {args['model']}")
model.save(args['model'])
print(f"INFO: Saving the labels to the path {args['label_bin']}")

with open(args['label_bin'], 'wb') as f:
  f.write(pickle.dumps(lb))

# plot the training loss and accuracy
N = args["epochs"]
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")

print(f"INFO: Saving the plot to {args['plot']}")
plt.savefig(args["plot"])


Writing train.py


In [8]:
!python train.py --dataset /content/clone/data --epochs 10 --model trained/model --plot trained/plot.png --label-bin trained/lb.pickle

INFO: Creating the plot path ...
(2070, 224, 224, 3)
(2070,)
INFO: Printing some elements from the label binarizer
[[0 0 1]
 [0 0 1]
 [0 0 1]
 ...
 [1 0 0]
 [1 0 0]
 [1 0 0]]
2021-09-18 19:48:38.631094: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-18 19:48:39.084528: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-18 19:48:39.085580: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-18 19:48:39.087060: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must 

In [9]:
!zip -r model.zip trained/

  adding: trained/ (stored 0%)
  adding: trained/model/ (stored 0%)
  adding: trained/model/assets/ (stored 0%)
  adding: trained/model/variables/ (stored 0%)
  adding: trained/model/variables/variables.data-00000-of-00001 (deflated 7%)
  adding: trained/model/variables/variables.index (deflated 78%)
  adding: trained/model/saved_model.pb (deflated 92%)
  adding: trained/model/keras_metadata.pb (deflated 96%)
  adding: trained/lb.pickle (deflated 38%)
  adding: trained/plot.png (deflated 5%)


In [10]:
from google.colab import files
files.download('model.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
import os
import tensorflow as tf


BUCKET = 'gs://tf_model_bucket/'
PROJECT = 'project my-third-project-326309'

os.environ['BUCKET']=BUCKET
MODEL_PATH = "${BUCKET}/new/model"

In [13]:
!gcloud config set project my-third-project-326309

Updated property [core/project].


In [16]:
!gsutil ls $BUCKET

gs://tf_model_bucket/local/
gs://tf_model_bucket/model/
gs://tf_model_bucket/models/


In [23]:
!echo $MODEL_PATH

gs://tf_model_bucket//new/model


In [25]:
model = tf.keras.models.load_model('/content/trained/model')

In [30]:
!saved_model_cli show --tag_set serve --dir /content/trained/model --signature serving_default

The given SavedModel SignatureDef contains the following input(s):
  inputs['input_2'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 224, 224, 3)
      name: serving_default_input_2:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['dense_1'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 3)
      name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict


In [32]:
def read_from_jpeg(img):
    img_data = tf.io.read_file(img)
    return img_data

def preprocess(img_bytes):
    img_data = tf.image.decode_jpeg(img_bytes, channels=3)
    img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    img_data = tf.image.resize(img_data, (224, 224))
    return img_data

filenames = [
    'clone/data/football/00000007.jpg', 
    'clone/data/football/00000017.jpg', 
    'clone/data/weight_lifting/00000028.jpg', 
    'clone/data/tennis/00000094.jpg'
]

for filename in filenames:
    img_bytes = read_from_jpeg(filename)
    image_data = preprocess(img_bytes)
    image_data = tf.expand_dims(image_data, axis=0)
    
    pred = model.predict(image_data)
    print(pred)

[[0.20586473 0.30180103 0.49233428]]
[[0.20029865 0.25959003 0.54011136]]
[[0.2200291  0.29819492 0.48177603]]
[[0.20027296 0.28066313 0.5190639 ]]


In [33]:
@tf.function(input_signature=[tf.TensorSpec([None,], dtype=tf.string)])
def predict_bytes(img_bytes):
    input_images = tf.map_fn(
        preprocess,
        img_bytes,
        fn_output_signature=tf.float32
    )
    
    batch_pred = model(input_images) # same as model.predict()
    top_prob = tf.math.reduce_max(batch_pred, axis=[1])
    pred_label_index = tf.math.argmax(batch_pred, axis=1)
    
    return {
        'probability': top_prob,
        'acivity': pred_label_index
    }

@tf.function(input_signature=[tf.TensorSpec([None,], dtype=tf.string)])
def predict_filename(filenames):
    img_bytes = tf.map_fn(
        tf.io.read_file,
        filenames
    )
    result = predict_bytes(img_bytes)
    result['filename'] = filenames
    return result



In [36]:
MODEL_PATH = 'gs://tf_model_bucket/new/model'
# Time to save the  model 
model.save(MODEL_PATH, signatures={
    'serving_default': predict_bytes,
    'from_file':  predict_filename
})

INFO:tensorflow:Assets written to: gs://tf_model_bucket/new/model/assets




In [37]:
loaded = tf.keras.models.load_model(MODEL_PATH)

In [42]:
!saved_model_cli show --tag_set serve --dir $MODEL_PATH --signature serving_default

The given SavedModel SignatureDef contains the following input(s):
  inputs['img_bytes'] tensor_info:
      dtype: DT_STRING
      shape: (-1)
      name: serving_default_img_bytes:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['acivity'] tensor_info:
      dtype: DT_INT64
      shape: (-1)
      name: StatefulPartitionedCall_1:0
  outputs['probability'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1)
      name: StatefulPartitionedCall_1:1
Method name is: tensorflow/serving/predict


In [44]:
with open(filenames[1], 'rb') as f:
  data_bytes = f.read()
  data_bytes = tf.convert_to_tensor([data_bytes])
  serving_fn = loaded.signatures['serving_default']
  pred = serving_fn(data_bytes)
  print(pred)

{'probability': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.54011136], dtype=float32)>, 'acivity': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([2])>}
