<a href="https://colab.research.google.com/github/jbsher/capstone/blob/main/modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the TensorFlow models repository, which contains the object_detection sources used below.
!git clone https://github.com/tensorflow/models.git
# Compile the protobuf definitions, copy the TF2 setup.py into research/, and pip-install the package from there.
!cd models/research/ && protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip install .
# Switch the working directory to object_detection; later cells download into, and resolve relative paths against, this directory.
%cd models/research/object_detection


In [None]:
# Additional packages installed with pip on top of the object_detection install.
# NOTE(review): none of these are version-pinned, so a fresh runtime may resolve different
# releases than the ones this notebook was developed against — consider pinning.
!pip install tensorflow-io
!pip install tf_slim
!pip install tf-models-official
!pip install tf-object-detection-api

In [11]:
import numpy as np
import pandas as pd
import os
import glob
import xml.etree.ElementTree as ET
import cv2
import matplotlib.pyplot as plt
import json
import tensorflow as tf
import re

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from object_detection.utils import config_util, label_map_util
from object_detection.builders import model_builder
from object_detection.utils import visualization_utils as viz_utils
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder



In [8]:
# Mount Google Drive at /content/drive; the data files read and written below live under
# /content/drive/MyDrive/capstone/.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Extract the object-detection archive stored on Drive. No -d target is given, so the files
# are unpacked into the current working directory.
!unzip /content/drive/MyDrive/capstone/data/object-detection.zip

In [17]:
# Download the pretrained SSD MobileNet V2 FPNLite 320x320 archive into the current working directory.
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz
# Unpack the archive into the object_detection directory.
# NOTE(review): tar reads the archive from an absolute path while wget saves to the cwd — the two
# only agree if the cwd is /content/models/research/object_detection; confirm before a fresh run.
!tar -xvf /content/models/research/object_detection/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz -C /content/models/research/object_detection/

--2023-09-15 02:40:32--  http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 142.251.18.128, 2a00:1450:4013:c01::80
Connecting to download.tensorflow.org (download.tensorflow.org)|142.251.18.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20515344 (20M) [application/x-tar]
Saving to: ‘ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz’


2023-09-15 02:40:32 (82.1 MB/s) - ‘ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz’ saved [20515344/20515344]



In [9]:
# master_df: the CSV table saved on Drive
master_df = pd.read_csv('/content/drive/MyDrive/capstone/data/master_df.csv')

# combined_json_data: the combined annotation JSON saved on Drive
with open('/content/drive/MyDrive/capstone/data/combined_json_data.json', 'r') as f:
    combined_json_data = json.loads(f.read())

# image_path_dict: the image-path dictionary JSON saved on Drive
with open('/content/drive/MyDrive/capstone/data/image_path_dict.json', 'r') as f:
    image_path_dict = json.loads(f.read())


In [12]:
# Path where label_map.pbtxt was saved
# NOTE(review): the cell that writes this file comes *after* this one in the notebook, so on a
# Drive folder that does not yet contain label_map.pbtxt this load cannot succeed until that
# cell has been run once — consider moving this cell below it.
label_map_path = '/content/drive/MyDrive/capstone/data/label_map.pbtxt'

# Load the label map using the utility function
label_map = label_map_util.load_labelmap(label_map_path)
# Convert the parsed label map into a list of categories, then into the `category_index` lookup.
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=100, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


In [13]:
# Distinct category names, in the order they first appear in master_df
unique_categories = master_df['category'].unique()

# Destination of the generated label map on Google Drive
label_map_path = '/content/drive/MyDrive/capstone/data/label_map.pbtxt'

# Emit one `item { id, name }` entry per category; ids start at 1
with open(label_map_path, "w") as f:
    for idx, category in enumerate(unique_categories, 1):
        f.write(f"item {{\n  id: {idx}\n  name: '{category}'\n}}\n\n")


In [18]:
# Use os.path.abspath to get absolute paths
pipeline_config_absolute_path = os.path.abspath('ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config')
checkpoint_absolute_path = os.path.abspath('ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0')

# Check if the pipeline.config file exists
if not os.path.exists(pipeline_config_absolute_path):
    print(f"Error: {pipeline_config_absolute_path} does not exist!")
else:
    configs = config_util.get_configs_from_pipeline_file(pipeline_config_absolute_path)
    pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
    model_config = pipeline_proto.model

    # Set model parameters
    model_config.ssd.num_classes = 1
    pipeline_proto.train_config.fine_tune_checkpoint = checkpoint_absolute_path
    pipeline_proto.train_config.fine_tune_checkpoint_type = 'detection'
    pipeline_proto.train_input_reader.tf_record_input_reader.input_path[:] = ['/content/drive/MyDrive/capstone/data/combined_data.tfrecord']
    pipeline_proto.train_input_reader.label_map_path = label_map_path


In [19]:
def int64_feature(value):
    """Wrap a single integer in a `tf.train.Feature` holding a one-element `Int64List`."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def int64_list_feature(value):
    """Wrap an iterable of integers in a `tf.train.Feature` holding an `Int64List`."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

def bytes_feature(value):
    """Wrap a single bytes value in a `tf.train.Feature` holding a one-element `BytesList`."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def bytes_list_feature(value):
    """Wrap an iterable of bytes values in a `tf.train.Feature` holding a `BytesList`."""
    bytes_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=bytes_list)

def float_list_feature(value):
    """Wrap an iterable of floats in a `tf.train.Feature` holding a `FloatList`."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)


In [20]:
def create_tf_example(entry,
                      image_folder="/content/drive/MyDrive/capstone/data/train_val/",
                      class_text='call',
                      class_id=1):
    """Build a `tf.train.Example` for one annotated image.

    Args:
        entry: Pair ``(attributes, details)``. ``attributes[0]`` is the category
            sub-folder and ``attributes[1]`` the image file name without the
            ``.jpg`` extension. ``details['bboxes']`` is a list of
            ``[x, y, width, height]`` boxes, or ``None``/empty when the image
            has no boxes. Coordinates are stored as given — presumably already
            normalised to [0, 1]; verify against the annotation source.
        image_folder: Root folder that contains one sub-folder per category.
        class_text: Class name written for every box.
        class_id: Integer class id written for every box.

    Returns:
        A `tf.train.Example` carrying the encoded JPEG, its real height and
        width, and per-box corner coordinates plus class text/id.

    NOTE(review): every box receives the same ``class_text``/``class_id``
    regardless of the entry's category, while the pipeline config edited
    elsewhere in this notebook is set to 18 classes — confirm that
    single-class labelling is intended.
    """
    category_folder = entry[0][0]  # Category name doubles as the sub-folder name
    image_filename = entry[0][1] + ".jpg"
    image_path = os.path.join(image_folder, category_folder, image_filename)
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    # Read the true size from the JPEG header instead of assuming 320x320;
    # extract_jpeg_shape returns [height, width, channels] without decoding pixels.
    height, width = (int(dim) for dim in tf.io.extract_jpeg_shape(encoded_image).numpy()[:2])

    filename = image_path.encode('utf8')
    image_format = b'jpeg'

    # Treat a missing (None) box list as "no boxes" rather than failing on iteration.
    boxes = entry[1]['bboxes'] or []

    # Boxes arrive as [x, y, w, h]; the record stores min/max corners.
    xmins = [box[0] for box in boxes]
    xmaxs = [box[0] + box[2] for box in boxes]
    ymins = [box[1] for box in boxes]
    ymaxs = [box[1] + box[3] for box in boxes]
    classes_text = [class_text.encode('utf8')] * len(boxes)
    classes = [class_id] * len(boxes)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
        'image/filename': bytes_feature(filename),
        'image/source_id': bytes_feature(filename),
        'image/encoded': bytes_feature(encoded_image),
        'image/format': bytes_feature(image_format),
        'image/object/bbox/xmin': float_list_feature(xmins),
        'image/object/bbox/xmax': float_list_feature(xmaxs),
        'image/object/bbox/ymin': float_list_feature(ymins),
        'image/object/bbox/ymax': float_list_feature(ymaxs),
        'image/object/class/text': bytes_list_feature(classes_text),
        'image/object/class/label': int64_list_feature(classes),
    }))
    return tf_example


In [47]:
# Serialise every annotation entry into a single TFRecord file on Drive.
# The context manager closes (and flushes) the writer even if building one of the
# examples raises part-way through, so no file handle is left open.
with tf.io.TFRecordWriter('/content/drive/MyDrive/capstone/data/combined_data.tfrecord') as writer:
    for entry in combined_json_data:
        tf_example = create_tf_example(entry)
        writer.write(tf_example.SerializeToString())


In [22]:
def _parse_function(proto):
    """Decode one serialized example into ``(filename, (xmin, ymin, xmax, ymax))``.

    The four box components are parsed as variable-length float features and
    returned as dense 1-D tensors, one value per box.
    """
    # Order here fixes the order of the returned tuple
    bbox_keys = [
        'image/object/bbox/xmin',
        'image/object/bbox/ymin',
        'image/object/bbox/xmax',
        'image/object/bbox/ymax',
    ]

    # Feature spec mirroring how the records were written
    feature_spec = {'image/filename': tf.io.FixedLenFeature([], tf.string)}
    for key in bbox_keys:
        feature_spec[key] = tf.io.VarLenFeature(tf.float32)
    feature_spec['image/object/class/text'] = tf.io.VarLenFeature(tf.string)

    example = tf.io.parse_single_example(proto, feature_spec)

    # VarLenFeature yields sparse tensors; densify each box component
    xmin, ymin, xmax, ymax = [tf.sparse.to_dense(example[key]) for key in bbox_keys]

    return example['image/filename'], (xmin, ymin, xmax, ymax)


In [23]:
# Read the TFRecord back and print the first five parsed examples as a sanity check
record_file = '/content/drive/MyDrive/capstone/data/combined_data.tfrecord'
dataset = tf.data.TFRecordDataset(record_file).map(_parse_function)
for uuid, bbox in dataset.take(5):
    print(f"UUID: {uuid.numpy().decode()}, BBox: {bbox}")

UUID: /content/drive/MyDrive/capstone/data/train_val/call/001d6e5e-b59d-4cee-a622-5c6b58998e6e.jpg, BBox: (<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.30253565], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.539332], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.53696454], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.71908313], dtype=float32)>)
UUID: /content/drive/MyDrive/capstone/data/train_val/call/00a32f31-ae9f-4eab-b70a-76d2245a3151.jpg, BBox: (<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.4476844], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.5201626], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6363242], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6409666], dtype=float32)>)
UUID: /content/drive/MyDrive/capstone/data/train_val/call/00c07d91-a770-4a82-bc42-f498b5c722cc.jpg, BBox: (<tf.Tensor: shape=(1,), dtype

Load Pretrained Model

In [24]:
# Prep for training
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).repeat()


In [33]:
# Set the path to the pipeline.config
pipeline_config_path = "/content/models/research/object_detection/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config"

# Modify the pipeline.config
with open(pipeline_config_path) as f:
    config = f.read()

# Change the checkpoint to the pretrained model's checkpoint
config = re.sub('fine_tune_checkpoint: ".*?"',
                'fine_tune_checkpoint: "/content/models/research/object_detection/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0"',
                config)


# Change the number of classes to dataset's number of classes
config = re.sub('num_classes: \d+', 'num_classes: 18', config)

# Change the path to TFRecord and label map
config = re.sub('input_path: ".*?"',
                'input_path: "/content/drive/MyDrive/capstone/data/combined_data.tfrecord"',
                config)
config = re.sub('label_map_path: ".*?"',
                'label_map_path: "/content/drive/MyDrive/capstone/data/label_map.pbtxt"',
                config)

# Save the modified config
with open(pipeline_config_path, 'w') as f:
    f.write(config)


In [None]:
# Run the Object Detection API's model_main_tf2.py with the edited pipeline.config;
# --model_dir points the script at a training folder on Drive.
!python /content/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path=/content/models/research/object_detection/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config \
    --model_dir=/content/drive/MyDrive/capstone/training \
    --alsologtostderr


In [35]:
# Preview the first rows of master_df (category, uuid, image_path, label, bbox).
master_df.head()

Unnamed: 0,category,uuid,image_path,label,bbox
0,call,001d6e5e-b59d-4cee-a622-5c6b58998e6e,/content/drive/MyDrive/capstone/data/train_val...,call,"[[0.30253566, 0.53933196, 0.23442885, 0.179751..."
1,call,00a32f31-ae9f-4eab-b70a-76d2245a3151,/content/drive/MyDrive/capstone/data/train_val...,call,"[[0.44768442, 0.52016261, 0.18863979, 0.120803..."
2,call,00c07d91-a770-4a82-bc42-f498b5c722cc,/content/drive/MyDrive/capstone/data/train_val...,call,"[[0.50488776, 0.39571494, 0.10762673, 0.073535..."
3,call,00d13cc0-1baf-4d1f-90a3-e3c5cdcc94a0,/content/drive/MyDrive/capstone/data/train_val...,call,"[[0.37399583, 0.3908337, 0.25430503, 0.2480748..."
4,call,00eaaf8a-0d7d-44be-8836-94a0a3c7ac61,/content/drive/MyDrive/capstone/data/train_val...,call,"[[0.41426556, 0.54359167, 0.22766, 0.20668667]..."


In [90]:
# Convert list to DataFrame
df = pd.DataFrame(combined_json_data, columns=['Attributes', 'Details'])

# Flattening the nested dictionary structure
df_details = df['Details'].apply(pd.Series)

# Concatenate the flattened details dataframe to the original dataframe and drop the 'Details' column
df = pd.concat([df.drop(['Details'], axis=1), df_details], axis=1)

df['label'] = df['Attributes'].str[0]
df['identifier'] = df['Attributes'].str[1]
df = df.drop('Attributes', axis=1)  # drop the original 'Attributes' column

In [91]:
# Remove the 'labels' column that came from the expanded details
df = df.drop(columns='labels')

In [92]:
# Mark rows with two bounding boxes as 'no_gesture'
mask_length_2 = df['bboxes'].apply(lambda x: len(x) if x is not None else 0) == 2
df.loc[mask_length_2, 'label'] = 'no_gesture'

# Drop the bounding box data for these rows
df.loc[mask_length_2, 'bboxes'] = None

# Convert any list labels into their first element, or keep them if they're not lists
df['label'] = df['label'].apply(lambda x: x[0] if isinstance(x, list) and x else x)  # added "and x" for safety

# Encode labels into numerical categories
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

# Replace None in bounding boxes with a default value
df['bboxes'] = df['bboxes'].apply(lambda x: x if x is not None else [0, 0, 0, 0])

# Filter to keep only rows with 21 landmarks
df = df[df['landmarks'].apply(lambda x: len(x[0])) == 21]


In [67]:
# Preview the first rows of the annotation DataFrame.
df.head()

Unnamed: 0,bboxes,landmarks,leading_conf,leading_hand,user_id,label,identifier
0,"[[0.30253566, 0.53933196, 0.23442885, 0.179751...","[[[0.5233240087262392, 0.6442184605096084], [0...",1.0,left,84bf44b41c986f5bfd74eb60cdfd41d2396334fb4b35f3...,0,001d6e5e-b59d-4cee-a622-5c6b58998e6e
1,"[[0.44768442, 0.52016261, 0.18863979, 0.120803...","[[[0.6032656913313392, 0.6287220446754884], [0...",1.0,left,7648b8642c1c5700f175653303704daed97712033af577...,0,00a32f31-ae9f-4eab-b70a-76d2245a3151
2,"[[0.50488776, 0.39571494, 0.10762673, 0.0735354]]","[[[0.5153850352073398, 0.4458925596774853], [0...",1.0,right,dba5d0a6ce06045928fe283e7604c0e8edd5221cfe6575...,0,00c07d91-a770-4a82-bc42-f498b5c722cc
3,"[[0.37399583, 0.3908337, 0.25430503, 0.24807481]]","[[[0.38935813430274846, 0.5566982353396809], [...",1.0,right,61e519d2f8a3b53a82e6614259be064590b64e489a89d0...,0,00d13cc0-1baf-4d1f-90a3-e3c5cdcc94a0
4,"[[0.41426556, 0.54359167, 0.22766, 0.20668667]]","[[[0.6185674667358398, 0.677894115447998], [0....",1.0,left,84bf44b41c986f5bfd74eb60cdfd41d2396334fb4b35f3...,0,00eaaf8a-0d7d-44be-8836-94a0a3c7ac61


In [93]:
# Flatten 'bboxes'
df_bboxes = pd.DataFrame(df['bboxes'].tolist(), columns=['bbox_x', 'bbox_y', 'bbox_width', 'bbox_height'])

# Flatten 'landmarks'
landmark_cols = []
for i in range(21):  # Assuming there are 21 landmarks
    df[f'landmark{i+1}_x'] = df['landmarks'].apply(lambda x: x[0][i][0])
    df[f'landmark{i+1}_y'] = df['landmarks'].apply(lambda x: x[0][i][1])
    landmark_cols.extend([f'landmark{i+1}_x', f'landmark{i+1}_y'])

# Join the flattened bboxes and landmarks to the main dataframe
df = pd.concat([df, df_bboxes], axis=1)
df.drop(['bboxes', 'landmarks'], axis=1, inplace=True)

# Define X and y
y = df['label']
X = df.drop(['label', 'identifier'], axis=1)
# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Restore the checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join('ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0')).expect_partial()


In [58]:
# Count how many landmarks the first hand of each row has (the output shows 21 or 0).
# NOTE(review): this reads df['landmarks'], which the flattening cell above drops — in notebook
# order this cell fails on a fresh run; it needs to run before the column is removed.
landmark_lengths = df['landmarks'].apply(lambda x: len(x[0]))
print(landmark_lengths.value_counts())


21    17449
0       551
Name: landmarks, dtype: int64
