<a href="https://colab.research.google.com/github/babo989/Pyrocystis-Tracking/blob/main/PyroObjectDetector_SSD_mobilenetV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**TFRecord creation using annotated images from labelImg**
https://github.com/HumanSignal/labelImg


In [None]:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import io
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple

def _required_text(parent, tag, xml_file):
    """Return the stripped text of ``parent.find(tag)`` or raise a clear error."""
    node = parent.find(tag)
    if node is None or node.text is None:
        raise ValueError(f"Missing <{tag}> element in {xml_file}")
    return node.text.strip()


def xml_to_csv(path):
    """Collect labelImg (Pascal VOC style) annotations from a directory.

    Every ``<object>`` element of every ``*.xml`` file directly inside
    ``path`` becomes one row. Elements are looked up by tag name, so the
    result does not depend on the order in which children appear in the XML.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        pandas.DataFrame with the columns ``filename, width, height, class,
        xmin, ymin, xmax, ymax`` (empty, but with those columns, when no
        objects are found).

    Raises:
        ValueError: If an annotation that contains objects lacks one of the
            required elements, or a numeric field is not an integer.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    rows = []
    # glob order is filesystem dependent; sorting keeps the CSV reproducible.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Nothing annotated in this file, so there is no row to emit.
            continue
        # Image-level fields are identical for every object of the file.
        filename = _required_text(root, 'filename', xml_file)
        width = int(_required_text(root, 'size/width', xml_file))
        height = int(_required_text(root, 'size/height', xml_file))
        for member in members:
            rows.append((
                filename,
                width,
                height,
                _required_text(member, 'name', xml_file),
                int(_required_text(member, 'bndbox/xmin', xml_file)),
                int(_required_text(member, 'bndbox/ymin', xml_file)),
                int(_required_text(member, 'bndbox/xmax', xml_file)),
                int(_required_text(member, 'bndbox/ymax', xml_file)),
            ))
    return pd.DataFrame(rows, columns=column_name)

def main():
    """Convert the XML annotations in the hard-coded folder into ``labels.csv``."""
    # Dataset root and the folder inside it that holds the XML files.
    output_path = '/content/drive/MyDrive/Woods_Hole_1_9_4'
    xml_path = '/content/drive/MyDrive/Woods_Hole_1_9_4/annotations'

    csv_file = os.path.join(output_path, 'labels.csv')
    annotations = xml_to_csv(xml_path)
    # index=None keeps the DataFrame index out of the CSV.
    annotations.to_csv(csv_file, index=None)
    print('Successfully converted xml to csv.')


main()


In [None]:
# Paths: everything below is derived from the dataset root.
DATA_BASE_PATH = '/content/drive/MyDrive/Woods_Hole_1_9_4/'
IMAGE_PATH, OUTPUT_TRAIN_PATH, OUTPUT_TEST_PATH = (
    os.path.join(DATA_BASE_PATH, leaf)
    for leaf in ('images/', 'train.record', 'test.record')
)

# Helper functions
def class_text_to_int(row_label):
    """Translate a class name into its integer label id.

    ``'pyro'`` is the only known class and maps to ``1``.

    Raises:
        ValueError: For any other label.
    """
    if row_label != 'pyro':
        raise ValueError(f"Class {row_label} not found in the mapping.")
    return 1


def split(df, group):
    """Group ``df`` by ``group`` and return one record per group.

    Each record is a ``data`` namedtuple: ``filename`` holds the group key and
    ``object`` holds the rows of ``df`` belonging to that key.
    """
    Record = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(Record(key, grouped.get_group(key)))
    return records

def bytes_list_feature(values):
    """Wrap ``values`` in a ``tf.train.Feature`` that carries a ``BytesList``."""
    payload = tf.train.BytesList(value=values)
    return tf.train.Feature(bytes_list=payload)

def create_tf_example(group, path):
    """Build one ``tf.train.Example`` for an image and its annotated boxes.

    Args:
        group: Record with ``filename`` (image file name) and ``object``
            (DataFrame rows providing ``xmin``, ``ymin``, ``xmax``, ``ymax``
            and ``class``).
        path: Directory that contains the image file.

    Returns:
        tf.train.Example holding the raw image bytes, the image size, and the
        box coordinates divided by the image width/height.
    """
    image_file = os.path.join(path, f'{group.filename}')
    with tf.io.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # The size is read from the image itself rather than from the CSV columns.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')

    left, right, top, bottom = [], [], [], []
    label_names, label_ids = [], []
    for _, row in group.object.iterrows():
        # Pixel coordinates are scaled by the image dimensions.
        left.append(row['xmin'] / width)
        right.append(row['xmax'] / width)
        top.append(row['ymin'] / height)
        bottom.append(row['ymax'] / height)
        label_names.append(row['class'].encode('utf8'))
        label_ids.append(class_text_to_int(row['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        # The format tag is always b'jpg', whatever the file actually is.
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text': bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

# Main function
def main():
    """Serialize every annotated image listed in ``labels.csv`` into a TFRecord.

    Reads ``labels.csv`` from ``DATA_BASE_PATH``, groups the rows per image
    file, and writes one ``tf.train.Example`` per image to
    ``OUTPUT_TRAIN_PATH``.

    NOTE(review): all examples go to the train record; ``OUTPUT_TEST_PATH`` is
    defined but nothing in this cell writes to it.
    """
    # Read the labels first so a missing CSV does not leave an empty record file.
    examples = pd.read_csv(os.path.join(DATA_BASE_PATH, 'labels.csv'))
    grouped = split(examples, 'filename')

    # The context manager closes the writer even if an example fails to build.
    with tf.io.TFRecordWriter(OUTPUT_TRAIN_PATH) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, IMAGE_PATH)
            writer.write(tf_example.SerializeToString())

    print('Successfully created the TFRecord file:', OUTPUT_TRAIN_PATH)


main()


**Install dependencies and TensorFlow**


If you are fine-tuning a detector, pick a suitable pre-trained model from the TensorFlow 2 Detection Model Zoo: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md
You can then store the downloaded model in your Google Drive for easy access.


In [None]:
# Install tf_slim with pip (the next install cell installs it again).
!pip install tf_slim



In [None]:
# Install additional pip packages (presumably needed by the TensorFlow Object
# Detection API; confirm). No versions are pinned.
# NOTE(review): tf_slim is also installed by the previous cell.
!pip install lvis
!pip install tf_slim

!pip install tensorflow_io
!pip install cython
!pip install pycocotools
!pip install tf-models-official


Collecting lvis
  Downloading lvis-0.5.3-py3-none-any.whl (14 kB)
Installing collected packages: lvis
Successfully installed lvis-0.5.3
Collecting tensorflow_io
  Downloading tensorflow_io-0.33.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (28.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.6/28.6 MB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_io
Successfully installed tensorflow_io-0.33.0


In [None]:
# Clone the TensorFlow models repository into ./models; later cells use models/research.
!git clone https://github.com/tensorflow/models.git

Cloning into 'models'...
remote: Enumerating objects: 87135, done.[K
remote: Counting objects: 100% (889/889), done.[K
remote: Compressing objects: 100% (409/409), done.[K
remote: Total 87135 (delta 530), reused 800 (delta 472), pack-reused 86246[K
Receiving objects: 100% (87135/87135), 599.13 MiB | 48.54 MiB/s, done.
Resolving deltas: 100% (62412/62412), done.


In [None]:
# Switch the notebook's working directory to the cloned research folder, then
# generate Python modules from the object_detection .proto files in place.
%cd models/research
!protoc object_detection/protos/*.proto --python_out=.

/content/models/research


In [None]:
import os
import sys

# Folders of the cloned TensorFlow models repo that must be importable.
_RESEARCH_PATHS = ['/content/models/research/', '/content/models/research/slim/']

# .get() avoids a KeyError when PYTHONPATH is not set; empty entries are dropped.
_pythonpath_entries = [p for p in os.environ.get('PYTHONPATH', '').split(':') if p]
for _research_path in _RESEARCH_PATHS:
    # Skip entries already present so re-running the cell does not repeat them.
    if _research_path not in _pythonpath_entries:
        _pythonpath_entries.append(_research_path)
    # PYTHONPATH is only read at interpreter start-up (it affects the `!python`
    # subprocesses); the running kernel needs sys.path extended as well.
    if _research_path not in sys.path:
        sys.path.append(_research_path)
os.environ['PYTHONPATH'] = ':'.join(_pythonpath_entries)


**Train model**

In [None]:

# NOTE(review): main_detector is imported but not used in this cell; training
# is launched through the script invocation below.
from object_detection.model_main_tf2 import main as main_detector

# Directory passed to the script as --model_dir, and the pipeline config it reads.
model_dir = '/content/drive/MyDrive/Woods_Hole_1_9_4'
pipeline_config_path = '/content/drive/MyDrive/Woods_Hole_1_9_4/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config'

# Run model_main_tf2.py in a subprocess; IPython replaces $name with the value
# of the Python variable of that name before the command is executed.
!python /content/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path=$pipeline_config_path \
    --model_dir=$model_dir \
    --alsologtostderr \
    --num_train_steps=10000 \
    --sample_1_of_n_eval_examples=0 \
    --num_eval_steps=0

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
I0815 16:22:02.997944 140400719425536 mirrored_strategy.py:419] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
INFO:tensorflow:Maybe overwriting train_steps: 10000
I0815 16:22:03.024913 140400719425536 config_util.py:552] Maybe overwriting train_steps: 10000
INFO:tensorflow:Maybe overwriting use_bfloat16: False
I0815 16:22:03.025109 140400719425536 config_util.py:552] Maybe overwriting use_bfloat16: False
Instructions for updating:
rename to distribute_datasets_from_function
W0815 16:22:03.056862 140400719425536 deprecation.py:364] From /content/models/research/object_detection/model_lib_v2.py:563: StrategyBase.experimental_distribute_datasets_from_function (from tensorflow.python.distribute.distribute_lib) is deprecated and will be removed in a future version.
Instructions for updating:
rename to distribute_datasets_from_function
INFO:tensorflow:

In [None]:
import os

import tensorflow as tf

from object_detection.utils import config_util
from object_detection.builders import model_builder
# TF2 exporter. The TF1 `object_detection.exporter.export_inference_graph`
# has no `trained_checkpoint_dir` argument, so the keyword call below raised
# a TypeError with it.
from object_detection.exporter_lib_v2 import export_inference_graph

# Path of the pipeline config; `pipeline_config_path` comes from the training cell.
pipeline_config = pipeline_config_path
model_dir = "/content/drive/MyDrive/Woods_Hole_1_9_4"
output_directory = '/content/drive/MyDrive/Woods_Hole_1_9_4/exported_model'

# Build the model
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)

# Restore the checkpoint
# NOTE(review): the checkpoint name 'ckpt-11' is hard-coded, and the exporter
# call below receives only the checkpoint directory, not `detection_model`.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(model_dir, 'ckpt-11')).expect_partial()

# Export the saved model
# The exporter expects the parsed TrainEvalPipelineConfig proto rather than
# the path of the config file, so rebuild the proto from the loaded configs.
pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
export_inference_graph(input_type='image_tensor',
                       pipeline_config=pipeline_proto,
                       trained_checkpoint_dir=model_dir,
                       output_directory=output_directory)


TypeError: ignored

In [None]:
import os
# NOTE(review): config_util, model_builder and export_inference_graph are
# imported but not used in this cell; the export is done by the script below.
from object_detection.utils import config_util
from object_detection.builders import model_builder
from object_detection.exporter import export_inference_graph

# Inputs for the export script: the checkpoint directory is the training
# model_dir, and the result is written to /content/exported_model.
model_dir = '/content/drive/MyDrive/Woods_Hole_1_9_4'
pipeline_config_path = '/content/drive/MyDrive/Woods_Hole_1_9_4/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config'
trained_checkpoint_dir = model_dir
output_directory = '/content/exported_model'
# Run exporter_main_v2.py in a subprocess; IPython expands each {name} to the
# value of the Python variable of that name.
!python /content/models/research/object_detection/exporter_main_v2.py \
    --pipeline_config_path {pipeline_config_path} \
    --trained_checkpoint_dir {trained_checkpoint_dir} \
    --output_directory {output_directory}


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.map_fn(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))
W0815 18:35:51.224714 134095775469568 deprecation.py:641] From /usr/local/lib/python3.10/dist-packages/tensorflow/python/autograph/impl/api.py:459: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with back_prop=False is deprecated and will be removed in a future version.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.map_fn(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))
I0815 18:35:55.833153 134095775469568 api.py:460] feature_map_spatial_dims: [(40, 40), (20, 20), (10, 10), (5, 5), (3, 3)]
I0815 18:36:06.124674 134095775469568 api.py:460] feature_map_spatial_dims: [(40, 40), (20, 20), (10, 10), (5, 5), (3, 3)]
I081

In [None]:
# Recursively archive the exported model directory into /content/exported_model.zip.
!zip -r /content/exported_model.zip /content/exported_model



  adding: content/exported_model/ (stored 0%)
  adding: content/exported_model/checkpoint/ (stored 0%)
  adding: content/exported_model/checkpoint/checkpoint (deflated 41%)
  adding: content/exported_model/checkpoint/ckpt-0.data-00000-of-00001 (deflated 8%)
  adding: content/exported_model/checkpoint/ckpt-0.index (deflated 80%)
  adding: content/exported_model/saved_model/ (stored 0%)
  adding: content/exported_model/saved_model/saved_model.pb (deflated 92%)
  adding: content/exported_model/saved_model/assets/ (stored 0%)
  adding: content/exported_model/saved_model/variables/ (stored 0%)
  adding: content/exported_model/saved_model/variables/variables.data-00000-of-00001 (deflated 9%)
  adding: content/exported_model/saved_model/variables/variables.index (deflated 78%)
  adding: content/exported_model/saved_model/fingerprint.pb (stored 0%)
  adding: content/exported_model/pipeline.config (deflated 69%)


In [None]:
from psutil import virtual_memory

# Total memory reported by psutil, expressed in units of 1e9 bytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 is the cut-off this notebook uses to call a runtime "high-RAM".
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')

Your runtime has 37.8 gigabytes of available RAM

You are using a high-RAM runtime!
