# Создание данных

Создание масштабированных картинок и разметки

Скрипт в /content/drive/MyDrive/ML_and_CNN/notebooks/script_create_data/

>\> python -m scaler ...

Входные данные

* --video - путь к видео 
* --csv - путь к аннотации этого видео
* --framescsv - путь к csv с перечислением сохраняемых файлов
* --result - путь к сохраняемому csv с характеристиками
* --width - новая ширина кадра
* --height - новая высота кадра

Выходные данные

* В data/image/ - картинки
* В data/result.csv - характеристики изображений со ссылками на конкретные картинки в data/image/


Режим записи result.csv (перезапись или дозапись) переключается в коде самого скрипта. При дозаписи заголовки csv повторно сохранять не нужно.

In [None]:
# Switch to the directory that holds the scaler script so `python -m scaler` can be run from it.
%cd /content/drive/MyDrive/ML_and_CNN/notebooks/script_create_data/

/content/drive/MyDrive/ML_and_CNN/notebooks/script_create_data


Пример

In [None]:
# Example run: scale the frames listed in the example frames CSV to 512x512
# and write their characteristics to example_result.csv.
!python -m scaler \
--video /content/drive/MyDrive/ML_and_CNN/drone_annotations/video/uniyar_part_1/ch01_20200605113709.mp4 \
--csv /content/drive/MyDrive/ML_and_CNN/drone_annotations/annotations/uniyar_part_1/ch01_20200605113709/result_annotations_ch01_20200605113709.csv \
--framescsv /content/drive/MyDrive/ML_and_CNN/data/input_frames/example_input_ch01_20200605113709.csv \
--result /content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/example_result.csv \
--width 512 \
--height 512

# Данные 

Пусть в каждом наборе будет по N фото 1024х1024

N=25

### train

In [None]:
# Train set 00000: scale the frames listed in input_train_00000_*.csv to 1024x1024.
!python -m scaler \
--video /content/drive/MyDrive/ML_and_CNN/drone_annotations/video/uniyar_part_1/ch01_20200605113709.mp4 \
--csv /content/drive/MyDrive/ML_and_CNN/drone_annotations/annotations/uniyar_part_1/ch01_20200605113709/result_annotations_ch01_20200605113709.csv \
--framescsv /content/drive/MyDrive/ML_and_CNN/data/input_frames/input_train_00000_ch01_20200605113709.csv \
--result /content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/train_00000_ch01_20200605113709.csv \
--width 1024 \
--height 1024

In [None]:
# Train set 00001: scale the frames listed in input_train_00001_*.csv to 1024x1024.
!python -m scaler \
--video /content/drive/MyDrive/ML_and_CNN/drone_annotations/video/uniyar_part_1/ch01_20200605113709.mp4 \
--csv /content/drive/MyDrive/ML_and_CNN/drone_annotations/annotations/uniyar_part_1/ch01_20200605113709/result_annotations_ch01_20200605113709.csv \
--framescsv /content/drive/MyDrive/ML_and_CNN/data/input_frames/input_train_00001_ch01_20200605113709.csv \
--result /content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/train_00001_ch01_20200605113709.csv \
--width 1024 \
--height 1024

### eval

In [None]:
# Eval set 00000: scale the frames listed in input_eval_00000_*.csv to 1024x1024.
!python -m scaler \
--video /content/drive/MyDrive/ML_and_CNN/drone_annotations/video/uniyar_part_1/ch01_20200605113709.mp4 \
--csv /content/drive/MyDrive/ML_and_CNN/drone_annotations/annotations/uniyar_part_1/ch01_20200605113709/result_annotations_ch01_20200605113709.csv \
--framescsv /content/drive/MyDrive/ML_and_CNN/data/input_frames/input_eval_00000_ch01_20200605113709.csv \
--result /content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/eval_00000_ch01_20200605113709.csv \
--width 1024 \
--height 1024

# Конвертация данных в tfrecord

Набор полей feature взят из официального руководства Object Detection API:

https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md

In [None]:
import tensorflow as tf
import csv

In [None]:
def _bytes_feature(value):
  """Wrap ``value`` (string / bytes) in a Feature holding a one-element bytes_list."""
  eager_tensor_type = type(tf.constant(0))
  # BytesList will not unpack a string from an EagerTensor, so extract it first.
  payload = value.numpy() if isinstance(value, eager_tensor_type) else value
  wrapped = tf.train.BytesList(value=[payload])
  return tf.train.Feature(bytes_list=wrapped)

def _float_feature(value):
  """Wrap ``value`` (float / double) in a Feature holding a one-element float_list."""
  wrapped = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=wrapped)

def _int64_feature(value):
  """Wrap ``value`` (bool / enum / int / uint) in a Feature holding a one-element int64_list."""
  wrapped = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=wrapped)

In [None]:
def create_tf_example(row):
  """Build a ``tf.train.Example`` for one annotated image described by a CSV row.

  Args:
    row: Mapping (for instance a ``csv.DictReader`` row) with the keys
      ``height``, ``width``, ``name``, ``path``, ``xtl``, ``ytl``, ``xbr``
      and ``ybr``. ``path`` is the image file whose bytes are embedded.
      The box corners are divided by ``width`` / ``height``; this assumes
      they are pixel coordinates in that same image (TODO confirm against
      the scaler output).

  Returns:
    A ``tf.train.Example`` holding the encoded image and exactly one
    bounding box labelled ``drone`` (class id 1).

  Raises:
    KeyError: If ``row`` lacks one of the required keys.
    ValueError: If a numeric field cannot be parsed.
    ZeroDivisionError: If ``width`` or ``height`` is 0.
    OSError: If the image file cannot be read.
  """
  height = int(row["height"])  # Image height
  width = int(row["width"])  # Image width
  # Filename of the image, stored as UTF-8 bytes.
  filename = (row["name"] + ".jpg").encode("utf-8")

  # Read through a context manager so the file handle is closed right away
  # instead of being left for the garbage collector.
  with open(row["path"], "rb") as image_file:
    encoded_image_data = image_file.read()  # Encoded image bytes

  # The Object Detection API guide specifies b'jpeg' or b'png' here,
  # not a file extension such as b'.jpg'.
  image_format = b"jpeg"

  # Normalized box corners (one box per example).
  xmins = float(row["xtl"]) / width  # left x
  xmaxs = float(row["xbr"]) / width  # right x
  ymins = float(row["ytl"]) / height  # top y
  ymaxs = float(row["ybr"]) / height  # bottom y

  classes_text = b"drone"  # string class name of the bounding box
  classes = 1  # integer class id of the bounding box

  feature = {
      'image/height': _int64_feature(height),
      'image/width': _int64_feature(width),
      'image/filename': _bytes_feature(filename),
      'image/source_id': _bytes_feature(filename),
      'image/encoded': _bytes_feature(encoded_image_data),
      'image/format': _bytes_feature(image_format),
      'image/object/bbox/xmin': _float_feature(xmins),
      'image/object/bbox/xmax': _float_feature(xmaxs),
      'image/object/bbox/ymin': _float_feature(ymins),
      'image/object/bbox/ymax': _float_feature(ymaxs),
      'image/object/class/text': _bytes_feature(classes_text),
      'image/object/class/label': _int64_feature(classes),
  }

  return tf.train.Example(features=tf.train.Features(feature=feature))

Пример

In [None]:
# Example pair: the CSV written by the example scaler run and the TFRecord file to create from it.
# create_tf_record() reads both names as globals.
input_csv = "/content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/example_result.csv"
tfrecord_file = "/content/drive/MyDrive/ML_and_CNN/data/tfrecord/example_dataset.record"

Данные

In [None]:
# Train shard 00000 of 2. This overwrites input_csv / tfrecord_file, so run only
# the cell for the shard you want right before calling create_tf_record().
input_csv = "/content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/train_00000_ch01_20200605113709.csv"
tfrecord_file = "/content/drive/MyDrive/ML_and_CNN/data/tfrecord/train-00000-of-00002.tfrecord"

In [None]:
# Train shard 00001 of 2. This overwrites input_csv / tfrecord_file, so run only
# the cell for the shard you want right before calling create_tf_record().
input_csv = "/content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/train_00001_ch01_20200605113709.csv"
tfrecord_file = "/content/drive/MyDrive/ML_and_CNN/data/tfrecord/train-00001-of-00002.tfrecord"

In [None]:
# Eval shard 00000 of 1. This overwrites input_csv / tfrecord_file, so run only
# the cell for the shard you want right before calling create_tf_record().
input_csv = "/content/drive/MyDrive/ML_and_CNN/data/csv_for_tfrecord_creating/eval_00000_ch01_20200605113709.csv"
tfrecord_file = "/content/drive/MyDrive/ML_and_CNN/data/tfrecord/eval-00000-of-00001.tfrecord"

In [None]:
def create_tf_record(csv_path=None, record_path=None):
  """Convert every row of an annotation CSV into one record of a TFRecord file.

  Each CSV row is turned into a ``tf.train.Example`` by
  ``create_tf_example`` and written in serialized form.

  Args:
    csv_path: CSV file to read. Defaults to the global ``input_csv``.
    record_path: TFRecord file to write. Defaults to the global
      ``tfrecord_file``.

  Raises:
    OSError: If the CSV file cannot be opened.
  """
  # Fall back to the notebook-level globals so existing no-argument calls
  # keep working unchanged.
  if csv_path is None:
    csv_path = input_csv
  if record_path is None:
    record_path = tfrecord_file

  # Open the CSV first: a missing or unreadable input then fails before the
  # writer is created for the output path.
  with open(csv_path, newline="") as csv_file:
    with tf.io.TFRecordWriter(record_path) as writer:
      for row in csv.DictReader(csv_file):
        writer.write(create_tf_example(row).SerializeToString())

In [None]:
# Write the TFRecord for the input_csv / tfrecord_file pair that is currently set.
create_tf_record()

По идее должно работать..

In [None]:
# Open the TFRecord file as a dataset of raw serialized records for inspection.
filenames = [tfrecord_file]
raw_dataset = tf.data.TFRecordDataset(filenames)
raw_dataset

<TFRecordDatasetV2 shapes: (), types: tf.string>

In [None]:
# Parse the first serialized record back into a tf.train.Example and print it
# to check what was written.
for raw_record in raw_dataset.take(1):
  example = tf.train.Example()
  example.ParseFromString(raw_record.numpy())
  print(example)

features {
  feature {
    key: "image/encoded"
    value {
      bytes_list {
        value: "\377\330\377\340\000\020JFIF\000\001\001\000\000\001\000\001\000\000\377\333\000C\000\002\001\001\001\001\001\002\001\001\001\002\002\002\002\002\004\003\002\002\002\002\005\004\004\003\004\006\005\006\006\006\005\006\006\006\007\t\010\006\007\t\007\006\006\010\013\010\t\n\n\n\n\n\006\010\013\014\013\n\014\t\n\n\n\377\333\000C\001\002\002\002\002\002\002\005\003\003\005\n\007\006\007\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\377\300\000\021\010\004\000\004\000\003\001\"\000\002\021\001\003\021\001\377\304\000\037\000\000\001\005\001\001\001\001\001\001\000\000\000\000\000\000\000\000\001\002\003\004\005\006\007\010\t\n\013\377\304\000\265\020\000\002\001\003\003\002\004\003\005\005\004\004\000\000\001}\001\002\003\000\004\021\005\022!1A\006\023Qa\007\"q\0242\201\221\241\010#B\261\301\025R\321\360$3br\202\t\n\026\027\030\031\032%&\'()*4

Да-да, точно работает