# Задание № 4

Обучить модель семантической сегментации (человек-vs-фон) на подмножестве датасета MS COCO
Библиотеки: [Python, Tensorflow]


План следующий:
- Скачать данные на локальную машину
- Отфильтровать по тегу "человек"
- загружать на colab

Данные торрент-клиентом качал отсюда [Academic Torrents](https://academictorrents.com/details/74dec1dd21ae4994dfd9069f9cb0443eb960c962)
train2017.zip - 19 Гб
val2017.zip - 0.8 Гб
annotations_trainval2017.zip - 0.3 Гб



In [29]:
import os
import skimage.io as io
import numpy as np

import tensorflow as tf
import matplotlib.pyplot as plt

## Подготовка COCO API

In [14]:
from pycocotools.coco import COCO

## Установка COCO API

### Компиляция COCO API

Установка Microsoft Visual C++ 14.0 or greater из [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/ru/visual-cpp-build-tools/) 1,7 Г

In [None]:
# !cd PythonAPI
# !pip install Cython
# !pip install git+https://github.com/philferriere/cocoapi.git#egg=pycocotools^&subdirectory=PythonAPI

In [13]:
import pycocotools

In [21]:
COCO_ROOT = 'C:/prj/GeekBrainsAI/fast-ai-coco'

## Универсальный класс Dataset для сегментации

In [18]:
class Dataset():

    def crop_images(self, img, inp_size, random_crop=False):
        shape = tf.shape(img)
        pad = (
            [0, tf.maximum(inp_size - shape[0], 0)],
            [0, tf.maximum(inp_size - shape[1], 0)],
            [0, 0],
        )
        img = tf.pad(img, pad)

        if random_crop:
            img = tf.image.random_crop(img, (inp_size, inp_size, shape[2]))
        else: # central crop
            shape = tf.shape(img)
            ho = (shape[0] - inp_size) // 2
            wo = (shape[1] - inp_size) // 2
            img = img[ho:ho+inp_size, wo:wo+inp_size, :]

        return img

    def train_dataset(self, batch_size, epochs, inp_size):

        def item_to_images(item):
            random_crop = True
            img_combined = tf.py_function(self.read_images, [item], tf.uint8)
            img_combined = self.crop_images(img_combined, inp_size, random_crop)

            img = tf.cast(img_combined[...,:3], tf.float32) / np.float32(255.)
            mask_class = tf.cast(img_combined[...,3:4], tf.float32)
            return img, mask_class

        dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
        dataset = dataset.shuffle(buffer_size=len(self.img_list))
        dataset = dataset.map(item_to_images)
        dataset = dataset.repeat(epochs)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        return dataset

    def val_dataset(self, batch_size, inp_size):

        def item_to_images(item):
            random_crop = False
            img_combined = tf.py_function(self.read_images, [item], tf.uint8)
            img_combined = self.crop_images(img_combined, inp_size, random_crop)

            img = tf.cast(img_combined[...,:3], tf.float32) / np.float32(255.)
            mask_class = tf.cast(img_combined[...,3:4], tf.float32)
            return img, mask_class

        dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
        dataset = dataset.map(item_to_images)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        return dataset

## Класс для сегментационного датасета COCO
Класс наследутся от универсльного `Dataset` и реализует кастомную функцию чтения данных.

In [19]:
class COCO_Dataset(Dataset):

    def __init__(self, sublist):
        ann_file_fpath = os.path.join(COCO_ROOT, 'annotations', 'instances_'+sublist+'2017.json')
        self.coco = COCO(ann_file_fpath)
        self.cat_ids = self.coco.getCatIds(catNms=['person'])
        self.img_list = self.coco.getImgIds(catIds=self.cat_ids)

    def read_images(self, img_id):
        img_id = int(img_id.numpy())
        img_data = self.coco.loadImgs(img_id)[0]
        img_fname = '/'.join(img_data['coco_url'].split('/')[-2:])

        img = io.imread(os.path.join(COCO_ROOT, img_fname))
        if len(img.shape) == 2:
            img = np.tile(img[..., None], (1, 1, 3))

        ann_ids = self.coco.getAnnIds(imgIds=img_data['id'], catIds=self.cat_ids, iscrowd=None)
        anns = self.coco.loadAnns(ann_ids)
        mask_class = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
        for i in range(len(anns)):
            mask_class += self.coco.annToMask(anns[i])
        mask_class = (mask_class > 0).astype(np.uint8)

        img_combined = np.concatenate([img, mask_class[..., None]], axis=2)

        return img_combined

In [22]:
COCO_dataset_train = COCO_Dataset('train')
COCO_dataset_val = COCO_Dataset('val')

loading annotations into memory...
Done (t=21.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.85s)
creating index...
index created!


In [24]:
len(COCO_dataset_train.img_list)

64115

In [25]:
len(COCO_dataset_val.img_list)

2693

In [27]:
train_ds = COCO_dataset_train.train_dataset(batch_size=32, epochs=5, inp_size=240)

In [28]:
train_ds

<BatchDataset element_spec=(TensorSpec(shape=(32, 240, 240, None), dtype=tf.float32, name=None), TensorSpec(shape=(32, 240, 240, None), dtype=tf.float32, name=None))>

In [30]:
plt.imshow(train_ds[0]), plt.show()

TypeError: 'BatchDataset' object is not subscriptable

In [33]:
COCO_dataset_train.read_images(1)

AttributeError: 'int' object has no attribute 'numpy'

In [35]:
val_ds = COCO_dataset_val.val_dataset(batch_size=32,inp_size=240)

In [36]:
val_ds

<BatchDataset element_spec=(TensorSpec(shape=(32, None, None, None), dtype=tf.float32, name=None), TensorSpec(shape=(32, None, None, None), dtype=tf.float32, name=None))>

In [38]:
val_ds.next()[1]

AttributeError: 'BatchDataset' object has no attribute 'next'