# Tutorial: Model Training / Inference based on MixedDatasets

# 0 Configure ENVS

In [None]:
# Root directory for data. NOTE(review): no other cell visible in this notebook
# reads this constant - confirm it is still needed.
DATA_ROOT_PATH = "/mnt"

## 0.1 Clone our repo

In [None]:
# Clone the mmdetectionCust sources into ./mmdetectionCust under the current
# working directory; the cells below `%cd` into that checkout.
!git clone https://github.com/jichengyuan/mmdetectionCust.git

## 0.2 Check the gcc, g++ and CUDA (nvcc) versions

In [None]:
!gcc --version
!g++ --version
!nvcc --version

## 0.3 install mmcv-full and other dependencies

In [None]:
# You may need to restart your runtime after this cell, to let your installation take effect.

%cd mmdetectionCust/
!pip install cython mmcv-full funcy && pip --no-cache-dir install -r requirements.txt
!python setup.py develop

## 0.4 download a pretrained model if desired

In [None]:
import os
path_to_pth = 'mmdetectionCust/pths/focs/'
os.makedirs(path_to_pth, exist_ok=True)

%cd mmdetectionCust/
!wget -P $path_to_pth https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth

# 1 MixedDataset: Querying, Training and Testing

## 1.1 Query a MixedDataset

In [None]:
# clone vision project
!rm -r vision
!git clone https://github.com/cqels/vision.git

In [None]:
# import SemkgAPI
import json
import vision.src.utils.semkg_api as semkg_api
import vision.src.utils.data as data

# # query from string

# # query from file

In [None]:
# SPARQL query sent to the SemKG endpoint: up to 20 distinct images that carry
# both a "person" annotation and a "cat" annotation.
query_string = '''#Give me the images containing person and cat
prefix cv:<http://vision.semkg.org/onto/v0.1/>
SELECT DISTINCT ?image
WHERE {
    ?ann1 a cv:Annotation.
    ?ann1 cv:isAnnotationOfImage ?image.
    ?ann1 cv:hasAnnotatedObject ?obj1.
    ?obj1 cv:hasLabel "person".
    ?ann2 a cv:Annotation.
    ?ann2 cv:isAnnotationOfImage ?image.
    ?ann2 cv:hasAnnotatedObject ?obj2.
    ?obj2 cv:hasLabel "cat".
    ?image cv:hasLocalPath ?localPath.
}
LIMIT 20'''

# Run the query through the SemKG helper module.
result = semkg_api.query(query_string)

# Persist the raw query result as JSON in the current working directory.
ANNOTATION_FILENAME = "annotation.json"
with open(ANNOTATION_FILENAME, "w") as annotation_file:
    annotation_file.write(json.dumps(result))

In [None]:
# Hand the "images" entries of the query result to the vision data helper.
# NOTE(review): presumably this fetches/arranges the image files locally -
# confirm against vision/src/utils/data.py.
queried_images = result["images"]
data.prepare_data(queried_images)

## 1.1 FakeData

In [5]:
import json
import os
from os.path import join as opj
from utils import dataset_split, check_instances_categories, check_download_images

# All dataset paths below are anchored at the directory the notebook is
# currently running in.
ROOT_PATH = os.path.abspath(os.curdir)

json_f_name = 'test_query_api_image.json'
path_to_anno = opj(ROOT_PATH, 'testData/mixedDatasets/', json_f_name)

# Parse the annotation file; the code below reads its top-level "images" and
# "categories" entries.
with open(path_to_anno, 'r') as anno_file:
    anno = json.loads(anno_file.read())

# NOTE(review): presumably verifies/downloads the referenced image files -
# confirm against utils.check_download_images.
check_download_images(anno["images"])

# Category names in annotation order, and how many there are; both are passed
# to the train/test scripts further down.
categories = [entry['name'] for entry in anno['categories']]
number_of_categories = len(categories)

## 1.2 Split the queried MixedDatasets and check the #instances per category

In [None]:
# Ratio handed to both dataset_split calls below.
# NOTE(review): presumably the fraction kept in the first output file -
# confirm against utils.dataset_split.
ratio = 0.8

# Directory that receives the generated annotation files.
path_to_anno_mixedDatasets = opj(ROOT_PATH, 'testData/mixedDatasets/annotations/')
os.makedirs(path_to_anno_mixedDatasets, exist_ok=True)

# One annotation file per subset, all inside the directory created above.
path_to_train_val, path_to_train, path_to_val, path_to_test = (
    opj(path_to_anno_mixedDatasets, split_file)
    for split_file in (
        'train_val_set.json',
        'train_set.json',
        'val_set.json',
        'test_set.json',
    )
)

# Two-stage split: full set -> (train+val, test), then train+val -> (train, val).
dataset_split(path_to_anno, path_to_train_val, path_to_test, ratio)
dataset_split(path_to_train_val, path_to_train, path_to_val, ratio)

## 1.3 Set paths for config and work-dir

In [7]:
# Model/dataset config handed to tools/train.py and tools/test.py.
path_to_config = (
    'configs/fcos/'
    'fcos_r50_caffe_fpn_gn-head_1x_mixedDataset_loadFromSeparatedFile.py'
)

# Directory passed as --work-dir; the checkpoint check in 1.5 looks for
# latest.pth inside it.
path_to_work_dir = (
    'training_logs_example/'
    'fcos_coco_format_anno_example/'
)

# Pretrained weights passed as load_from; relative to the working directory.
path_to_pretrained_weights = (
    'pths/focs/'
    'fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth'
)

## 1.4 Training on the MixedDataset

In [None]:
%run tools/train.py \
{path_to_config} \
--work-dir {path_to_work_dir} \
--cfg-options model.bbox_head.num_classes={number_of_categories} \
data.train.ann_file={path_to_anno} \
data.val.ann_file={path_to_anno} \
runner.max_epochs=4 \
data.samples_per_gpu=1 \
load_from={path_to_pretrained_weights} \
data.train.classes="$categories" \
data.val.classes="$categories"

## 1.5 Verify the checkpoint file.

In [None]:
# The test cell loads latest.pth from the training work dir; fail early with a
# readable message if it is not there.
checkpoint_file = os.path.join(path_to_work_dir, "latest.pth")
assert os.path.isfile(checkpoint_file), '%s not exist' % checkpoint_file

# Switch to the absolute form of the checkpoint path for the test run.
checkpoint_file = os.path.abspath(checkpoint_file)

# Destination handed to tools/test.py via --out.
output_pkl = os.path.join(path_to_work_dir, 'result_test.pkl')

## 1.6 Testing on the MixedDataset

In [None]:
%run tools/test.py \
{path_to_config} \
{checkpoint_file} \
--work-dir {path_to_work_dir} \
--out {output_pkl} \
--cfg-options data.test.ann_file={path_to_anno} \
model.bbox_head.num_classes={number_of_categories} \
data.samples_per_gpu=1 \
data.test.classes="$categories" \
--eval bbox \
--show