# Tutorial: Model Training / Inference based on MixedDatasets

# 0 Configure the environment

In [None]:
import os
# Absolute path of the directory the notebook is started from. The repository
# is cloned beneath it, and the download cell builds the checkpoint folder
# from this value.
ROOT_PATH = os.path.abspath("./")

## 0.1 Clone our repo

In [None]:
# Clone only when the checkout is missing: `git clone` aborts if the target
# directory already exists, which would break a second run of this cell.
if not os.path.isdir("mmclassificationCust"):
    !git clone https://github.com/jichengyuan/mmclassificationCust.git

## 0.2 check gcc, g++ and cuda

In [None]:
# Print the versions of the C/C++ compilers and of nvcc (the CUDA compiler)
# that are available in this runtime.
!gcc --version
!g++ --version
!nvcc --version

## 0.3 install mmcv-full and other dependencies

In [None]:
# Install the build helpers, mmcv-full, the repository requirements and the
# repository itself (editable, so local changes take effect without reinstalling).
# `%pip` runs pip for the interpreter of the running kernel, whereas
# `!python -m pip` uses whichever `python` happens to be first on PATH.
# You may need to restart your runtime after this cell, to let your installation take effect.

%cd mmclassificationCust/
%pip install cython funcy mmcv-full
%pip install --no-cache-dir -r requirements.txt
# Editable install; replaces the deprecated `python setup.py develop`.
%pip install -e .
%cd ..

## 0.4 download a pretrained model if desired

In [None]:
# Download the ResNet-18 checkpoint into <checkout>/pths/resnet18/ and switch
# the working directory to the checkout for the cells that follow.
repo_dir = os.path.join(ROOT_PATH, 'mmclassificationCust')
# The trailing '' keeps the trailing separator the path has always carried.
path_to_pth = os.path.join(repo_dir, 'pths', 'resnet18', '')
os.makedirs(path_to_pth, exist_ok=True)

# Absolute target: a relative `%cd mmclassificationCust/` fails when the cell
# is run again from inside the checkout.
%cd {repo_dir}
# -nc skips the download when the file is already present; without it wget
# stores a second copy with a `.1` suffix on every re-run.
!wget -nc -P "{path_to_pth}" https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth

# 1 MixedDataset: Querying, Training and Testing

## 1.1 Query a MixedDataset

In [None]:
# install our vision utils
#!python -m pip install git+https://github.com/cqels/vision.git --force


In [None]:
# import SemkgAPI
#import json
#from vision_utils import semkg_api, data

# # query from string

# # query from file

In [None]:
import json
import os
from os.path import join as opj
# from utils import dataset_split, check_instances_categories, check_download_images


# query_string='''#Give me the images containing person and cat
# prefix cv:<http://vision.semkg.org/onto/v0.1/>
# SELECT DISTINCT ?image
# WHERE {
#     ?ann1 a cv:Annotation.
#     ?ann1 cv:isAnnotationOfImage ?image.
#     ?ann1 cv:hasAnnotatedObject ?obj1.
#     ?obj1 cv:hasLabel "person".
#     ?ann2 a cv:Annotation.
#     ?ann2 cv:isAnnotationOfImage ?image.
#     ?ann2 cv:hasAnnotatedObject ?obj2.
#     ?obj2 cv:hasLabel "cat".
#     ?image cv:hasLocalPath ?localPath.
# }
# LIMIT 20'''

# result=semkg_api.query(query_string)

# Rebinds ROOT_PATH to the current working directory. If the earlier
# `%cd mmclassificationCust/` ran, this is the repository checkout rather
# than its parent directory.
ROOT_PATH = os.path.abspath('./')
# json_f_name = 'test_query_api_image.json'
# Image root of the bundled example dataset; the annotation files sit in its
# `meta/` sub-folder.
path_to_images_mixedDatasets = opj(ROOT_PATH, 'testData/mixedDatasets/imagenette2_tiny/')
path_to_anno_mixedDatasets = opj(path_to_images_mixedDatasets, 'meta/')
# os.makedirs(path_to_anno_mixedDatasets, exist_ok=True)
# path_to_anno = opj(path_to_anno_mixedDatasets, json_f_name)

# with open(path_to_anno, "w") as f:
#     json.dump(result,f)

# check_download_images(result["images"])
# categories = [category['name'] for category in result['categories']]
# number_of_categories = len(categories)

## 1.2 Split the queried MixedDatasets and check the number of instances per category

In [None]:
# Training split: image folder and the matching label list.
path_to_train_images, path_to_train_labels = (
    opj(path_to_images_mixedDatasets, 'train'),
    opj(path_to_anno_mixedDatasets, 'train.txt'),
)

# Validation split: image folder and the matching label list.
path_to_val_images, path_to_val_labels = (
    opj(path_to_images_mixedDatasets, 'val'),
    opj(path_to_anno_mixedDatasets, 'val.txt'),
)

# Class map file stored next to the label lists.
path_label_numbers = opj(path_to_anno_mixedDatasets, 'classmap.txt')
# dataset_split(path_to_anno, path_to_train_val, path_to_test, ratio)
# dataset_split(path_to_train_val, path_to_train, path_to_val, ratio)

## 1.3 Set paths for config and work-dir

In [None]:
# All three paths are relative, so they resolve against the current working
# directory when the training and testing cells run.
# Model/dataset config handed to tools/train.py and tools/test.py.
path_to_config = 'configs/resnet/resnet18_b32x8_imagenet_mixedDatasets.py'
# Passed as --work-dir for training; the checkpoint check later looks for latest.pth here.
path_to_work_dir = 'training_mixedDatasets_example/resnet18_b32x8_imagenet_mixedDatasets/'
# NOTE(review): not referenced by any later cell visible here. Presumably the
# config file points at the downloaded checkpoint itself; confirm.
path_to_pretrained_weights = 'pths/resnet18/resnet18_batch256_imagenet_20200708-34ab8f90.pth'

## 1.4 Training on the MixedDataset

In [None]:
# Run the training script with the selected config. The --cfg-options
# overrides point the train and val dataset entries at the image folders and
# label files defined earlier, and set runner.max_epochs to 100.
%run tools/train.py \
{path_to_config} \
--work-dir {path_to_work_dir} \
--cfg-options data.train.data_prefix={path_to_train_images} \
data.train.ann_file={path_to_train_labels} \
data.val.data_prefix={path_to_val_images} \
data.val.ann_file={path_to_val_labels} \
runner.max_epochs=100

## 1.5 Verify the checkpoint file.

In [None]:
# Stop here if the work dir does not contain a `latest.pth` file.
checkpoint_file = os.path.join(path_to_work_dir, "latest.pth")
assert os.path.isfile(checkpoint_file), f'{checkpoint_file} not exist'
# From here on the checkpoint is referred to by its absolute path.
checkpoint_file = os.path.abspath(checkpoint_file)
# File name handed to the test script's --out option.
output_pkl = os.path.join(path_to_work_dir, 'result_test.pkl')

## 1.6 Testing on the MixedDataset

In [None]:
# Run the test script on the verified checkpoint. The test dataset entry is
# pointed at the validation images and labels, i.e. the same files that were
# passed as the val split to the training command. --out receives output_pkl.
# NOTE(review): --show presumably visualises predictions and may need a
# display; confirm it works in a headless runtime.
%run tools/test.py \
{path_to_config} \
{checkpoint_file} \
--out {output_pkl} \
--cfg-options data.test.data_prefix={path_to_val_images} \
data.test.ann_file={path_to_val_labels} \
--show