In [1]:
# Copyright 2021 Fagner Cunha
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [2]:
import os
import sys
import inspect

# Put the directory above the notebook on sys.path so that the
# `classification` package imported further down can be resolved.
#
# Notebook cells are compiled under a synthetic filename, so asking `inspect`
# for the current frame's file does not reliably point at the notebook's
# folder (newer kernels report a temporary path). The kernel's working
# directory is used instead.
# Assumes the kernel was started in the notebook's own directory; the relative
# '../data/data_split.json' path used later relies on the same assumption.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)

# Keep the entry at the front (highest priority) without stacking a duplicate
# each time the cell is re-run.
if not sys.path or sys.path[0] != parentdir:
    sys.path.insert(0, parentdir)

In [3]:
import json
import numpy as np
import pandas as pd

from classification.iwildcamlib import CategoryMap

In [4]:
def _get_data_from_dict(row, dictionary, dictionary_key):
    if str(row['location']) in dictionary:
        return dictionary[str(row['location'])][dictionary_key]
    else:
        return np.NaN

def prepare_location_info(data_info, locations):
    """Add ``date``, ``latitude`` and ``longitude`` to every image record.

    Args:
        data_info: List of image dicts; each needs ``'datetime'`` and
            ``'location'`` entries.
        locations: Dict keyed by the string form of a location id whose values
            are dicts with ``'latitude'`` and ``'longitude'`` fields.

    Returns:
        A new list of dicts (the input records are not modified) in which
        ``date`` is a copy of ``datetime`` and the coordinates come from
        ``locations``. Images whose location has no entry get NaN for both
        coordinates. An empty input yields an empty list.

    Raises:
        KeyError: If a non-empty input lacks ``datetime``/``location``, or a
            matching location record lacks a coordinate field.
    """
    images = pd.DataFrame(data_info)
    if len(images) == 0:
        # Nothing to enrich; an empty frame has no 'datetime' column to copy.
        return []

    images['date'] = images['datetime']

    # Walk the location column once instead of building a Series for every
    # row (twice) via DataFrame.apply(axis=1). The table is keyed by strings.
    location_keys = [str(location) for location in images['location']]
    for field in ('latitude', 'longitude'):
        images[field] = [
            locations[key][field] if key in locations else np.nan
            for key in location_keys
        ]

    return images.to_dict('records')

def _map_categ(row, categ_map):
    return categ_map.category_to_index(row['category_id'])

def prepare_category(data_info, categ_map):
    """Rewrite each annotation's ``category_id`` through ``categ_map``.

    Args:
        data_info: List of annotation dicts, each with a ``'category_id'``.
        categ_map: Object exposing ``category_to_index(category_id)``.

    Returns:
        A new list of dicts (the input records are not modified) whose
        ``category_id`` holds the value returned by
        ``categ_map.category_to_index``. An empty input yields an empty list.

    Raises:
        KeyError: If a non-empty input has no ``'category_id'`` field.
    """
    ann = pd.DataFrame(data_info)
    if len(ann) == 0:
        # No annotations to convert; an empty frame has no 'category_id'.
        return []

    # Only one column is involved, so map it directly rather than building a
    # Series for every row with DataFrame.apply(axis=1).
    ann['category_id'] = ann['category_id'].map(categ_map.category_to_index)
    return ann.to_dict('records')

def filter_locations(data_info, locations):
    """Select the image records taken at one of the given locations.

    Args:
        data_info: List of image dicts, each with a ``'location'`` entry.
        locations: Collection of location ids to keep.

    Returns:
        A new list of dicts containing, in their original order, only the
        records whose ``location`` value appears in ``locations``.
    """
    frame = pd.DataFrame(data_info)
    keep_mask = frame.location.isin(locations)
    selected = frame[keep_mask]
    return selected.to_dict(orient='records')

### Loading metadata

In [5]:
# Directory that holds the iWildCam 2021 metadata files. It defaults to the
# location this notebook was written against; set the IWILDCAM_METADATA_DIR
# environment variable to read from another copy without editing the cell.
metadata_dir = os.environ.get('IWILDCAM_METADATA_DIR',
                              '/data/fagner/iWildCam2021/data/metadata')

# Per-location records (latitude/longitude are read from them).
locations_file = os.path.join(metadata_dir, 'gps_locations.json')
# Training annotations; also handed to CategoryMap.
train_file = os.path.join(metadata_dir, 'iwildcam2021_train_annotations.json')
# Test-set image information.
test_file = os.path.join(metadata_dir, 'iwildcam2021_test_information.json')
# Lists of locations per split ('train' / 'validation' keys are read from it),
# resolved relative to the kernel's working directory.
train_dataset_split = '../data/data_split.json'

In [6]:
def _read_json(path):
    """Return the parsed content of the JSON file at ``path``."""
    # Decode as UTF-8 explicitly instead of relying on the locale's default
    # text encoding, which differs between platforms.
    with open(path, encoding='utf-8') as json_file:
        return json.load(json_file)


locations = _read_json(locations_file)
train_info = _read_json(train_file)
test_info = _read_json(test_file)
split_info = _read_json(train_dataset_split)

# CategoryMap is built from the annotation file path, not from the parsed
# dict; it later provides category_to_index for the training annotations.
category_map = CategoryMap(train_file)

### Converting data

In [7]:
# Add date/latitude/longitude to the training images, then convert the
# training annotations' category ids with category_map.
# NOTE: both results are written back into train_info, so running this cell a
# second time passes the already-converted values through the helpers again.
train_info['images'] = prepare_location_info(
    data_info=train_info['images'],
    locations=locations,
)
train_info['annotations'] = prepare_category(
    data_info=train_info['annotations'],
    categ_map=category_map,
)

In [8]:
# Add date/latitude/longitude to the test images; only the 'images' entry of
# test_info is touched here.
test_info['images'] = prepare_location_info(
    data_info=test_info['images'],
    locations=locations,
)

In [9]:
# Training subset: same top-level entries as train_info, with 'images'
# restricted to the locations listed under split_info['train'].
# The copy is shallow and only 'images' is replaced, so every other entry
# (including the full 'annotations' list) is shared with train_info.
# NOTE(review): annotations are not filtered to the kept images - confirm the
# consumer of this file tolerates that.
trainmini_info = dict(train_info)
trainmini_info['images'] = filter_locations(
    data_info=trainmini_info['images'],
    locations=split_info['train'],
)

In [10]:
# Validation subset: same top-level entries as train_info, with 'images'
# restricted to the locations listed under split_info['validation'].
# The copy is shallow and only 'images' is replaced, so every other entry
# (including the full 'annotations' list) is shared with train_info.
# NOTE(review): annotations are not filtered to the kept images - confirm the
# consumer of this file tolerates that.
val_info = dict(train_info)
val_info['images'] = filter_locations(
    data_info=val_info['images'],
    locations=split_info['validation'],
)

### Saving data

In [11]:
# Directory the converted "*_geoprior.json" files are written to. It defaults
# to the location this notebook was written against; set the
# IWILDCAM_METADATA_DIR environment variable to write somewhere else without
# editing the cell.
output_dir = os.environ.get('IWILDCAM_METADATA_DIR',
                            '/data/fagner/iWildCam2021/data/metadata')

train_geo_file = os.path.join(
    output_dir, 'iwildcam2021_train_annotations_geoprior.json')
trainmin_geo_file = os.path.join(
    output_dir, 'iwildcam2021_trainmini_annotations_geoprior.json')
val_geo_file = os.path.join(
    output_dir, 'iwildcam2021_val_annotations_geoprior.json')
test_geo_file = os.path.join(
    output_dir, 'iwildcam2021_test_information_geoprior.json')

In [12]:
# Write every converted dataset to its destination, in the order
# train -> trainmini -> validation -> test. Existing files are overwritten.
outputs = (
    (train_geo_file, train_info),
    (trainmin_geo_file, trainmini_info),
    (val_geo_file, val_info),
    (test_geo_file, test_info),
)

for out_path, payload in outputs:
    with open(out_path, 'w') as json_file:
        json.dump(payload, json_file)