In [None]:
# Copyright 2021 Fagner Cunha
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [1]:
import json
import pandas as pd

In [2]:
# Load the test-set metadata JSON and turn its `images` list into a DataFrame
# (one row per entry of `images`).
file = '/data/fagner/iWildCam2021/data/metadata/iwildcam2021_test_information.json'
# Decode explicitly as UTF-8 so the result does not depend on the machine's
# locale default encoding.
with open(file, encoding='utf-8') as json_data:
    data = json.load(json_data)
test_set = pd.DataFrame(data['images'])

In [3]:
# Load the train annotations JSON and collect the `id` of every entry in its
# `categories` list, in file order.
file = '/data/fagner/iWildCam2021/data/metadata/iwildcam2021_train_annotations.json'
# Decode explicitly as UTF-8 so the result does not depend on the machine's
# locale default encoding.
with open(file, encoding='utf-8') as json_data:
    data = json.load(json_data)
categories_2021 = [cat['id'] for cat in data['categories']]

In [4]:
# Previously generated predictions (presumably the 2020 submission, going by
# the variable name). Later cells join on its `Id` column and read `Category`.
submission2020 = pd.read_csv(filepath_or_buffer='./rule_base_on_newsequetionid.csv')

In [5]:
# Sample submission file.
# NOTE(review): `sample_sub` is not referenced by any other cell visible in
# this notebook - confirm whether it is still needed.
sample_sub = pd.read_csv(filepath_or_buffer='sample_submission.csv')

#### Set new images (no match in the 2020 submission) to background

In [28]:
# Left join keeps every row of `test_set`; rows with no matching `Id` in
# `submission2020` end up with a missing `Category`, which is replaced by 0
# before the column is cast to int.
test_sub = (
    test_set
    .merge(submission2020, how='left', left_on='id', right_on='Id')
    .assign(Category=lambda merged: merged['Category'].fillna(0).astype(int))
)

#### Set categories not found in the 2021 category list to background

In [32]:
def _remove_old_categories(row, categories):
    if row['Category'] not in categories:
        return 0
    else:
        return row['Category']

In [33]:
# Keep a category only when it is one of `categories_2021`; everything else
# becomes 0. Same rule as `_remove_old_categories`, but applied to the whole
# column at once instead of calling a Python function for every row.
test_sub['Category'] = test_sub['Category'].where(
    test_sub['Category'].isin(categories_2021), 0)

#### Generate submission

In [49]:
def generate_zero_submission(seq_ids, categories):
    """Build a submission table with one row per id and all predictions at 0.

    Args:
        seq_ids: Sequence of ids; becomes the 'Id' column, in the given order.
        categories: Sequence of category ids. The first entry is skipped (no
            column is created for it); every other entry ``c`` gets a column
            named ``'Predicted' + str(c)``.

    Returns:
        DataFrame with the 'Id' column followed by the zero-filled
        'Predicted<c>' columns.
    """
    sub = pd.DataFrame(seq_ids, columns=['Id'])
    # dict.fromkeys keeps first-seen order and collapses repeated ids into a
    # single column, matching repeated `sub[column] = 0` assignments.
    zero_columns = dict.fromkeys(
        ('Predicted' + str(categ) for categ in categories[1:]), 0)
    # Build the whole zero block in one go: inserting hundreds of columns one
    # by one fragments the frame and makes pandas emit a PerformanceWarning.
    zeros = pd.DataFrame(zero_columns, index=sub.index)

    return pd.concat([sub, zeros], axis=1)

In [71]:
def prediction_4_seq(df, seq_id):
    """Pick a single category for a sequence.

    Args:
        df: DataFrame with `seq_id` and `Category` columns.
        seq_id: Value of `seq_id` selecting the rows of interest.

    Returns:
        The first category different from 0 among the sequence's rows, in row
        order. If every row is 0, that first (zero) value is returned. If no
        row matches `seq_id`, 0 is returned instead of raising IndexError.
    """
    seq_categories = df.loc[df.seq_id == seq_id, 'Category']
    if seq_categories.empty:
        # Unknown sequence: nothing to vote on, fall back to background.
        return 0

    labelled = seq_categories[seq_categories != 0]
    if labelled.empty:
        return seq_categories.iloc[0]

    return labelled.iloc[0]

In [93]:
# Build the all-zero table, then mark one predicted category per sequence.
seq_ids = test_sub.seq_id.unique()
submission = generate_zero_submission(seq_ids, categories_2021)

# First non-zero category of each sequence, in row order: the same choice
# `prediction_4_seq` makes, but computed in a single pass over `test_sub`
# instead of re-filtering the whole frame once per sequence.
labelled = test_sub[test_sub.Category != 0]
first_label = labelled.groupby('seq_id', sort=False)['Category'].first()
first_label = first_label[first_label > 0]

# One write per predicted category rather than one per sequence.
for pred, seqs_with_pred in first_label.groupby(first_label):
    column = 'Predicted' + str(pred)
    submission.loc[submission.Id.isin(seqs_with_pred.index), column] = 1

In [94]:
# Display the submission table as the cell output (pandas truncates the
# rendering of large frames).
submission

Unnamed: 0,Id,Predicted2,Predicted3,Predicted4,Predicted6,Predicted7,Predicted8,Predicted9,Predicted10,Predicted12,...,Predicted559,Predicted562,Predicted563,Predicted564,Predicted565,Predicted566,Predicted567,Predicted568,Predicted570,Predicted571
0,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,a91d6764-0cd3-11eb-bed1-0242ac1c0002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,91ccb676-21bc-11ea-a13a-137349068a90,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,386914ce-6fe2-11eb-844f-0242ac1c0002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,988ae29e-21bc-11ea-a13a-137349068a90,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11052,a918faee-0cd3-11eb-bed1-0242ac1c0002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11053,94284f52-21bc-11ea-a13a-137349068a90,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11054,9912f080-21bc-11ea-a13a-137349068a90,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11055,96fe10f4-21bc-11ea-a13a-137349068a90,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [96]:
# Write the submission table to disk without the DataFrame index column.
submission.to_csv(path_or_buf='predictions2020_test.csv', index=False)