In [1]:
import pandas as pd
import glob
import os
from utils.utils import remap, cats_to_list

In [None]:
def generate_inference_df(input_path='../runs/detect/predict/labels/', conf_threshold=0.25,
                          category_key_path='../category_key.json'):
    '''Generates dataframe of information from inference output files.

    Every ``*.txt`` file found in ``input_path`` becomes one row. Each non-blank
    line of a file must hold six whitespace-separated fields:
    ``category x y w h conf``.

    Args:
        input_path: (string) directory holding the prediction label files; a
            trailing path separator is optional
        conf_threshold: (float) minimum confidence threshold for valid detection
        category_key_path: (string) path to the category key JSON; the columns
            ``id``, ``index`` and ``shallow_species`` are read from it
    Returns:
        df: (pd.DataFrame) dataframe of inference output with the columns ``id``
            (file name without extension), ``categories`` (output of ``remap``),
            ``location``, ``conf``, ``weak_shallow``, ``strong_shallow`` and
            ``no_detection``
    Raises:
        ValueError: if a non-blank line does not consist of exactly six numeric
            fields
    '''
    out = {}
    # os.path.join tolerates a missing trailing separator; sorted() makes the row
    # order reproducible because glob returns files in arbitrary order.
    filelist = sorted(glob.glob(os.path.join(input_path, '*.txt')))

    cat_df = pd.read_json(category_key_path)
    shallow = cat_df[cat_df.shallow_species == True]['index'].to_list()
    # Maps the DataFrame row label to the 'id' column.
    # NOTE(review): this only equals an index -> id mapping if the row labels
    # coincide with the 'index' column - confirm against category_key.json.
    mapper = cat_df[['id', 'index']].to_dict()['id']

    for i, file in enumerate(filelist):
        cats = []
        conf = []
        location = []
        weak_shallow = 0
        strong_shallow = 0

        with open(file, 'r') as f:
            for line in f:
                # split() without an argument ignores the trailing newline and
                # repeated/trailing blanks, and yields [] for an empty line.
                fields = line.split()
                if not fields:
                    continue

                category, x, y, w, h, conf_value = fields
                category = int(category)
                conf_value = float(conf_value)
                loc = tuple(float(value) for value in (x, y, w, h))

                if category in shallow:
                    weak_shallow = 1    # weakly shallow if there is a shallow detection at any confidence
                    if conf_value >= conf_threshold:
                        strong_shallow = 1  # strongly shallow if a high conf shallow detection

                # dedup: only the first confident detection of each category is kept
                if (category not in cats) and (conf_value >= conf_threshold):
                    cats.append(category)
                    conf.append(conf_value)
                    location.append(loc)

        cats = remap(cats, mapper)
        no_detection = 1 if len(cats) == 0 else 0

        out[i] = {'id': os.path.splitext(os.path.basename(file))[0],
                  'categories': cats,
                  'location': location,
                  'conf': conf,
                  'weak_shallow': weak_shallow,
                  'strong_shallow': strong_shallow,
                  'no_detection': no_detection,
                  }

    df = pd.DataFrame.from_dict(out, orient='index')
    return df

In [None]:
# pd.read_json('../supercat_key.json')

In [None]:
# pd.read_json('../category_key.json')

In [None]:
# List the 'id' values of every category flagged as a shallow species.
cat_df = pd.read_json('../category_key.json')
shallow = cat_df.loc[cat_df['shallow_species'] == True, 'id'].tolist()
shallow

In [None]:
def generate_inference_df_sup(input_path='../runs/detect/predict_superL/labels/', conf_threshold=0.15,
                              category_key_path='../category_key.json',
                              supercat_key_path='../supercat_key.json'):
    '''Generates dataframe of information from supercategory inference output files.

    Every ``*.txt`` file found in ``input_path`` becomes one row. Each non-blank
    line of a file must hold six whitespace-separated fields:
    ``supercategory x y w h conf``.

    Args:
        input_path: (string) directory holding the prediction label files; a
            trailing path separator is optional
        conf_threshold: (float) minimum confidence threshold for valid detection
        category_key_path: (string) path to the category key JSON; the columns
            ``id`` and ``shallow_species`` are read from it
        supercat_key_path: (string) path to the supercategory key JSON; the
            columns ``top_category_id`` and ``supercat_id`` are read from it
    Returns:
        df: (pd.DataFrame) dataframe of inference output with the columns ``id``
            (file name without extension), ``supercategory``, ``categories_s``,
            ``location_s``, ``conf_s``, ``weak_shallow_s``, ``strong_shallow_s``
            and ``no_detection_s``
    Raises:
        ValueError: if a non-blank line does not consist of exactly six numeric
            fields
    '''
    out = {}
    # os.path.join tolerates a missing trailing separator; sorted() makes the row
    # order reproducible because glob returns files in arbitrary order.
    filelist = sorted(glob.glob(os.path.join(input_path, '*.txt')))

    cat_df = pd.read_json(category_key_path)
    scat_df = pd.read_json(supercat_key_path)
    shallow = cat_df[cat_df.shallow_species == True]['id'].to_list()
    # Row label -> 'top_category_id', re-keyed by str because remap() below is
    # handed the raw text token from the label file.
    mapper = scat_df[['top_category_id', 'supercat_id']].to_dict()['top_category_id']
    mapper = {str(key): value for key, value in mapper.items()}

    for i, file in enumerate(filelist):
        cats = []
        supercats = []
        conf = []
        location = []
        weak_shallow = 0
        strong_shallow = 0

        with open(file, 'r') as f:
            for line in f:
                # split() without an argument ignores the trailing newline and
                # repeated/trailing blanks, and yields [] for an empty line.
                fields = line.split()
                if not fields:
                    continue

                supercat, x, y, w, h, conf_value = fields
                # NOTE(review): remap() is project-local; confirm it treats a str
                # argument as one key rather than iterating over its characters.
                category = remap(supercat, mapper)[0]
                supercat = int(supercat)
                conf_value = float(conf_value)
                loc = tuple(float(value) for value in (x, y, w, h))

                if category in shallow:
                    weak_shallow = 1    # weakly shallow if there is a shallow detection at any confidence
                    if conf_value >= conf_threshold:
                        strong_shallow = 1  # strongly shallow if a high conf shallow detection

                # dedup: only the first confident detection of each category is kept
                if (category not in cats) and (conf_value >= conf_threshold):
                    cats.append(category)
                    supercats.append(supercat)
                    conf.append(conf_value)
                    location.append(loc)

        no_detection = 1 if len(cats) == 0 else 0

        out[i] = {'id': os.path.splitext(os.path.basename(file))[0],
                  'supercategory': supercats,
                  'categories_s': cats,
                  'location_s': location,
                  'conf_s': conf,
                  'weak_shallow_s': weak_shallow,
                  'strong_shallow_s': strong_shallow,
                  'no_detection_s': no_detection
                  }

    df = pd.DataFrame.from_dict(out, orient='index')
    return df

In [None]:
# Supercategory predictions read from the function's default label directory,
# using a confidence threshold of 0.25 instead of the function default of 0.15.
df_sup = generate_inference_df_sup(conf_threshold=0.25)
df_sup

In [None]:
# Category predictions read from the predict40m label directory with a
# confidence threshold of 0.5.
df_cat = generate_inference_df('../runs/detect/predict40m/labels/', 0.5)
df_cat

In [None]:
# Combine both prediction frames per image. merge() defaults to an inner join,
# so an id that is present in only one of the two frames is dropped here.
df = df_cat.merge(df_sup, on='id')
df

In [None]:
def detect_osd(row):
    '''Sets the out-of-sample score ('osd') of one merged prediction row.

    The score is looked up from the four shallow flags. When neither model
    produced a detection the row is scored 1.0 and given a default category;
    when only the category model produced nothing, the categories derived from
    the supercategory model are used instead.

    Args:
        row: one row of the merged dataframe (e.g. the pd.Series passed by
            ``df.apply(detect_osd, axis=1)``). It must expose ``no_detection``,
            ``no_detection_s``, ``categories_s``, ``strong_shallow``,
            ``strong_shallow_s``, ``weak_shallow`` and ``weak_shallow_s`` as
            attributes and support item assignment.
    Returns:
        row: the same object, modified in place. ``osd`` stays unset when the
            flag combination matches no rule and at least one model detected
            something.
    '''
    # Key order: (strong_shallow, strong_shallow_s, weak_shallow, weak_shallow_s)
    osd_by_flags = {
        (0, 0, 0, 0): 0.9,
        (0, 0, 0, 1): 0.7,
        # NOTE(review): the original if-chain tested (0, 1, 0, 1) twice, first
        # assigning 0.5 and later 0.4; the later assignment always won, so 0.4
        # is kept here. Confirm which value was intended.
        (0, 1, 0, 1): 0.4,
        (0, 0, 1, 0): 0.4,
        (0, 0, 1, 1): 0.3,
        (1, 0, 1, 0): 0.2,
        (0, 1, 1, 1): 0.1,
        (1, 0, 1, 1): 0.1,
        (1, 1, 1, 1): 0.0,
    }
    shallow = (row.strong_shallow, row.strong_shallow_s, row.weak_shallow, row.weak_shallow_s)
    none_from_cat = bool(row.no_detection)
    none_from_sup = bool(row.no_detection_s)

    if shallow in osd_by_flags:
        row['osd'] = osd_by_flags[shallow]

    if none_from_cat and none_from_sup:
        # no detections from either model - must be osd.
        # Applied after the table lookup on purpose: previously this 1.0 was
        # assigned first and then always overwritten by a flag rule.
        row['osd'] = 1.0
        row['categories'] = [52] # setting to the most common deep object
    elif none_from_cat:
        # nothing detected by cat, something detected by super
        row['categories'] = row.categories_s

    return row

In [None]:
# Run detect_osd on every row (axis=1) of the merged frame.
out_df = df.apply(detect_osd, axis=1)

In [None]:
# Show the rows that ended up without an 'osd' value.
out_df[out_df['osd'].isnull()]

In [None]:
def select_top(lst):
    '''Formats the first element of lst as a bracketed string, e.g. '[52]'.

    Args:
        lst: (sequence) non-empty sequence; only element 0 is used
    Returns:
        (string) the first element wrapped in square brackets
    '''
    return '[{}]'.format(lst[0])


In [None]:
def format_cat(lst):
    '''Turns a list of categories into the string written to the submission.

    Args:
        lst: (sequence) categories to format
    Returns:
        (string) '[x]' when lst holds exactly one element, otherwise the
            elements joined by single spaces (an empty string for an empty lst)
    '''
    if len(lst) != 1:
        return ' '.join(map(str, lst))
    return '[{}]'.format(lst[0])

In [None]:
# Independent copy holding only the three columns kept for the submission.
out = out_df.loc[:, ['id', 'categories', 'osd']].copy()
out

In [None]:
# Alternative: keep only the first category of each image.
# out['categories'] = out_df['categories'].apply(select_top)

# Format from out_df rather than from out itself: out_df still holds the raw
# lists, so re-running this cell cannot re-format already formatted strings.
out['categories'] = out_df['categories'].apply(format_cat)
out

In [None]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
out.to_csv('../submissions/submission_27.csv', index=False)

In [None]:
# Null mask of the first 20 rows of out_df (True marks a missing value).
out_df.isnull().head(20)

In [None]:
# df.to_json('runs/predict.json')

In [None]:
# Load ../runs/predict133m.json into a DataFrame and display it.
pd.read_json('../runs/predict133m.json')

In [None]:
# Load ../runs/predict40m.json into a DataFrame and display it.
pd.read_json('../runs/predict40m.json')