# Imports

In [83]:
import pandas as pd
import copy
import numpy as np

# Define strategies

In [84]:
def calculate_best_confidence_max_value(df, df_specific_id):
    '''
    Calculate the best confidence score based on the strategy max value:
    the label of the single most confident row of the track wins.
    @params df: data where 1 row is an image where tracking and classification were made
    @params df_specific_id: data filtered on a specific track id; its index
        labels must also exist in df
    @return: tuple (index label of the row with the highest confidence,
        value of the "subclass" column of that row in df)
    Best test: track_id == 1
    '''
    max_confidence_index = df_specific_id['confidence'].idxmax()
    # idxmax() returns an index *label*, so the lookup has to be label-based
    # (.loc). A positional .iloc lookup is only correct when df happens to
    # carry a default 0..n-1 index.
    new_label = df.loc[max_confidence_index, 'subclass']
    return max_confidence_index, new_label

def calculate_best_confidence_most_frequent(df, df_specific_id):
    '''
    Calculate the best confidence score based on the strategy most frequent:
    the most frequent label of the track wins; when several labels are tied
    for most frequent, the tied label owning the most confident row wins.
    @params df: data where 1 row is an image where tracking and classification were made
        (not used by this strategy, kept so all strategies share one signature)
    @params df_specific_id: data filtered on a specific track id
    @return: tuple (index label of the most confident row carrying the
        selected label, selected label)
    Best test: track_id == 39
    '''
    # mode() returns every value tied for the highest count, so it can hold
    # more than one label.
    most_frequent_species = df_specific_id['subclass'].mode()
    # Keep only rows carrying one of the most frequent labels. With a single
    # mode this is exactly "rows of the most frequent label"; with a tie the
    # most confident row among all tied labels decides the winner.
    candidates = df_specific_id[df_specific_id['subclass'].isin(most_frequent_species)]
    max_confidence_index = candidates['confidence'].idxmax()
    new_label = candidates.loc[max_confidence_index, 'subclass']
    return max_confidence_index, new_label

def calculate_best_confidence_avg_max(df, df_specific_id):
    '''
    Placeholder for a third smoothing strategy; not implemented yet.
    NOTE(review): the name suggests picking the label with the highest
    average confidence - TODO confirm before implementing.
    @params df: data where 1 row is an image where tracking and classification were made
    @params df_specific_id: data filtered on a specific track id
    @return: always None for now
    '''
    return None

# Main

In [92]:
def count_species(csv, output_path='count.txt'):
    '''
    Count the rows per species (values of the "subclass" column).
    NOTE(review): this is a count of individuals only if the CSV holds one
    row per individual - TODO confirm against the input file.
    @params csv: path (or file-like object) of the CSV to read
    @params output_path: destination of the counts, written as a header-less
        CSV (default 'count.txt'); pass None to skip writing
    @return: Series indexed by subclass, sorted by count in descending order
    '''
    df = pd.read_csv(csv)
    df_count = df.groupby(['subclass']).size().sort_values(ascending=False)
    if output_path is not None:
        df_count.to_csv(output_path, header=False)
    return df_count

def modify_label_confidence(df, df_specific_id):
    '''
    Overwrite, in place in df, the label of every row of a track except the
    selected reference row: "subclass" becomes the selected label and
    "confidence" becomes the marker string 'smoothed'.
    @params df: full data, modified in place
    @params df_specific_id: rows of df belonging to one track id; its index
        labels must be the ones used in df
    @return: None
    '''
    max_confidence_index, new_label = calculate_best_confidence_most_frequent(df, df_specific_id)
    # Every row of the track except the reference row gets rewritten.
    rows_to_smooth = df_specific_id.index[df_specific_id.index != max_confidence_index]
    if rows_to_smooth.empty:
        return
    # 'smoothed' is a string: make the column object-typed explicitly instead
    # of relying on silent upcasting of a numeric column, which pandas has
    # deprecated.
    if pd.api.types.is_numeric_dtype(df['confidence']):
        df['confidence'] = df['confidence'].astype(object)
    # The index values are labels, so address the rows with .loc (a
    # positional .iloc is only right for a default 0..n-1 index).
    df.loc[rows_to_smooth, 'subclass'] = new_label
    df.loc[rows_to_smooth, 'confidence'] = 'smoothed'
        
def smoothing_confidence_with_tracking(df_raw, track_ids=(39,)):
    '''
    Use the tracking insect results to smooth the classification: for each
    requested track seen on more than one row, the labels of the track are
    harmonised by modify_label_confidence.
    @params df_raw: data with at least the columns "track_id", "subclass"
        and "confidence"; it is not modified
    @params track_ids: iterable of track ids to smooth. The default (39,)
        keeps the historical behaviour of processing only track 39; pass
        None to process every track id present in the data
    @return: a modified copy of df_raw
    '''
    df = df_raw.copy()
    if track_ids is None:
        track_ids = df['track_id'].dropna().unique()
    for track_id in track_ids:
        df_ids_raw = df[df['track_id'] == track_id]
        # A track with a single row has nothing to be smoothed against.
        if df_ids_raw.shape[0] <= 1:
            continue
        # necessary, otherwise: confidence = object instead of int
        df_specific_id = df_ids_raw.astype({'confidence': int})
        modify_label_confidence(df, df_specific_id)
    return df

def main():
    '''
    Load the tracking/classification annotation CSV and return the data
    after smoothing_confidence_with_tracking has been applied to it.
    '''
    annotations = pd.read_csv("~/code/moth_project/0_database/tracking/track_localize_classify_annotation-2022_05_13.csv")
    return smoothing_confidence_with_tracking(annotations)


In [93]:
# Run the pipeline defined in main() and keep the returned DataFrame.
df = main()

In [94]:
# Save the result; to_csv's default also writes the index as the first column.
df.to_csv('test_most_frequent.csv')