In [263]:
import json, os, re, math, random
import pandas as pd

def time2frame(time, fps=30, frameFreq=4):
    """Convert an "H:M:S" timestamp into a zero-padded frame-number string.

    The timestamp is converted to a frame position (seconds * fps), snapped to
    the nearest multiple of frameFreq (a remainder of exactly frameFreq/2 snaps
    upward), and left-padded with "0" to at least 5 characters.

    time      : str, "hours:minutes:seconds"; seconds may carry a fraction.
    fps       : frames per second used for the conversion.
    frameFreq : spacing between kept frames; the result is a multiple of it.
    """
    hours, minutes, seconds = time.split(":")
    framePos = (int(hours)*3600 + int(minutes)*60 + float(seconds)*1) * fps

    # Floor to the sampling grid first, then bump one step if the remainder is at least half a step.
    snapped = framePos // frameFreq * frameFreq
    if framePos % frameFreq >= frameFreq/2:
        snapped += frameFreq

    return str(int(snapped)).rjust(5, "0")

def frame2time(frame, fps=30):
    """Convert a frame number (int or numeric string) into an "HH:MM:SS.ss" style timestamp.

    Hours and minutes are left-padded to two digits. Seconds are rounded to two
    decimals and printed with str(), so trailing zeros are not kept
    (e.g. "00:00:15.0", "00:01:27.47").

    frame : frame index; anything int() accepts, e.g. "02624".
    fps   : frames per second used for the conversion.
    """
    totalSeconds = int(frame) / fps

    hours = str(int(totalSeconds // 3600))
    withinHour = totalSeconds % 3600
    minutes = str(int(withinHour // 60))
    seconds = round(withinHour % 60, 2)

    # Seconds below 10 need one leading zero to keep the two-digit integer part.
    secondsPad = "0" if float(seconds) < 10 else ""
    return hours.rjust(2, "0") + ":" + minutes.rjust(2, "0") + ":" + secondsPad + str(seconds)

# Round-trip sanity check: timestamp -> zero-padded frame string -> timestamp.
# Calls the helpers defined in this file (time2frame / frame2time); the names
# time2frameStr / frameStr2time are not defined here and raise NameError on a fresh run.
print("00:01:27.50", time2frame("00:01:27.50"), frame2time("02624"))

def entropy(L):
    """Return the entropy (natural log) of the probabilities in L, rounded to 4 decimals.

    Entries that are not greater than 0 contribute 0 to the sum.
    """
    return round(sum(-p*math.log(p) if p > 0 else 0 for p in L), 4)


def thresholdConversion(minThreshold, numClass):
    """Translate a minimum-confidence threshold into an entropy threshold.

    Returns the entropy of a numClass-way distribution in which one class holds
    minThreshold and the remaining probability is shared equally by the other
    numClass-1 classes.
    """
    restCount = numClass - 1
    restShare = (1 - minThreshold) / restCount
    return entropy([minThreshold] + [restShare] * restCount)

# Sanity check: thresholdConversion(0.7, 4) builds the distribution [0.7, 0.1, 0.1, 0.1], so both printed values should match.
print( entropy([0.7,0.1,0.1,0.1]), thresholdConversion(0.7,4) )

class UnlabeledResult: # Active learning
    """Per-image prediction table for unlabeled data, plus exporters that pick rows to label.

    Attributes set by __init__:
        df               : DataFrame with columns ['imgPath', 'cf_0', ..., 'cf_{K-1}', 'entropy', 'pd_cls'].
        entropyThreshold : entropy value obtained from minThreshold via thresholdConversion.
        resultFolder     : folder that result.json is read from and all CSVs are written to.
    """

    def __init__(self, testCsvPath, resultFolder, minThreshold=0.7):
        """Load paths and confidences, derive entropy / pd_cls, and write result.csv.

        testCsvPath  : space-delimited, header-less CSV; only its first column is kept (as 'imgPath').
        resultFolder : must contain result.json, a list of per-image confidence lists, shape=(data,classes).
        minThreshold : confidence threshold, converted to an entropy threshold.

        pd_cls is the argmax class when the row's entropy is below entropyThreshold;
        otherwise it is the last class index (classes-1, "others").
        """
        # load: test.csv, result.json
        self.df = pd.read_csv(testCsvPath, header=None, delimiter=' ')[[0]].rename(columns={0:"imgPath"})
        with open(f"{resultFolder}/result.json", 'r') as f:  # close the handle deterministically
            resultL = json.load(f) # shape=(data,classes)
        numClass = len(resultL[0])
        self.entropyThreshold = thresholdConversion(minThreshold, numClass)

        # add columns: confidence of each class, entropy, pd_cls
        for j in range(numClass):
            self.df[f"cf_{j}"] = [ row[j] for row in resultL ]
        self.df["entropy"] = [ entropy(row) for row in resultL ]
        # Confident rows (low entropy) keep their argmax; uncertain rows fall back to the last class.
        self.df["pd_cls" ] = [
            row.index(max(row)) if rowEntropy < self.entropyThreshold else numClass-1
            for row, rowEntropy in zip(resultL, self.df['entropy'])
        ]
        print( self.df['pd_cls'].value_counts() )

        # save: result
        self.df.to_csv(f"{resultFolder}/result.csv", index=False) # ['imgPath', 'cf_0', 'cf_1', 'cf_2', 'cf_3', 'entropy', 'pd_cls']
        self.resultFolder = resultFolder

    def activeEntropy(self, maxN=30):
        """Write the (at most) maxN highest-entropy rows above entropyThreshold to active_entropy.csv.

        Output columns: ['imgPath','entropy','time','gt'], sorted by imgPath; 'gt' is left empty.
        """
        uncertain  = self.df['entropy'] > self.entropyThreshold
        # .copy() so the column assignments below act on an independent frame, not a slice of self.df.
        df_entropy = self.df.loc[uncertain, ['imgPath','entropy']].sort_values(['entropy'], ascending=False).iloc[:maxN].copy()
        # assumes imgPath ends with a 5-digit frame number followed by a 4-character extension (e.g. "_02712.jpg") — TODO confirm
        df_entropy['time'] = df_entropy['imgPath'].apply(lambda s: frame2time(s[-9:-4]) )
        df_entropy['gt']   = [None]*len(df_entropy)
        df_entropy.sort_values(['imgPath']).to_csv( f"{self.resultFolder}/active_entropy.csv", index=False ) # ['imgPath','entropy','time','gt']

    def activeSeries(self, maxN=30):
        """Write up to maxN rows whose predicted class breaks the expected class order to active_series.csv.

        Rows predicted as the last class ("others") are dropped first. A remaining row is
        "irregular" when its pd_cls is neither the previous row's pd_cls nor the next class
        in the cycle 0 -> 1 -> ... -> classN-2 -> 0. Irregular rows are sampled with a fixed
        seed (7); if fewer than maxN exist, all of them are written.

        Output columns: ['imgPath','pd_cls','time','gt'], sorted by imgPath; 'gt' is left empty.
        """
        classN    = len(self.df.columns)-3   # every column except imgPath, entropy, pd_cls is a cf_* column
        df_series = self.df[ self.df['pd_cls']!=classN-1 ][['imgPath','pd_cls']].reset_index(drop=True)#; self.df_series=df_series

        # Compare each row with its predecessor; the first row has no predecessor and is never irregular.
        clsL = df_series['pd_cls'].tolist()
        idxL = [
            i for i, (prev_cls, pd_cls) in enumerate(zip(clsL, clsL[1:]), start=1)
            if pd_cls not in [prev_cls, (prev_cls+1)%(classN-1)]
        ]
        # Guard the ratio: df_series is empty when every row was predicted as "others".
        regular_accuracy = round(1-len(idxL)/len(df_series),3) if len(df_series) else float('nan')
        print( f"len(df_series)={len(df_series)}, len(idxL)={len(idxL)}, regular_accuracy={regular_accuracy}" )

        # random.sample raises ValueError when asked for more items than exist, so clamp the sample size.
        pickedL           = random.Random(7).sample(idxL, min(maxN, len(idxL)))
        df_series         = df_series.loc[ pickedL ]
        # assumes imgPath ends with a 5-digit frame number followed by a 4-character extension (e.g. "_02712.jpg") — TODO confirm
        df_series['time'] = df_series['imgPath'].apply( lambda s: frame2time(s[-9:-4]) )
        df_series['gt']   = [None]*len(df_series)
        df_series.sort_values(['imgPath']).to_csv( f"{self.resultFolder}/active_series.csv", index=False ) # ['imgPath','pd_cls','time','gt']

00:01:27.50 02624 00:01:27.47
0.9404 0.9404


In [264]:
# Build the prediction table for the 20220826 unlabeled set; the constructor also writes result.csv into resultFolder.
obj = UnlabeledResult(testCsvPath="../_data/csvUnlabeled/20220826_all/test.csv", resultFolder="../_exps/unlabeled_0826_all")
# Export the highest-entropy rows to active_entropy.csv (the 'gt' column is left empty, presumably to be filled in by hand).
obj.activeEntropy()
# Export a seeded sample of rows whose predicted class breaks the expected class order to active_series.csv.
obj.activeSeries()

3    57239
2     2879
1     2853
0     1209
Name: pd_cls, dtype: int64
len(df_series)=6941, len(idxL)=852, regular_accuracy=0.877


In [260]:
#s = "/home/jovyan/data-vol-1/VideoMAE/_data/imgs/20220826/video_20220826222233_02712.jpg"
#obj.df[ obj.df['imgPath']==s ].index
#obj.df.loc[ [31156+i for i in range(-2,2)] ]

# import matplotlib.pyplot as plt
# plt.figure(figsize=(24,4))
# plt.plot( obj.df_series['pd_cls'][:300] )
# plt.show()