In [17]:
import pandas as pd
import numpy as np
import glob

In [18]:
def read_files(root):
    csv_files = glob.glob('*.csv')
    global data
    data = pd.DataFrame()

    i=0
    last_ts = 0
    last_frame = 0
    for f in csv_files:
        df = pd.read_csv(f)
        df, last_ts, last_frame = adjust_indices(df, last_ts, last_frame)
        df.insert(0, "Interval", len(df.index.values)*["Clip " + str(i+1) + ": " + str(f)][:3])
        data = pd.concat([data, df])
        i += 1
        
    data.index = pd.Index(range(len(data.index)))

def adjust_indices(df, ts, frame):
    df.loc[:, "Frame Index"] = df.loc[:, "Frame Index"].values + frame
    df.loc[:, "Timestamp(x)"] = df.loc[:, "Timestamp(x)"].values + ts
    return df, ts+30, frame+7200

read_files(None)
data

In [11]:
len(data.index)

60

In [12]:
FAU_NAMES = data.columns[3:7]
CONF_NAMES = data.columns[10:14]

# have user input viable indices where bounding boxes are accurate
viables = [11, 24, 25]
faus = data.loc[viables, FAU_NAMES]
fau_arr = np.asarray(faus)
confs = data.loc[viables, CONF_NAMES]
conf_arr = np.asarray(confs)

clean_data = faus.join(confs)
clean_data

Unnamed: 0,Timestamp(x),Orbital,Nose,Ears,AU Scored,Confidence(Orbital),Confidence(Nose),Confidence(Ears)
11,11.011,0,-1,0,3,0.999901,0.0,0.998044
24,24.024,0,-1,1,2,0.908244,0.0,0.566989
25,25.025,1,-1,1,3,1.0,0.0,0.999747


In [13]:
fau_arr
conf_arr

array([[3.        , 0.99990129, 0.        , 0.99804413],
       [2.        , 0.90824449, 0.        , 0.56698865],
       [3.        , 0.99999976, 0.        , 0.99974674]])

In [15]:
def get_all_fau_scores(data):
    """
        Get data per FAU without filtering for manually confirmed bounding box accuracy.
        Run the same analytics as with clean data on all timestamps of the data matrix without checking
        whether the bounding boxes were correctly identified at those points.
    """
    
    THRESHOLD = 0.90
    
    faus = {}
    fs_sorted = {}
    times = {}
    
    for i in range(len(FAU_NAMES)):
        fau = FAU_NAMES[i]
        conf = CONF_NAMES[i]
        
        mgs = data[fau].where(data[conf] >= THRESHOLD).dropna()
        mgs_sorted = data.sort_values(conf, ascending=False)[fau]
        sorted_arrs = np.asarray(mgs_sorted)
        print(mgs)
        #print(mgs_sorted, "\n")
        faus[fau] = np.asarray(mgs)
        print("index", mgs.index, "\n")
        times[fau] = mgs.index
        fs_sorted[fau] = sorted_arrs
    
    return faus, times, fs_sorted

def get_clean_fau_analytics():
    """
        Get clean data per FAU after filtering for high confidence scores only.
        
        For each Facial Action Unit, filter for datapoints that correspond to high confidence values.
        
        Returns 3 dictionaries with:
            - Faus: data arrays with all high confidence scores organized by FAU identity
            - Times: label arrays containing timestamp identity for each included confidence score per FAU
            - Scores sorted: dataframes containing all FAU scores sorted by confidence across each individual column
    """
    THRESHOLD = 0.90
    
    faus = {}
    fs_sorted = {}
    times = {}
    
    for i in range(len(FAU_NAMES)):
        fau = FAU_NAMES[i]
        conf = CONF_NAMES[i]
        
        mgs = clean_data[fau].where(clean_data[conf] >= THRESHOLD).dropna()
        mgs_sorted = clean_data.sort_values(conf, ascending=False)[fau]
        sorted_arrs = np.asarray(mgs_sorted)
        faus[fau] = np.asarray(mgs)
        times[fau] = mgs.index
        fs_sorted[fau] = sorted_arrs
    
    return faus, times, fs_sorted

def display_analytics_report(faus, times, fs_sorted):
    # user friendly report:
    
    tstamps = {}
    [tstamps.update({k:np.asarray(data.loc[times[k], "Timestamp(x)"])}) for k in times.keys()]
    
    print("*** COMPLETED FAU ANALYTICS ON CLEAN DATAFRAME ***\n\n")
    
    for fau in faus.keys():
        print("\n\n**", fau, "**\n\n")
        print("For", fau, "Facial Action Unit:\n")
        print("High confidence Mouse Grimace Scale scores for this clip:\n", faus[fau])
        print("\nCorresponding timestamps for high confidence MGS score:\n", tstamps[fau])
        print("\nAll viable Mouse Grimace Scale scores descending order of Confidence Score:\n", fs_sorted[fau])
    

def get_fau_scores(viables, fau, conf):
    """
        Get arrays of Facial Action Unit scores across each row of data that has been determined to be viable through visual inspection.
        IGNORE. not pursuing this anymore.
    """
    THRESHOLD = 0.90
    
    fau_mgs = {}
    for fau in FAU_NAMES:
        fau_mgs[str(fau)] = []
    
    for v in range(len(viables)):
        for fi in range(len(fau)):
            if conf[v][fi] >= THRESHOLD:
                pass
                #print(fau_mgs[list(fau_mgs.keys())[fi]], fau[v][fi])
                #fau_mgs[list(fau_mgs.keys())[fi]].append(fau[v][fi])
    
    print(fau_mgs)
    return NotImplemented



f,t,s = get_clean_fau_analytics()
display_analytics_report(f,t,s)

*** COMPLETED FAU ANALYTICS ON CLEAN DATAFRAME ***




** Timestamp(x) **


For Timestamp(x) Facial Action Unit:

High confidence Mouse Grimace Scale scores for this clip:
 [11.011 24.024 25.025]

Corresponding timestamps for high confidence MGS score:
 [11.011 24.024 25.025]

All viable Mouse Grimace Scale scores descending order of Confidence Score:
 [11.011 25.025 24.024]


** Orbital **


For Orbital Facial Action Unit:

High confidence Mouse Grimace Scale scores for this clip:
 [0 0 1]

Corresponding timestamps for high confidence MGS score:
 [11.011 24.024 25.025]

All viable Mouse Grimace Scale scores descending order of Confidence Score:
 [1 0 0]


** Nose **


For Nose Facial Action Unit:

High confidence Mouse Grimace Scale scores for this clip:
 []

Corresponding timestamps for high confidence MGS score:
 []

All viable Mouse Grimace Scale scores descending order of Confidence Score:
 [-1 -1 -1]


** Ears **


For Ears Facial Action Unit:

High confidence Mouse Grimace Scale

In [16]:
def get_clean_rows(temp):
    """
        Get clean data per row after filtering for high confidence rows only.
        
        Filter for rows that correspond to high average confidence value across Facial Action Units.
        Returns a dictionary with viable FAU scores at timepoints of accurate bounding box classification and high confidence scores across facial action units.
    """
    THRESHOLD = 0.90
    
    # insert new column with average confidence value
    if "Avg Confidence" in temp.columns.values:
        temp = temp.drop(columns=['Avg Confidence'])
    temp.insert(len(temp.columns), "Avg Confidence", np.mean(np.asarray(temp.loc[:, CONF_NAMES]), axis=1))
    filtered = temp.sort_values(by='Avg Confidence', ascending=False)
    fau = filtered.loc[:, FAU_NAMES]
    conf = filtered.loc[:, CONF_NAMES]
    avg = filtered.loc[:, 'Avg Confidence']
    filtered = fau.join(avg)
    # join confidence columns as well
    #filtered = fau.join(avg).join(conf)
    
    return filtered
    
get_clean_rows(data)

Unnamed: 0,Timestamp(x),Orbital,Nose,Ears,Avg Confidence
28,28.028,0,0,2,1.74941
52,52.022,1,0,0,1.745738
38,38.008,1,2,2,1.745405
53,53.023,1,0,0,1.720748
49,49.019,1,0,0,1.715644
56,56.026,1,0,0,1.639012
30,30.0,0,2,0,1.499948
46,46.016,0,0,2,1.499862
54,54.024,1,0,1,1.483923
50,50.02,1,2,1,1.343722
