In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format ='retina'
sns.set_style('whitegrid')

# Path of the predictions CSV analysed throughout this notebook.
CSV_LOC = 'results/predictions_dlibResnet_threshold_0.4.csv'
# Prefix for saved figure files: the CSV path without its '.csv' suffix.
NAME = CSV_LOC[:-len('.csv')]

In [None]:
def create_source_cols(df):
    """
    Return a copy of ``df`` with the attributes encoded in its file names
    expanded into separate columns.

    The input dataframe must have the columns ``source_name`` and
    ``target_name``, each holding a file name of the form
        'class_sess_pose_illum_expr_pPITCH_yYAW_rROLL.extension'
    for example '005_66_051_17_0_p5.4_y4.3_r7.5.png'.

    For both prefixes ('source_' and 'target_') the following columns are
    added: class (parsed as int), session, pose, illumination, expression,
    pitch, yaw and roll (all parsed as float).

    NOTE: a name of '-1_' (used for targets) carries no attributes; every
    derived column is None for such a row. Non-string cells (e.g. NaN) are
    treated the same way instead of raising.

    The original dataframe is left untouched.
    """

    df = df.copy()

    def _field_parser(index, cast, prefixed=False, last=False):
        """Build a parser for the ``index``-th '_'-separated field of a name."""
        def _parse(filename):
            if not isinstance(filename, str) or filename == '-1_':
                return None
            field = filename.split('_')[index]
            if last:
                # Drop only the trailing '.extension'; splitting on the first
                # dot would truncate decimals ('r7.5.png' -> 7 instead of 7.5).
                field = field.rsplit('.', 1)[0]
            if prefixed:
                # Skip the one-letter tag ('p', 'y' or 'r') in front of the value.
                field = field[1:]
            return cast(field)
        return _parse

    # NOTE(review): the old commented-out code suggested that future file names
    # will carry extra augmented illumination/intensity fields after the roll.
    # When that happens, add their parsers here and move ``last=True`` to the
    # final field -- field positions to be confirmed against the new names.
    parsers = [
        ('class', _field_parser(0, int)),
        ('session', _field_parser(1, float)),
        ('pose', _field_parser(2, float)),
        ('illumination', _field_parser(3, float)),
        ('expression', _field_parser(4, float)),
        ('pitch', _field_parser(5, float, prefixed=True)),
        ('yaw', _field_parser(6, float, prefixed=True)),
        ('roll', _field_parser(7, float, prefixed=True, last=True)),
    ]

    for name in ['source_name', 'target_name']:
        prefix = name.split('_')[0] + '_'
        for attribute, parser in parsers:
            df[prefix + attribute] = df[name].apply(parser)

    return df

### Results analysis
1. Create the dataframe for the CSV results file at `CSV_LOC`.
2. Append all available variables (pose, illumination, etc.), extracted from both the source and target names through `create_source_cols`.
3. Next, generate some interesting results.

In [None]:
df = pd.read_csv(CSV_LOC)

# TESTING
# NOTE(review): these two synthetic rows are added to the real predictions and
# therefore count towards every statistic computed below -- remove them before
# reporting final numbers.
test_rows = [
    {'source_name': '005_66_051_17_0_p0_y0_r0.png',
     'target_name': '005_66_051_17_0_p5.4_y4.3_r7.5.png',
     'distance': .666, 'confidence': None, 'predicted_class': 5,
     'true_class': 5, 'correct': True},
    {'source_name': '006_67_051_17_0_p0_y0_r0.png',
     'target_name': '009_67_051_17_0_p5.4_y0.0_r0.0.png',
     'distance': .666, 'confidence': None, 'predicted_class': 9,
     'true_class': 6, 'correct': True},
]
# DataFrame.append was removed in pandas 2.0; pd.concat works on old and new versions.
df = pd.concat([df, pd.DataFrame(test_rows)], ignore_index=True)
# END TESTING


# Note that the below edits should not be problematic, they should always be true
predicted_no_match = df['predicted_class'] == -1

## If predicted class is -1 for true targets (true_class <= 100), it should always be false
df.loc[(df['true_class'] <= 100) & predicted_no_match, 'correct'] = False

## If predicted class is -1 for the remaining classes (true_class > 100), it should always be true
df.loc[(df['true_class'] > 100) & predicted_no_match, 'correct'] = True

new_df = create_source_cols(df)

### Accuracy

In [None]:
# Fraction of rows flagged as correct.
model_accuracy = new_df['correct'].mean()
print(f"Accuracy of the model: {model_accuracy}")

In [None]:
# Only the misclassified rows matter for the false positive / false negative analysis.
is_misclassified = new_df['correct'].eq(False)
false_df = new_df[is_misclassified]
false_df.head()

### Histogram of wrongly classified images

Where does the model make mistakes? A simple histogram of the false positives and false negatives can perhaps shed some light on this.

In [None]:
# Integer source class of every misclassified row; 100 is the cut-off that
# separates the two kinds of error.
error_source_class = false_df['source_class'].astype(int)
false_pos = false_df[error_source_class > 100]
false_neg = false_df[error_source_class <= 100]

In [None]:
# Overlaid count histograms of the source classes behind each kind of error.
fig, ax = plt.subplots(figsize=(18, 8))

for group_label, errors in [("False Positives", false_pos),
                            ("False Negatives", false_neg)]:
    error_classes = errors['source_class']
    ax.hist(error_classes.astype(int),
            # One bin per distinct class; at least one bin so that an empty
            # error group does not make the histogram call fail.
            bins=max(1, error_classes.nunique()),
            # Plain matplotlib histogram replaces the deprecated sns.distplot
            # (kde=False, norm_hist=False); 0.4 matches its default opacity.
            alpha=0.4,
            label=f"{group_label}, num = {error_classes.count()}")

ax.set_xlabel("Source Class", fontsize=20)
ax.set_ylabel('Number of Errors', fontsize=20)
ax.legend(fontsize=15)
#fig.savefig(NAME + "_hist_fp_fn.png", dpi=400)

plt.show();

In [None]:
# Number of rows whose true class is not above 100.
len(new_df) - (new_df['true_class'] > 100).sum()

## False Negatives

Let's look more closely at the false negatives. False positives should mostly be removable through thresholding, but false negatives can be quite a problem.

In [None]:
def _count_errors(values, categories):
    """Map each category to the number of entries in ``values`` equal to it."""
    return {int(category): int((values == category).sum()) for category in categories}


# Categories shown on the x-axis of each panel, in display order.
uniqs_expres = sorted(false_neg['source_expression'].astype(int).unique())
uniqs_pose = np.array([80, 130, 140, 51, 50, 41, 190])
uniqs_illum = np.array([2, 7, 17, 12])

# Number of false negatives per category.
expres_dict = _count_errors(false_neg['source_expression'].astype(int), uniqs_expres)
pose_dict = _count_errors(false_neg['source_pose'].astype(int), uniqs_pose)
illumination_dict = _count_errors(false_neg['source_illumination'].astype(int), uniqs_illum)

fig, axes = plt.subplots(1, 3, figsize=(20, 6))

panels = [
    (uniqs_expres, expres_dict, "Expression type"),      # EXPRESSION
    (uniqs_pose, pose_dict, "Pose angle"),               # POSE
    (uniqs_illum, illumination_dict, "Illumination type"),  # ILLUMINATION
]
for ax, (categories, counts, xlabel) in zip(axes, panels):
    positions = list(range(len(categories)))
    # A dict view is not a sequence; hand matplotlib a real list of heights.
    ax.bar(positions, list(counts.values()), alpha=0.5)
    ax.set_xticks(positions)
    ax.set_xticklabels(categories)
    ax.set_xlabel(xlabel, fontsize=20)
    ax.set_ylabel("Number of Errors", fontsize=20)

#fig.savefig(NAME + "_hist_exp_pose_ill.png", dpi=400)

## Measure the effect of augmented yaw/pitch/roll/illumination/intensity

In [None]:
def filter_yprii(df, columns=('target_pitch', 'target_yaw', 'target_roll')):
    """Select the correct predictions whose matched target was augmented.

    In order to understand how augmentation increases/decreases the accuracy,
    keep only the correctly classified rows whose target has a value for
    every column in ``columns`` (missing values are None/NaN under pandas)
    and a non-zero value in at least one of them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'correct' column and every column named in ``columns``.
    columns : sequence of str, optional
        Augmentation columns to inspect. Defaults to pitch, yaw and roll.
        TODO: when new files arrive, also pass the augmented illumination and
        intensity columns (presumably 'target_illumination_augmented' and
        'target_intensity_augmented' -- confirm the names once they exist).

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df``.

    The percentage of correctly classified rows that were selected is printed
    as a side effect.
    """

    columns = list(columns)
    correct = df[df['correct'] == True]
    augmentation = correct[columns]

    # First filter out any nans
    is_complete = augmentation.notna().all(axis=1)

    # Second, only select where at least one attribute has been changed.
    # Testing each column separately (rather than their sum) keeps rows whose
    # values cancel out, e.g. pitch=5 and yaw=-5.
    is_changed = augmentation.ne(0.0).any(axis=1)

    keep = is_complete & is_changed
    n_matched = int(keep.sum())
    # Guard against an empty selection of correct rows (no division by zero).
    share = 100.0 * n_matched / len(correct) if len(correct) else 0.0
    print(f"{share:.2f}% of correctly classified matched to augmented data (n={n_matched}).")

    return correct[keep]

# Correctly classified rows that filter_yprii keeps as matched to an augmented target.
filtered_yprii = filter_yprii(new_df)

In [None]:
# Peek at the target angles of the first few selected rows.
filtered_yprii.loc[:, ['target_pitch', 'target_yaw', 'target_roll']].head()

In [None]:
# One histogram per target angle, side by side on a shared y-axis.
angle_columns = ['target_pitch', 'target_yaw', 'target_roll']
filtered_yprii[angle_columns].hist(figsize=(20, 4), sharey=True, layout=(1, 3));

# TODO: once the augmented illumination/intensity columns exist, plot five panels:
# filtered_yprii[angle_columns + ['target_illumination_augmented',
#                                 'target_intensity_augmented']].hist(
#     figsize=(25, 4), sharey=True, layout=(1, 5));