# import modules

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

import plotly.graph_objects as go
import matplotlib.pyplot as plt

import dash
from dash import dcc, html, Input, Output

# load data and sample

In [3]:
df_tsne_metrics = pd.read_csv("../data/processed/tsne_metrics.csv", index_col=[0])

# rename for better labels in app
df_tsne_metrics.rename(columns={'0': 'Projection 1'}, inplace=True)
df_tsne_metrics.rename(columns={'1': 'Projection 2'}, inplace=True)
df_tsne_metrics.rename(columns={'action': 'Action'}, inplace=True)

df_tsne_metrics

Unnamed: 0,Projection 1,Projection 2,idx,Action,mean_mpjpe_STSGCN,mean_mpjpe_motionmixer,CBLOF_full,CBLOF_Ti_10,CBLOF_To_25,IFOREST_full,IFOREST_Ti_10,IFOREST_To_25,HDBSCAN_full,HDBSCAN_Ti_10,HDBSCAN_To_25,LOF_full,LOF_Ti_10,LOF_To_25,min_mean_mpjpe
0,-1.491875,-108.968216,0,walking,93.675567,102.448229,6.826516,4.801257,6.284274,-0.071506,-0.103554,-0.061220,0.225378,0.129219,0.096772,1.342149,1.133492,1.298507,93.675567
1,-1.542240,-108.997650,1,walking,85.310110,94.610764,6.778658,4.784079,6.272123,-0.070384,-0.103796,-0.057540,0.217411,0.125704,0.097167,1.328314,1.136863,1.291393,85.310110
2,-1.600920,-109.033640,2,walking,83.354780,99.023203,6.705774,4.822663,6.241388,-0.074504,-0.103825,-0.056933,0.205538,0.134777,0.093353,1.309284,1.149762,1.280669,83.354780
3,-1.678499,-109.072540,3,walking,87.318396,104.901132,6.608236,4.907010,6.178011,-0.071427,-0.100295,-0.057331,0.189529,0.150669,0.082388,1.284328,1.167995,1.262712,87.318396
4,-1.787714,-109.122210,4,walking,89.802102,107.749316,6.323247,4.981930,6.099157,-0.072563,-0.097557,-0.062177,0.173460,0.165176,0.070758,1.262906,1.181005,1.244329,89.802102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180072,-67.498130,-76.802600,9270,walkingtogether,37.963082,39.220811,4.280818,5.447292,4.396982,-0.077916,-0.087516,-0.092054,0.346219,0.025580,0.023360,1.041184,1.073231,1.065025,37.963082
180073,-67.102540,-76.509630,9271,walkingtogether,37.647556,34.372172,4.432358,5.610270,4.265305,-0.079021,-0.081029,-0.095020,0.345278,0.038833,0.004577,1.039324,1.074453,1.061717,34.372172
180074,-66.749420,-76.127110,9272,walkingtogether,41.737064,32.446786,4.631757,5.733671,4.174752,-0.079837,-0.083097,-0.099845,0.341166,0.044855,0.083452,1.038706,1.072405,1.059400,32.446786
180075,-66.459190,-75.689150,9273,walkingtogether,39.341552,31.302170,4.869488,5.748328,4.188549,-0.082146,-0.084715,-0.097257,0.336184,0.036809,0.058962,1.038026,1.070272,1.054587,31.302170


In [4]:
# function to sample randomly from actions in list actions
def sample_by_action(df, n = 1000, actions=['walking', 'eating'], random_state = 42):
    df_sample = pd.DataFrame()
    for action in actions:
        df_sample= pd.concat([df_sample, df[df.Action == action].sample(n=n, random_state=random_state)])
    return df_sample

In [5]:
df_tsne_metrics["Action"].unique()

array(['walking', 'eating', 'smoking', 'discussion', 'directions',
       'greeting', 'phoning', 'posing', 'purchases', 'sitting',
       'sittingdown', 'takingphoto', 'waiting', 'walkingdog',
       'walkingtogether'], dtype=object)

In [6]:
# for nicer plots in app only consider 7 actions and 100 sequences per action
actions = ['walking', 'eating', 'discussion', 'directions', 'greeting', 'sitting', 'waiting']
df_sample = sample_by_action(df_tsne_metrics, n=100, actions=actions, random_state = 42)
df_sample

Unnamed: 0,Projection 1,Projection 2,idx,Action,mean_mpjpe_STSGCN,mean_mpjpe_motionmixer,CBLOF_full,CBLOF_Ti_10,CBLOF_To_25,IFOREST_full,IFOREST_Ti_10,IFOREST_To_25,HDBSCAN_full,HDBSCAN_Ti_10,HDBSCAN_To_25,LOF_full,LOF_Ti_10,LOF_To_25,min_mean_mpjpe
3116,-93.167620,74.029290,3116,walking,37.308055,35.490965,4.849226,6.953999,5.722817,-0.073104,-0.055284,-0.065945,0.010343,0.013739,0.014408,1.041582,1.172646,1.100537,35.490965
14490,-71.934350,-61.324726,14490,walking,44.560363,51.674447,5.315620,6.029859,5.764158,-0.079457,-0.051268,-0.074286,0.124446,0.076333,0.027187,1.123687,1.103043,1.085511,44.560363
14416,-73.751980,-63.028355,14416,walking,45.434795,52.276969,5.197551,5.734965,5.694318,-0.093236,-0.085196,-0.074192,0.038283,0.042022,0.005718,1.069552,1.113829,1.085680,45.434795
14711,-51.916620,-69.005280,14711,walking,32.930991,43.683820,5.149294,5.658388,5.455088,-0.058082,-0.078576,-0.083818,0.069968,0.059675,0.048870,1.077815,1.054475,1.049560,32.930991
3307,-0.490633,-89.492744,3307,walking,49.074074,40.456684,7.142880,5.659097,7.190692,-0.035939,-0.075651,-0.046680,0.105932,0.018158,0.075175,1.065702,1.105696,1.101048,40.456684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154050,-92.135130,17.557468,4877,waiting,55.115422,58.037701,7.023521,5.828053,7.248826,-0.057852,-0.083781,-0.056053,0.215572,0.045977,0.205104,1.231407,1.130632,1.224141,55.115422
150450,-55.126200,92.703740,1277,waiting,91.728657,77.316586,8.945518,7.080402,8.368841,-0.030043,-0.054267,-0.023852,0.191002,0.122332,0.150030,1.337784,1.121142,1.344779,77.316586
158782,-31.170362,-131.907330,9609,waiting,28.487513,31.580377,5.837021,5.955866,5.843719,-0.059545,-0.079874,-0.065405,0.338864,0.012932,0.077889,1.047087,1.135429,1.056023,28.487513
159337,0.562799,-59.649130,10164,waiting,64.839415,74.598054,5.406252,5.522779,5.358224,-0.075548,-0.104543,-0.085086,0.079583,0.004491,0.050626,1.205506,1.251884,1.184010,64.839415


In [7]:
# process data such that column for number of frames can be added (not added for this app)

suffixes = ['_full', '_Ti_10', '_To_25']

dfs_reshaped = []

for suffix in suffixes:
    cols_current = [col for col in df_sample.columns if col.endswith(suffix)]
    other_cols = [col for col in df_sample.columns if not any(col.endswith(s) for s in suffixes)]

    df_current = df_sample[other_cols + cols_current]
    df_current["n_frames"] = suffix[1:]
    df_current.columns = df_current.columns.str.replace(suffix, '') 
    dfs_reshaped.append(df_current)

df_sample_reshaped = pd.concat(dfs_reshaped)
df_sample_reshaped 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_current["n_frames"] = suffix[1:]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_current["n_frames"] = suffix[1:]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_current["n_frames"] = suffix[1:]


Unnamed: 0,Projection 1,Projection 2,idx,Action,mean_mpjpe_STSGCN,mean_mpjpe_motionmixer,min_mean_mpjpe,CBLOF,IFOREST,HDBSCAN,LOF,n_frames
3116,-93.167620,74.029290,3116,walking,37.308055,35.490965,35.490965,4.849226,-0.073104,0.010343,1.041582,full
14490,-71.934350,-61.324726,14490,walking,44.560363,51.674447,44.560363,5.315620,-0.079457,0.124446,1.123687,full
14416,-73.751980,-63.028355,14416,walking,45.434795,52.276969,45.434795,5.197551,-0.093236,0.038283,1.069552,full
14711,-51.916620,-69.005280,14711,walking,32.930991,43.683820,32.930991,5.149294,-0.058082,0.069968,1.077815,full
3307,-0.490633,-89.492744,3307,walking,49.074074,40.456684,40.456684,7.142880,-0.035939,0.105932,1.065702,full
...,...,...,...,...,...,...,...,...,...,...,...,...
154050,-92.135130,17.557468,4877,waiting,55.115422,58.037701,55.115422,7.248826,-0.056053,0.205104,1.224141,To_25
150450,-55.126200,92.703740,1277,waiting,91.728657,77.316586,77.316586,8.368841,-0.023852,0.150030,1.344779,To_25
158782,-31.170362,-131.907330,9609,waiting,28.487513,31.580377,28.487513,5.843719,-0.065405,0.077889,1.056023,To_25
159337,0.562799,-59.649130,10164,waiting,64.839415,74.598054,64.839415,5.358224,-0.085086,0.050626,1.184010,To_25


In [8]:
# only focus on full number of frames for this app
df = df_sample_reshaped[df_sample_reshaped["n_frames"]=="full"]

In [9]:
# normalize such that all decision scores are in range 0-1 

columns_to_normalize = ['CBLOF', 'IFOREST', 'HDBSCAN', 'LOF']

scaler = MinMaxScaler()

df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])


Unnamed: 0,Projection 1,Projection 2,idx,Action,mean_mpjpe_STSGCN,mean_mpjpe_motionmixer,min_mean_mpjpe,CBLOF,IFOREST,HDBSCAN,LOF,n_frames
3116,-93.167620,74.029290,3116,walking,37.308055,35.490965,35.490965,0.145672,0.213858,0.018724,0.048291,full
14490,-71.934350,-61.324726,14490,walking,44.560363,51.674447,44.560363,0.181505,0.180998,0.225272,0.108254,full
14416,-73.751980,-63.028355,14416,walking,45.434795,52.276969,45.434795,0.172433,0.109721,0.069300,0.068718,full
14711,-51.916620,-69.005280,14711,walking,32.930991,43.683820,32.930991,0.168726,0.291560,0.126656,0.074753,full
3307,-0.490633,-89.492744,3307,walking,49.074074,40.456684,40.456684,0.321893,0.406099,0.191758,0.065906,full
...,...,...,...,...,...,...,...,...,...,...,...,...
154050,-92.135130,17.557468,4877,waiting,55.115422,58.037701,55.115422,0.312723,0.292753,0.390229,0.186924,full
150450,-55.126200,92.703740,1277,waiting,91.728657,77.316586,77.316586,0.460390,0.436600,0.345752,0.264612,full
158782,-31.170362,-131.907330,9609,waiting,28.487513,31.580377,28.487513,0.221564,0.283994,0.613413,0.052312,full
159337,0.562799,-59.649130,10164,waiting,64.839415,74.598054,64.839415,0.188468,0.201214,0.144061,0.168008,full


# Application 1: Defining and Detecting Outliers by Method and Action

In [10]:
# initialize app
app = dash.Dash(__name__)

# define layout of the app
app.layout = html.Div([
    # menu left
    html.Div([
        # heading
        html.H4("Select Your Analysis"),

        # dropdown for x-axis decision score threshold
        html.Div([
            html.Label("Set Decision Score Threshold"),
            dcc.Slider(
                id='x-threshold',
                min=0.7,
                max=0.99,
                step=0.01,
                value=0.90,
                marks={i / 100: f"{i / 100:.2f}" for i in range(70, 100, 5)}
            )
        ], style={'margin-bottom': '20px'}),

        # dropdown to select the decision score column
        html.Div([
            html.Label("Select Outlier Detection Model:"),
            dcc.Dropdown(
                id='x-decision-score-column',
                options=[
                    {'label': 'CBLOF', 'value': 'CBLOF'},
                    {'label': 'HDBSCAN', 'value': 'HDBSCAN'},
                    {'label': 'IFOREST', 'value': 'IFOREST'},
                    {'label': 'LOF', 'value': 'LOF'}
                ],
                value='CBLOF', # default value
            )
        ], style={'margin-bottom': '20px'}),

        # dropdown to select color by
        html.Div([
            html.Label("Color by:"),
            dcc.Dropdown(
                id='color-by',
                options=[
                    {'label': 'Decision Score Threshold', 'value': 'Decision Score'},
                    {'label': 'Action', 'value': 'Action'}
                ],
                value='Decision Score', # default value
            )
        ]),

    ], style={'width': '400px', 'float': 'left', 'background-color': 'white', 'font-family': 'Arial', 'margin-top': '80px', 'margin-left':'20px', 'margin-right': '20px', 'padding': '10px'}),

    html.Div([
        # scatter plot
        dcc.Graph(id='scatter-plot', style={'width': '800px', 'height': '600px'})
    ], style={'float': 'left'}),
], style={'font-family': 'Arial'})


# callback to update the scatter plot based on threshold values and selected dropdown values
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('x-threshold', 'value'),
     Input('x-decision-score-column', 'value'),
     Input('color-by', 'value')]
)

def update_scatter_plot(x_threshold, x_decision_score_column, color_by):
    
    # filter the df based on the selected decision score column
    filtered_df = df.copy()

    x_threshold_value = filtered_df[x_decision_score_column].quantile(x_threshold)

    # color points based on outliers
    filtered_df['Outlier'] = filtered_df[x_decision_score_column] > x_threshold_value

    # create scatter plot figure
    fig = go.Figure()

    # color by decision score
    if color_by == 'Decision Score':
        # Inliers (Circle marker)
        fig.add_trace(go.Scatter(
            x=filtered_df[~filtered_df['Outlier']]['Projection 1'],
            y=filtered_df[~filtered_df['Outlier']]['Projection 2'],
            mode='markers',
            marker=dict(symbol='circle', color='#535354', opacity=0.3),
            name='Inliers',
            hoverinfo='text',
            text=filtered_df[~filtered_df['Outlier']].apply(lambda row: f"idx: {row['idx']}<br>" +
                                                                         f"Action: {row['Action']}<br>" +
                                                                         f"{x_decision_score_column}: {row[x_decision_score_column]:.2f}<br>" +
                                                                         f"Projection 1: {row['Projection 1']:.2f}<br>" +
                                                                         f"Projection 2: {row['Projection 2']:.2f}",
                                                             axis=1)
        ))

        # Outliers (Triangle marker)
        fig.add_trace(go.Scatter(
            x=filtered_df[filtered_df['Outlier']]['Projection 1'],
            y=filtered_df[filtered_df['Outlier']]['Projection 2'],
            mode='markers',
            marker=dict(symbol='triangle-up', color='#7BBA4A', opacity=0.9, size=10, line=dict(color='black', width=1)),
            name='Outliers',
            hoverinfo='text',
            text=filtered_df[filtered_df['Outlier']].apply(lambda row: f"idx: {row['idx']}<br>" +
                                                                     f"Action: {row['Action']}<br>" +
                                                                     f"{x_decision_score_column}: {row[x_decision_score_column]:.2f}<br>" +
                                                                     f"Projection 1: {row['Projection 1']:.2f}<br>" +
                                                                     f"Projection 2: {row['Projection 2']:.2f}",
                                                         axis=1)
        ))

    # color by action
    elif color_by == 'Action':
        actions = filtered_df['Action'].unique()

        # 15 colors, one for each action
        colors = plt.cm.tab20.colors[:15]
        colors = [f'#{r:02x}{g:02x}{b:02x}' for (r, g, b) in (255 * np.array(colors)).astype(int)]

        for i, action in enumerate(actions):
            # Inliers (Circle marker)
            fig.add_trace(go.Scatter(
                x=filtered_df[(filtered_df['Action'] == action) & ~filtered_df['Outlier']]['Projection 1'],
                y=filtered_df[(filtered_df['Action'] == action) & ~filtered_df['Outlier']]['Projection 2'],
                mode='markers',
                marker=dict(symbol='circle', color=colors[i], opacity=0.3),
                name=f'Inliers ({action})',
                hoverinfo='text',
                text=filtered_df[(filtered_df['Action'] == action) & ~filtered_df['Outlier']].apply(lambda row: f"idx: {row['idx']}<br>" +
                                                                                                                f"Action: {row['Action']}<br>" +
                                                                                                                f"{x_decision_score_column}: {row[x_decision_score_column]:.2f}<br>" +
                                                                                                                f"Projection 1: {row['Projection 1']:.2f}<br>" +
                                                                                                                f"Projection 2: {row['Projection 2']:.2f}",
                                                                                                    axis=1)
            ))

            # Outliers (Triangle marker)
            fig.add_trace(go.Scatter(
                x=filtered_df[(filtered_df['Action'] == action) & filtered_df['Outlier']]['Projection 1'],
                y=filtered_df[(filtered_df['Action'] == action) & filtered_df['Outlier']]['Projection 2'],
                mode='markers',
                marker=dict(symbol='triangle-up', size=8, color=colors[i], opacity=0.9, line=dict(color='black', width=1)),
                name=f'Outliers ({action})',
                hoverinfo='text',
                text=filtered_df[(filtered_df['Action'] == action) & filtered_df['Outlier']].apply(lambda row: f"idx: {row['idx']}<br>" +
                                                                                                                f"Action: {row['Action']}<br>" +
                                                                                                                f"{x_decision_score_column}: {row[x_decision_score_column]:.2f}<br>" +
                                                                                                                f"Projection 1: {row['Projection 1']:.2f}<br>" +
                                                                                                                f"Projection 2: {row['Projection 2']:.2f}",
                                                                                                    axis=1)
            ))

    fig.update_layout(
        title={'text':'<b>Outlier Detection App</b>',
               'font': {'size': 20, 'color':'black', 'family': 'Arial'}},
        xaxis={'title': 'Projection 1'},
        yaxis={'title': 'Projection 2'},
        legend={'x': 1.01, 'y': 1},
        plot_bgcolor='white'
    )

    fig.update_xaxes(
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey'
    )

    fig.update_yaxes(
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey'
    )

    return fig

# run the app
if __name__ == '__main__':
    app.run_server(debug=True, port=8050)

In [None]:
http://127.0.0.1:8050