# Mining predictions for deep learning model behavior understanding and debugging

In [64]:
from pathlib import Path
import os

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [128]:
# Choose which CSV to mine; uncomment one of the alternative file names
# below to switch to a different file.
file_name = "representation.csv"
#file_name = "element_attribute.csv"
#file_name = "attributes.csv"
#file_name = "attribute_pairs.csv"
#file_name = "element_pairs.csv"
#file_name = "element_attribute_pairs.csv"

# Directory (relative to the notebook's working directory) holding the CSVs.
representations_dir = Path("../../../../msc_work/msc_thesis/panagiotis_soilis_19_20/representations_pedestrian/")
structured_representation = pd.read_csv(representations_dir / file_name)

In [129]:
# Inspect the table that was just loaded from the selected CSV.
print(structured_representation)

     image_name true_label predicted_label  shirt  hair  pavement  ear  road  \
0    098441.jpg       male            male      1     0         0    0     0   
1    098445.jpg       male            male      1     1         1    0     0   
2    094524.jpg     female          female      1     1         0    1     1   
3    098447.jpg       male            male      0     1         0    1     0   
4    094525.jpg     female          female      0     1         0    0     0   
..          ...        ...             ...    ...   ...       ...  ...   ...   
495  098816.jpg       male            male      0     0         0    1     1   
496  098818.jpg       male            male      0     0         1    0     0   
497  098819.jpg       male            male      0     1         0    1     0   
498  098823.jpg       male            male      0     1         0    0     0   
499  098825.jpg       male            male      0     1         0    1     0   

     face  background  ...  trees  none

In [130]:
def structuredDataToMiningInput(dataset, ground_truth=False):
    """Convert a binary presence table into transactions for rule mining.

    Every column other than the bookkeeping columns (``image_name``,
    ``true_label``, ``predicted_label`` and ``classification_check``) is
    treated as an item flag: a cell equal to 1 contributes the column name to
    that row's transaction and a cell equal to 0 contributes nothing.  Any
    other cell value is passed through unchanged.  The row's label (if it is
    not 0) is appended as the last item of the transaction.

    The input frame is only read; it is never modified, so callers do not
    need to pass a copy.

    Args:
        dataset: DataFrame with the flag columns and the label column
            selected by ``ground_truth``.  Bookkeeping columns that are not
            needed may be absent.
        ground_truth: If True the label is taken from ``true_label``,
            otherwise from ``predicted_label``.

    Returns:
        A tuple ``(transactions, list_not_in_consequents,
        list_not_in_antecedents)`` where

        * ``transactions`` is a list with one list of items per row,
        * ``list_not_in_consequents`` is a frozenset of the flag column
          names (the items that may only act as antecedents),
        * ``list_not_in_antecedents`` is a frozenset of the label values
          that occur in the transactions (the items that may only act as
          consequents).

    Raises:
        KeyError: If the selected label column is missing from ``dataset``.
    """
    bookkeeping_cols = {
        "classification_check",
        "image_name",
        "true_label",
        "predicted_label",
    }
    label_col = "true_label" if ground_truth else "predicted_label"
    item_cols = [col for col in dataset.columns if col not in bookkeeping_cols]

    # Keep the label as the last field so each row unpacks as (*flags, label).
    # Selecting the label column here also raises KeyError when it is missing.
    rows = dataset[item_cols + [label_col]].itertuples(index=False, name=None)

    transactions = []
    label_items = set()
    for *flags, label in rows:
        # 1 -> the column name, 0 -> dropped, anything else -> kept verbatim.
        items = [
            col if value == 1 else value
            for col, value in zip(item_cols, flags)
            if value != 0
        ]
        if label != 0:
            items.append(label)
            label_items.add(label)
        transactions.append(items)

    list_not_in_consequents = frozenset(item_cols)
    # The transactions contain the label *values* (e.g. "male"), never the
    # name of the label column, so the values are what must be excluded
    # from rule antecedents.
    list_not_in_antecedents = frozenset(label_items)
    return transactions, list_not_in_consequents, list_not_in_antecedents

In [131]:
# False -> build the transactions with the model's predicted label;
# True would use the annotated label instead.
ground_truth = False
# A copy is handed over so the loaded table stays untouched whatever the
# conversion does to its argument.
dataset, list_not_in_consequents, list_not_in_antecedents = structuredDataToMiningInput(
    structured_representation.copy(),
    ground_truth=ground_truth,
)

In [132]:
# Inspect the transactions: one list of items per row of the table.
print(dataset)

[['shirt', 'male'], ['shirt', 'hair', 'pavement', 'male'], ['shirt', 'hair', 'ear', 'road', 'face', 'female'], ['hair', 'ear', 'face', 'male'], ['hair', 'female'], ['hair', 'face', 'background', 'male'], ['hair', 'face', 'male'], ['hair', 'face', 'background', 'neck', 'male'], ['shirt', 'hair', 'neck', 'male'], ['shirt', 'hair', 'pavement', 'female'], ['hair', 'ear', 'road', 'male'], ['hair', 'ear', 'female'], ['hair', 'road', 'face', 'male'], ['hair', 'ear', 'road', 'face', 'neck', 'glasses', 'male'], ['hair', 'pavement', 'face', 'collar', 'male'], ['hair', 'ear', 'male'], ['hair', 'face', 'background', 'female'], ['shirt', 'hair', 'pavement', 'face', 'glasses', 'male'], ['hair', 'ear', 'female'], ['hair', 'pavement', 'face', 'female'], ['shirt', 'female'], ['hair', 'road', 'male'], ['hair', 'ear', 'female'], ['background', 'hand', 'jacket', 'male'], ['hair', 'face', 'female'], ['hair', 'male'], ['hair', 'pavement', 'female'], ['hair', 'wall', 'female'], ['hair', 'background', 'male']

In [133]:
def getRules(dataset, min_support_score=0.6, min_lift_score=1.2, min_confidence_score=0.75):
    """Mine frequent itemsets and association rules from transactions.

    Args:
        dataset: List of transactions, each a list of items.
        min_support_score: Minimum support passed to ``apriori``.
        min_lift_score: Lift threshold; passed to ``association_rules`` and
            then applied again as a strict lower bound.
        min_confidence_score: Strict lower bound on rule confidence.

    Returns:
        A tuple ``(rules, frequent_itemsets)``: the rules DataFrame (with an
        added ``antecedent_len`` column) restricted to rules whose confidence
        and lift are strictly above their thresholds, and the frequent
        itemsets the rules were derived from.
    """
    # One-hot encode the transactions so apriori can consume them.
    encoder = TransactionEncoder()
    onehot = encoder.fit(dataset).transform(dataset)
    onehot_frame = pd.DataFrame(onehot, columns=encoder.columns_)
    frequent_itemsets = apriori(onehot_frame, min_support=min_support_score, use_colnames=True)

    # Generate candidate rules by lift, then post-filter on several metrics.
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=min_lift_score)
    rules["antecedent_len"] = rules["antecedents"].apply(len)

    # The length condition holds for every rule (a length is never negative);
    # it is kept so the selection is exactly the original one.
    long_enough = rules["antecedent_len"] >= 0
    confident = rules["confidence"] > min_confidence_score
    lifted = rules["lift"] > min_lift_score
    selected_rules = rules[long_enough & confident & lifted]
    return selected_rules, frequent_itemsets

def getPredictionRules(rules, list_not_in_consequents, list_not_in_antecedents):
    """Keep only the rules that respect the antecedent/consequent restrictions.

    Args:
        rules: DataFrame with ``antecedents`` and ``consequents`` columns
            whose cells are frozensets of items.
        list_not_in_consequents: Items that must not appear in a rule's
            consequents.
        list_not_in_antecedents: Items that must not appear in a rule's
            antecedents.

    Returns:
        The rows of ``rules`` whose consequents share no item with
        ``list_not_in_consequents`` and whose antecedents share no item with
        ``list_not_in_antecedents``.
    """
    def _avoids(forbidden_items):
        # Predicate: True when an itemset contains none of the forbidden items.
        return lambda itemset: itemset.isdisjoint(forbidden_items)

    # First pass: drop rules with a forbidden item among the consequents.
    consequents_ok = rules["consequents"].apply(_avoids(list_not_in_consequents))
    candidate_rules = rules.loc[consequents_ok, :]

    # Second pass: drop rules with a forbidden item among the antecedents.
    antecedents_ok = candidate_rules["antecedents"].apply(_avoids(list_not_in_antecedents))
    return candidate_rules.loc[antecedents_ok]

In [136]:
# Mine with thresholds far below the function defaults, then keep only the
# rules that satisfy the antecedent/consequent restrictions.
rules, frequent_itemsets = getRules(
    dataset,
    min_support_score=0.1,
    min_lift_score=0.2,
    min_confidence_score=0.1,
)
filtered_rules = getPredictionRules(
    rules,
    list_not_in_consequents,
    list_not_in_antecedents,
)

In [137]:
# Show the rules that survived filtering.
# NOTE(review): `display` is not imported in the visible code — presumably
# the IPython/Jupyter built-in; confirm before running outside a notebook.
display(filtered_rules)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
2,(background),(female),0.302,0.514,0.138,0.456954,0.889015,-0.017228,0.894951,1
7,(background),(male),0.302,0.486,0.164,0.543046,1.117379,0.017228,1.124841,1
11,(ear),(male),0.232,0.486,0.142,0.612069,1.259401,0.029248,1.324978,1
12,(face),(female),0.324,0.514,0.148,0.45679,0.888697,-0.018536,0.894682,1
17,(face),(male),0.324,0.486,0.176,0.54321,1.117716,0.018536,1.125243,1
18,(hair),(female),0.812,0.514,0.428,0.527094,1.025474,0.010632,1.027687,1
20,(pavement),(female),0.148,0.514,0.11,0.743243,1.445999,0.033928,1.892842,1
23,(hair),(male),0.812,0.486,0.384,0.472906,0.973058,-0.010632,0.975159,1
33,(neck),(male),0.202,0.486,0.154,0.762376,1.568675,0.055828,2.163083,1
35,(shirt),(male),0.216,0.486,0.126,0.583333,1.200274,0.021024,1.2336,1
