In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pymongo
from pprint import pprint

In [2]:
# Connect to MongoDB using pymongo's default connection settings (no host/port is given).
client = pymongo.MongoClient()
# `db` is the "metrics" database; the query helpers defined below read it as a
# module-level global, so this cell must run before any of them is called.
db = client.metrics

In [3]:
def group_by(df, bycols, agg_map):
    """
    Group `df` by `bycols`, apply one aggregate per (column, function) pair and
    return all the results side by side in a single flat DataFrame.

    @param df:      DataFrame
    @param bycols:  str or list/tuple of str
                        Column(s) to group by
    @param agg_map: dictionary or list of 2-tuples
                        Mapping from column to aggregate function e.g. [("city", "count"), ("salary", "mean")]
                        The aggregate may be a name understood by pandas or a callable.
    @return:        DataFrame
                        Flattened dataframe, with multi-level index removed. Each aggregated
                        column is named "<agg>(<column>)", e.g. "mean(salary)"; for a callable
                        the function's __name__ is used as <agg>.
    @raise ValueError: if agg_map contains no (column, aggregate) pairs
    """
    # Normalise the grouping columns to a list so `bycols + [col]` works for
    # a single name, a list or a tuple alike.
    bycols = [bycols] if isinstance(bycols, str) else list(bycols)
    pairs = list(agg_map.items()) if isinstance(agg_map, dict) else list(agg_map)
    if not pairs:
        raise ValueError("agg_map must contain at least one (column, aggregate) pair")

    merged = None
    for col, agg in pairs:
        # Strings have no __name__, so they are used verbatim; callables get a
        # readable label instead of their repr (which embeds a memory address).
        label = "%s(%s)" % (getattr(agg, "__name__", agg), col)
        grp = (
            df[bycols + [col]]
            .groupby(bycols)
            .agg(agg)
            .reset_index()
            .rename(columns={col: label})
        )
        # Every per-aggregate frame covers the same groups, so an inner join on
        # the grouping columns simply places the new column alongside the rest.
        merged = grp if merged is None else pd.merge(merged, grp, on=bycols, how="inner")
    return merged

In [4]:
from bson.son import SON # needed to ensure dictionary is ordered (python default is not)
import hashlib

def hash_feats(fts):
    """
    Return a SHA-224 hex digest identifying the values held in `fts`.

    The elements of `fts.values` are converted with str(), joined with "|",
    UTF-8 encoded and hashed, so the same values in the same order always
    produce the same digest.
    """
    digest = hashlib.sha224()
    digest.update("|".join(str(item) for item in fts.values).encode('utf-8'))
    return digest.hexdigest()

def get_df_sorted_by_f1score(collection, params=None, filter_cols=True):
    """
    Load the results stored in the `collection` of the global `db` into a
    DataFrame sorted by micro F1 score, best first.

    @param collection:  str
                            Name of the MongoDB collection to query
    @param params:      None, comma-separated str, or list/tuple of str
                            Names of fields under each document's "parameters" entry
                            to include as columns
    @param filter_cols: bool
                            When True, return only the micro F1 / recall / precision
                            columns followed by the requested parameter columns.
                            When False, return every projected column, plus an
                            "hs_params" column (a hash of the parameter values)
                            if any params were requested.
    @return:            DataFrame
                            One row per document that has a micro F1 score. Empty
                            (but with the expected columns) when nothing matches.
    """
    if not params:
        params = []
    elif isinstance(params, str):
        params = params.split(",")
    else:
        # Accept tuples / other iterables; the column lists below are built by
        # list concatenation, which needs a real list.
        params = list(params)

    project = {
            "weighted_f1_score":"$WEIGHTED_MEAN_CONCEPT_CODES.f1_score",
            "micro_f1_score":  "$MICRO_F1.f1_score",
            "micro_recall":    "$MICRO_F1.recall",
            "micro_precision": "$MICRO_F1.precision",

    # PARAMETERS
            "window_size":    "$parameters.window_size",
            "feats":          "$parameters.extractors",
            "count": {        "$size" : "$parameters.extractors" },
            "asof" :          "$asof",
            "_id":1
    }

    # No count for HMM
    if "_hmm" in collection.lower():
        del project["count"]

    for param in params:
        project[param] = "$parameters." + param

    feats_pipeline = [
        {"$project": project},
        # Keep only documents for which a micro F1 score was projected.
        {"$match": {"micro_f1_score": {"$exists": True}}},
        {"$sort": {"micro_f1_score": -1}},
    ]

    rows = list(db[collection].aggregate(feats_pipeline))
    metric_cols = ["micro_f1_score", "micro_recall", "micro_precision"]

    if not rows:
        # An empty or misspelt collection yields no rows; sorting a column-less
        # DataFrame would raise KeyError, so return an empty frame instead.
        if filter_cols:
            empty_cols = metric_cols + params
        else:
            empty_cols = list(project) + (["hs_params"] if params else [])
        return pd.DataFrame(columns=empty_cols)

    df = pd.DataFrame(rows).sort_values("micro_f1_score", ascending=False)
    if params:
        # One hash per row over the parameter values, identifying a parameter combination.
        df["hs_params"] = df[params].apply(hash_feats, axis=1)

    if filter_cols:
        return df[metric_cols + params]
    return df

In [5]:
from Metrics import rpf1a_from_tp_fp_tn_fn
from collections import defaultdict

def tally_counts(r, filter):
    """
    Sum the "tp", "tn", "fp" and "fn" counts over the entries of `r` whose key
    is accepted by `filter`.

    @param r:      dict
                       Maps a key to a dict holding "tp", "tn", "fp" and "fn" counts
                       (only entries accepted by `filter` are read)
    @param filter: callable
                       Called with each key; truthy means the entry is included
    @return:       defaultdict(int)
                       Totals keyed by "tp", "tn", "fp", "fn"; a key reads as 0
                       when no entry was accepted
    """
    totals = defaultdict(int)
    accepted = (counts for key, counts in r.items() if filter(key))
    for counts in accepted:
        for name in ("tp", "tn", "fp", "fn"):
            totals[name] += counts[name]
    return totals

def get_causal_relation_metrics(collection, params, include_concept_codes=True):
    """
    Compute micro-averaged metrics per document of `collection` (read from the
    global `db`) and return them sorted by causal-relation F1, best first.

    For every document the tp/fp/tn/fn counts are summed over the keys that
    contain "->" (the "cr_" metrics) and, optionally, over the keys that start
    with a digit (the "concept_" metrics); each total is converted with
    rpf1a_from_tp_fp_tn_fn.

    @param collection:            str
                                      Name of the MongoDB collection to read
    @param params:                None, comma-separated str, or list/tuple of str
                                      Keys of each document's "parameters" entry to
                                      include as columns
    @param include_concept_codes: bool
                                      When True also compute the "concept_" columns
    @return:                      DataFrame
                                      Columns: cr_micro_f1, cr_micro_rec, cr_micro_prec,
                                      then the concept_ equivalents (if requested), then
                                      the parameter columns. Empty (but with those
                                      columns) when the collection has no documents.
    """
    if not params:
        params = []
    elif isinstance(params, str):
        # Same convention as get_df_sorted_by_f1score: a comma-separated string.
        params = params.split(",")
    else:
        params = list(params)

    # Build the column list directly rather than joining and re-splitting on ",":
    # that produced a bogus '' column for empty params, and filtering on the
    # substring "concept" also dropped parameters whose name contained it.
    fields = ["cr_micro_f1", "cr_micro_rec", "cr_micro_prec"]
    if include_concept_codes:
        fields += ["concept_micro_f1", "concept_micro_rec", "concept_micro_prec"]
    fields += params

    records = []
    for r in db[collection].find({}):
        record = {}

        cr_counts = tally_counts(r, lambda c: "->" in c)
        # Unpacked as (recall, precision, f1, <unused 4th value>), following the
        # order suggested by the helper's name -- confirm against Metrics.
        (rec, prec, cr_f1, _) = rpf1a_from_tp_fp_tn_fn(cr_counts["tp"],cr_counts["fp"],cr_counts["tn"],cr_counts["fn"])
        record["cr_micro_f1"] = cr_f1
        record["cr_micro_rec"]  = rec
        record["cr_micro_prec"] = prec

        if include_concept_codes:
            # c[:1] rather than c[0] so an empty key is rejected instead of raising.
            concept_counts = tally_counts(r, lambda c: c[:1].isdigit())
            (rec, prec, concept_f1, _) = rpf1a_from_tp_fp_tn_fn(concept_counts["tp"],concept_counts["fp"],concept_counts["tn"],concept_counts["fn"])
            record["concept_micro_f1"] = concept_f1
            record["concept_micro_rec"]  = rec
            record["concept_micro_prec"] = prec

        run_params = r["parameters"]
        for name in params:
            record[name] = run_params[name]
        records.append(record)

    if not records:
        # No documents: selecting/sorting columns of an empty frame would raise KeyError.
        return pd.DataFrame(columns=fields)

    df = pd.DataFrame(records)
    return df[fields].sort_values("cr_micro_f1", ascending=False)

In [6]:
def round_data(df, places=3):
    """
    Return a copy of `df` in which every column whose name contains "micro_"
    is rendered as fixed-point text with `places` decimal places.

    The affected columns hold strings afterwards (e.g. "0.850"), so the result
    is meant for display; `df` itself is left unchanged.
    """
    template = "{0:.%sf}" % (places,)
    formatted = df.copy()
    for col in formatted.columns.values:
        if "micro_" in col:
            formatted[col] = df[col].apply(lambda value: template.format(value))
    return formatted

# Binary Relevance - CR and Concept Codes

## Coral Bleaching
(no skin cancer results for this experiment)

In [7]:
# Hyper-parameter names to show next to the metrics; split into a list here because
# get_causal_relation_metrics indexes each document's "parameters" entry by name.
params = "merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size"
# NOTE(review): "_TD_" presumably marks the training-data collection -- confirm.
df = get_causal_relation_metrics("CR_CB_TAGGING_TD_RNN", params.split(","))
df

Unnamed: 0,cr_micro_f1,cr_micro_rec,cr_micro_prec,concept_micro_f1,concept_micro_rec,concept_micro_prec,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
5,0.39669,0.626529,0.290224,0.583708,0.852237,0.443856,sum,2,True,True,256
4,0.305912,0.328199,0.286459,0.517585,0.661155,0.425244,sum,2,True,True,128
2,0.262015,0.730794,0.159622,0.595313,0.944704,0.434585,sum,1,True,True,256
1,0.21478,0.278353,0.174846,0.489831,0.699067,0.376994,sum,1,True,True,128
0,0.122902,0.118921,0.127158,0.517211,0.63204,0.437691,sum,1,True,True,64
3,0.0,0.0,0.0,0.389768,0.39976,0.380264,sum,2,True,True,64


In [8]:
# Same hyper-parameter columns as the previous cell, redefined so this cell is self-contained.
params = "merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size"
# NOTE(review): "_VD_" presumably marks the validation-data collection -- confirm.
df = get_causal_relation_metrics("CR_CB_TAGGING_VD_RNN", params.split(","))
# get_causal_relation_metrics already returns rows sorted by cr_micro_f1 (descending),
# so this sort repeats that ordering.
df.sort_values("cr_micro_f1", ascending=False)

Unnamed: 0,cr_micro_f1,cr_micro_rec,cr_micro_prec,concept_micro_f1,concept_micro_rec,concept_micro_prec,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
5,0.375512,0.59481,0.274359,0.574305,0.837298,0.437034,sum,2,True,True,256
4,0.282257,0.298736,0.267501,0.512915,0.650683,0.423292,sum,2,True,True,128
2,0.249717,0.697937,0.152062,0.595039,0.942434,0.434775,sum,1,True,True,256
1,0.213925,0.274451,0.175271,0.491845,0.70205,0.378513,sum,1,True,True,128
0,0.114974,0.112774,0.11726,0.519371,0.636893,0.438464,sum,1,True,True,64
3,0.0,0.0,0.0,0.388121,0.397647,0.37904,sum,2,True,True,64


# Most Common Tag

## Coral Bleaching

In [9]:
# Comma-separated hyper-parameter names shared by the "Most Common Tag" cells below;
# get_df_sorted_by_f1score splits the string itself.
params = "merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size"

### Training

In [10]:
# Relies on `params` from the cell above. Coral bleaching, training results.
df = get_df_sorted_by_f1score("CR_CB_TAGGING_TD_MOST_COMMON_TAG_RNN", params)
# Show the "micro_" metric columns formatted to 4 decimal places.
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.8505,0.8601,0.8412,sum,2,True,True,256
1,0.7869,0.7937,0.7802,sum,2,True,True,128
2,0.7851,0.7875,0.7827,sum,1,True,True,256
3,0.781,0.8028,0.7603,sum,2,True,True,64
4,0.7546,0.7998,0.7143,sum,1,True,True,64
5,0.7214,0.7657,0.6821,sum,1,True,True,128


### Validation

In [11]:
# Relies on `params` defined at the top of the "Most Common Tag" section.
# Coral bleaching, validation results.
df = get_df_sorted_by_f1score("CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
# Show the "micro_" metric columns formatted to 4 decimal places.
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.6755,0.6989,0.6535,sum,2,True,True,256
1,0.6712,0.6727,0.6698,sum,2,True,True,128
2,0.6488,0.648,0.6495,sum,1,True,True,256
3,0.6436,0.662,0.6262,sum,2,True,True,64
4,0.6399,0.6776,0.6061,sum,1,True,True,128
5,0.6389,0.676,0.6057,sum,1,True,True,64


### Test

In [12]:
# Relies on `params` defined at the top of the "Most Common Tag" section.
# Coral bleaching, test results.
df = get_df_sorted_by_f1score("TEST_CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
# Show the "micro_" metric columns formatted to 4 decimal places.
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.6883,0.6872,0.6895,sum,2,True,True,256


## Skin Cancer

### Training

In [13]:
# Relies on `params` defined at the top of the "Most Common Tag" section.
# Skin cancer, training results.
df = get_df_sorted_by_f1score("CR_SC_TAGGING_TD_MOST_COMMON_TAG_RNN", params)
# Show the "micro_" metric columns formatted to 4 decimal places.
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.8636,0.8545,0.873,sum,2,True,True,256
1,0.8436,0.8388,0.8484,sum,2,True,True,128
2,0.8406,0.841,0.8401,sum,1,True,True,256
3,0.8248,0.82,0.8297,sum,2,True,True,64
4,0.8166,0.8407,0.7938,sum,1,True,True,64
5,0.8081,0.8214,0.7952,sum,1,True,True,128


### Validation

In [14]:
# Relies on `params` defined at the top of the "Most Common Tag" section.
# Skin cancer, validation results.
df = get_df_sorted_by_f1score("CR_SC_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
# Show the "micro_" metric columns formatted to 4 decimal places.
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.7639,0.7583,0.7696,sum,2,True,True,256
1,0.7563,0.7551,0.7576,sum,2,True,True,128
2,0.7477,0.7487,0.7467,sum,2,True,True,64
3,0.7462,0.7549,0.7377,sum,1,True,True,256
4,0.7419,0.7549,0.7293,sum,1,True,True,128
5,0.7336,0.7626,0.7067,sum,1,True,True,64


### Test

In [15]:
# Relies on `params` defined at the top of the "Most Common Tag" section.
# Skin cancer, test results.
df = get_df_sorted_by_f1score("TEST_CR_SC_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
# Show the "micro_" metric columns formatted to 4 decimal places.
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.7827,0.7943,0.7715,sum,2,True,True,256


## Stacked Model

### Coral Bleaching

### Training

In [16]:
# Hyper-parameter names of the stacked model, as a list; reused by the
# stacked-model cells that follow.
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
# Coral bleaching, training results.
df = get_df_sorted_by_f1score("CR_CB_STACKED_TD", sparams)
# Format the "micro_" metric columns to 4 decimal places and show the 5 best rows.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7612,0.697,0.8384,False,l1,100.0,True,False,True,False,True
1,0.7438,0.6758,0.8269,False,l2,100.0,True,False,True,False,True
2,0.7348,0.6648,0.8213,False,l1,10.0,True,False,True,False,True
3,0.726,0.6645,0.8001,True,l2,100.0,True,False,True,False,True
4,0.7245,0.6524,0.8145,False,l2,10.0,True,False,True,False,True


### Validation

In [17]:
# Relies on `sparams` from the stacked-model training cell above.
# Coral bleaching, validation results.
df = get_df_sorted_by_f1score("CR_CB_STACKED_VD", sparams)
# Format the "micro_" metric columns to 4 decimal places and show the 5 best rows.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.6709,0.5978,0.7644,True,l2,1.0,True,True,False,False,True
1,0.67,0.6005,0.7578,True,l2,1.0,True,False,False,False,True
2,0.67,0.6005,0.7578,True,l2,1.0,False,True,False,False,True
3,0.6695,0.5965,0.763,True,l2,0.5,True,False,True,False,True
4,0.6695,0.5965,0.763,False,l2,0.5,True,False,True,False,True


### Test

In [18]:
# Relies on `sparams` from the stacked-model training cell above.
# Coral bleaching, test results.
df = get_df_sorted_by_f1score("TEST_CR_CB_STACKED_VD", sparams)
# Format the "micro_" metric columns to 4 decimal places and show up to 5 rows.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.653,0.584,0.7405,True,l2,0.5,True,False,True,False,True


### Skin Cancer

### Training

In [19]:
# Same stacked-model hyper-parameter list as in the coral bleaching section,
# redefined so this cell does not depend on it.
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
# Skin cancer, training results.
df = get_df_sorted_by_f1score("CR_SC_STACKED_TD", sparams)
# Format the "micro_" metric columns to 4 decimal places and show the 5 best rows.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7408,0.6643,0.8372,False,l1,100.0,True,False,False,True,True
1,0.7392,0.6612,0.838,False,l2,100.0,True,False,False,True,True
2,0.7368,0.6579,0.8373,False,l1,10.0,True,False,False,True,True
3,0.7338,0.6534,0.8369,False,l1,5.0,True,False,False,True,True
4,0.7335,0.6526,0.8371,True,l2,10.0,True,False,False,True,True


### Validation

In [20]:
# Same stacked-model hyper-parameter list as in the preceding cells (redefined here).
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
# Skin cancer, validation results.
df = get_df_sorted_by_f1score("CR_SC_STACKED_VD", sparams)
# Format the "micro_" metric columns to 4 decimal places and show the 5 best rows.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7074,0.6271,0.8114,True,l2,10.0,True,False,False,True,True
1,0.707,0.6259,0.8124,False,l2,10.0,True,False,False,True,True
2,0.707,0.6236,0.8161,True,l2,5.0,True,False,False,True,True
3,0.7069,0.6236,0.8159,False,l2,5.0,True,False,False,True,True
4,0.706,0.6279,0.8064,False,l1,5.0,True,False,False,True,True


### Test

In [21]:
# Same stacked-model hyper-parameter list as in the preceding cells (redefined here).
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
# Skin cancer, test results.
# NOTE(review): this collection name ends in "_TD" whereas the other test cells in this
# notebook query "TEST_..._VD" collections -- confirm this is the intended collection.
df = get_df_sorted_by_f1score("TEST_CR_SC_STACKED_TD", sparams)
# Format the "micro_" metric columns to 4 decimal places and show up to 5 rows.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7313,0.6494,0.837,True,l2,10.0,True,False,False,True,True
