In [None]:
import numpy as np
import pandas as pd
import cPickle as cpkl
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)

In [None]:
import matplotlib as mpl
# print(mpl.rcParams.items)
mpl.use('Agg')
# Global matplotlib style, applied one key at a time in the listed order:
# text/font rendering first, then label sizes, then figure output options.
# (alternative kept for reference: mpl.rcParams['font.family'] = ['Times New Roman'])
for rc_key, rc_value in [
        ('text.usetex', False),
        ('mathtext.rm', 'serif'),
        ('font.family', 'serif'),
        ('font.serif', ['Times New Roman']),
        ('axes.titlesize', 16),
        ('axes.labelsize', 14),
        ('savefig.dpi', 250),
        ('figure.dpi', 250),
        ('savefig.format', 'pdf'),
        ('savefig.bbox', 'tight'),
]:
    mpl.rcParams[rc_key] = rc_value
import matplotlib.pyplot as plt
%matplotlib inline
# print(mpl.rcParams.items)

import pylab
from mpl_toolkits.axes_grid1 import ImageGrid
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [None]:
import proclam
from proclam import *

In [None]:
# Number of classes; sizes the simulation, the confusion matrices and the weight vectors below.
M_classes = 13

In [None]:
N_objects = 100000
generator = proclam.simulators.LogUnbalanced()
minitruth = generator.simulate(M_classes, N_objects)

# Number of simulated objects that landed in each class.
pops = np.histogram(minitruth, bins=range(M_classes+1))[0]

# Per-class log10 of the count (floored at 1 so empty classes give 0),
# expressed as a fraction of log10 of the total number of objects.
log_total = np.log10(N_objects)
minipops = np.array([np.log10(np.max((count, 1.))) / log_total for count in pops])

In [None]:
truth = minitruth
# The smallest gap between distinct labels is used as the bin width, and the
# edges are shifted by half a width so every label sits in the middle of a bin.
d = np.diff(np.unique(truth)).min()
half_width = float(d) / 2
left_of_first_bin = truth.min() - half_width
right_of_last_bin = truth.max() + half_width
bin_edges = np.arange(left_of_first_bin, right_of_last_bin + d, d)

plt.hist(truth, bin_edges, log=True, alpha=0.5, color='k')
plt.xticks(range(max(truth)+1))
plt.xlabel('class')
plt.ylabel('counts')
plt.savefig('fig/complete_counts.png')
plt.show()
plt.close()

## First, when the systematics are added to an almost perfect classifier

In [None]:
classifier = proclam.classifiers.FromCM()

minitest = {}
which_affected = range(M_classes)
# Maps each systematic name to the matplotlib marker used to draw it.
systematic_info = {'agnostic':'s', 'tunnel':'o', 'almost':'P', 'noisy':'X', 'cruise':'d', 'subsumed':(3,0,0)}
# Materialise both as lists taken from the same dict so that they share one
# ordering and can be indexed by position (dict views on Python 3 cannot be).
which_systematics = list(systematic_info.keys())
markerlist = list(systematic_info.values())

# Starting confusion matrix: identity plus a uniform offset of minidelta,
# with every row rescaled to sum to one.
minidelta = 0.1
starter = np.eye(M_classes) + minidelta * np.ones((M_classes, M_classes))
starter = starter / np.sum(starter, axis=1)[:, np.newaxis]

In [None]:
def make_cm(start_cm, m, systematic):
    """Return a row-normalised copy of `start_cm` with one systematic applied to class `m`.

    Parameters
    ----------
    start_cm : array-like, shape (M, M)
        Starting matrix. It is copied, never modified.
    m : int
        Index of the affected class (row and/or column).
    systematic : str
        Which modification to apply:
        'agnostic'            -- row m is set to all ones;
        'almost'              -- row m is 0.5 everywhere with 2.0 on the diagonal;
        'noisy'               -- row m is 0.5 everywhere with 1.0 on the diagonal;
        'tunnel' / 'perfect'  -- row m and column m are zeroed, diagonal set to 1;
        'cruise' / 'subsumer' -- 1 is added to every entry of column m;
        'subsumed'            -- row m is replaced by row m-1 (row -1, i.e. the
                                 last row, when m == 0).
        Any other value leaves the matrix unmodified apart from normalisation.

    Returns
    -------
    numpy.ndarray, shape (M, M)
        The modified matrix with each row divided by its sum.
    """
    # can wrap this in loop if systematic is a list
    # Copy first: the assignments below are in-place, and without a copy they
    # would silently alter the caller's matrix and accumulate across calls.
    cm = np.array(start_cm, dtype=float)
    big_M = len(cm)
    if systematic == 'agnostic':
        cm[m] = np.ones(big_M)
    elif systematic == 'almost':
        cm[m] = 0.5 * np.ones(big_M)
        cm[m, m] += 1.5
    elif systematic == 'noisy':
        cm[m] = 0.5 * np.ones(big_M)
        cm[m, m] += 0.5
    elif systematic == 'tunnel' or systematic == 'perfect':
        cm[m] = np.zeros(big_M)
        cm[:, m] = np.zeros(big_M)
        cm[m, m] += 1.
    elif systematic == 'cruise' or systematic == 'subsumer':
        cm[:, m] += 1.
    elif systematic == 'subsumed':
        cm[m] = cm[m-1]
    # Rescale every row so that it sums to one.
    cm = cm / np.sum(cm, axis=1)[:, np.newaxis]
    return cm

In [None]:
# Weighting schemes, in the order used to index wt_colors and miniweights.
which_weighted = ['by pop', 'flat', 'up', 'down']

# One colour per class for each scheme: 'by pop' takes winter_r evaluated at
# minipops; 'flat', 'up' and 'down' use a single autumn_r colour repeated.
wt_colors = [mpl.cm.winter_r(minipops)]
wt_colors += [[mpl.cm.autumn_r(level)] * M_classes for level in (0.5, 1., 0.)]
wt_const = 1. / float(M_classes)

which_metrics = ['Brier', 'LogLoss']

In [None]:
def make_wv(m, wt_kw):
    """Build a weight vector over the classes, normalised to sum to one.

    Parameters
    ----------
    m : int
        Index of the class that is singled out by the 'up' and 'down' schemes;
        unused for 'by pop' and 'flat'.
    wt_kw : str
        Weighting scheme:
        'by pop' -- weights taken from the module-level `minipops`;
        'flat'   -- equal weight for every class;
        'up'     -- equal weights with class m doubled;
        'down'   -- equal weights with class m halved.

    Returns
    -------
    numpy.ndarray
        Weights divided by their sum.

    Raises
    ------
    ValueError
        If `wt_kw` is not one of the four schemes above.
    """
    if wt_kw == 'by pop':
        # Not modified in place: the division below creates a new array.
        wv = minipops
    elif wt_kw == 'flat':
        wv = np.ones(M_classes)
    elif wt_kw == 'up':
        wv = np.ones(M_classes)
        wv[m] += 1.
    elif wt_kw == 'down':
        wv = np.ones(M_classes)
        wv[m] /= 2.
    else:
        # Fail with a clear message rather than an UnboundLocalError further down.
        raise ValueError("unknown weighting %r; expected one of "
                         "'by pop', 'flat', 'up', 'down'" % (wt_kw,))
    wv = wv / np.sum(wv)
    assert(np.isclose(np.sum(wv), 1.))
    return wv

In [None]:
# miniweights[wi, m] holds the weight vector for weighting scheme number wi
# when class m is the affected class.
miniweights = np.empty((len(which_weighted), M_classes, M_classes))
for m in which_affected:
    for wi, w in enumerate(which_weighted):
        miniweights[wi, m] = make_wv(m, w)

In [None]:
# # warning: slow!!!
# for s in which_systematics:
#     minitest[s] = {}
#     for m in which_affected:
#         minitest[s][str(m)] = {}
#         minitest[s][str(m)]['cm'] = make_cm(starter, m, s)
#         minitest[s][str(m)]['probs'] = classifier.classify(minitest[s][str(m)]['cm'], minitruth, 
#                                                            delta=minidelta, other=False)
#         minitest[s][str(m)]['results'] = {}
#         for met in which_metrics:
#             minitest[s][str(m)][met] = {}
#             for wi in range(len(which_weighted)):
#                 w = which_weighted[wi]
#                 D = getattr(proclam.metrics, met)()
#                 minitest[s][str(m)][met][w] = D.evaluate(minitest[s][str(m)]['probs'], minitruth, 
#                                                          averaging=miniweights[wi][m])
#     print('finished '+s)
#     cpkl.dump(minitest[s], open(s+'_data.pkl', 'wb'))
#     print('saved '+s)

In [None]:
# warning: slow!
# Load the stored results for each systematic from '<systematic>_data.pkl'.
# NOTE: unpickling can execute arbitrary code; only load files that were
# written by this notebook.
goodtest = {}
for s in which_systematics:
    # 'with' closes the file as soon as it has been read.
    with open(s+'_data.pkl', 'rb') as pkl_file:
        goodtest[s] = cpkl.load(pkl_file)
#     minitest[s] = cpkl.load(open('almost_'+s+'_data.pkl', 'rb'))
    print('loaded '+s)

In [None]:
# connect lines along systematic, weighting, and affected class
def complete_metric_plot(dataset, metric_names, shapes, fn=''):
    """Scatter one metric against another for every systematic, class and weighting.

    One point is drawn per (affected class, systematic, weighting scheme).
    Marker shape encodes the systematic, colour encodes the weighting scheme
    and marker size scales with `minipops` of the affected class.

    Parameters
    ----------
    dataset : dict
        Results indexed as dataset[systematic][str(m)][metric][weighting].
    metric_names : sequence of str
        metric_names[0] is plotted on the x-axis, metric_names[1] on the y-axis.
    shapes : sequence
        Marker for each systematic, in the same order as iterating over `dataset`.
    fn : str, optional
        Prefix of the output file name.

    Returns
    -------
    None
        The figure is written to 'fig/' + fn + 'all_effects_isolated.png' and closed.

    Notes
    -----
    Reads the module-level names `which_affected`, `which_weighted`,
    `wt_colors` and `minipops`.
    """
    # list() so the keys and markers can be indexed by position on Python 3 too.
    systematics = list(dataset.keys())
    markers = list(shapes)
    x_metric, y_metric = metric_names[0], metric_names[1]

    fig, ax = plt.subplots(figsize=(5,5))

    ax.text(0.01, 0.75, r'size$\sim \log[N_{m}/N]$',
            verticalalignment='center', transform=ax.transAxes,
            fontsize=10)
    fig.subplots_adjust(right=1.)

    for m in which_affected:
        for si, s in enumerate(systematics):
            for wi, w in enumerate(which_weighted):
                ax.scatter(dataset[s][str(m)][x_metric][w], dataset[s][str(m)][y_metric][w],
                           c=wt_colors[wi][m],
                           s=50*(0.01 + minipops[m]),
                           marker=markers[si],
                           alpha=0.25)

    # One black point per systematic at the origin, which lies outside the axis
    # limits set below, so it only contributes a legend entry.
    for si, s in enumerate(systematics):
        ax.scatter(0., 0., c='k',
                   marker=markers[si],
                   alpha=0.25, label=s)

    ax.set_xlabel(x_metric)
    ax.set_ylabel(y_metric)
    ax.set_ylim(0.9, 3.5)
    ax.set_xlim(0.035, 0.09)
    ax.legend(loc='lower right')

    # Colour bar for the 'by pop' weighting, with a tick at each class's minipops value.
    popaxins = inset_axes(ax,
                    width="25%",  # width = 25% of parent_bbox width
                    height="5%",  # height : 5%
                    loc=2)
    mpl.colorbar.ColorbarBase(popaxins, cmap=mpl.cm.winter_r,
                              norm=mpl.colors.Normalize(vmin=0., vmax=1.),
                              orientation='horizontal')
    popaxins.set_title(r'$w_{m}\sim N_{m}/N$', fontsize=10)
    pop_ticks = np.concatenate((np.zeros(1), np.flip(minipops, axis=0), np.ones(1)))
    popaxins.xaxis.set_ticks(pop_ticks)
    # Exactly one label per tick: only the two end ticks are labelled and the
    # per-class ticks get empty strings.
    popaxins.xaxis.set_ticklabels(['0'] + [''] * len(minipops) + ['1'], fontsize=10)

    # Colour bar for the flat / up / down weightings.
    linaxins = inset_axes(ax,
                    width="25%",  # width = 25% of parent_bbox width
                    height="5%",  # height : 5%
                    loc=6)
    mpl.colorbar.ColorbarBase(linaxins, cmap=mpl.cm.autumn_r,
                              norm=mpl.colors.Normalize(vmin=0., vmax=1.),
                              orientation='horizontal')
    linaxins.set_title('flat/up/down weights', fontsize=10)
    linaxins.xaxis.set_ticks([0., 0.5, 1.])
    linaxins.xaxis.set_ticklabels(['downweight', 'flat', 'upweight'],
                                  rotation=270, fontsize=10)

    fig.savefig('fig/'+fn+'all_effects_isolated.png', dpi=250)
    plt.close(fig)

In [None]:
# Plot the results held in goodtest; the output file name is prefixed with 'almost'.
complete_metric_plot(goodtest, which_metrics, markerlist, fn='almost')

## When the systematics are added to an agnostic classifier

In [None]:
# Uniform starting matrix: all entries equal, each row rescaled to sum to one.
uniform_cm = np.ones((M_classes, M_classes))
row_totals = uniform_cm.sum(axis=1)
starter = uniform_cm / row_totals[:, np.newaxis]

In [None]:
# # warning: slow!!!
# for s in which_systematics:
#     minitest[s] = {}
#     for m in which_affected:
#         minitest[s][str(m)] = {}
#         minitest[s][str(m)]['cm'] = make_cm(starter, m, s)
#         minitest[s][str(m)]['probs'] = classifier.classify(minitest[s][str(m)]['cm'], minitruth, 
#                                                            delta=minidelta, other=False)
#         minitest[s][str(m)]['results'] = {}
#         for met in which_metrics:
#             minitest[s][str(m)][met] = {}
#             for wi in range(len(which_weighted)):
#                 w = which_weighted[wi]
#                 D = getattr(proclam.metrics, met)()
#                 minitest[s][str(m)][met][w] = D.evaluate(minitest[s][str(m)]['probs'], minitruth, 
#                                                          averaging=miniweights[wi][m])
#     print('finished '+s)
#     cpkl.dump(minitest[s], open('agnostic_'+s+'_data.pkl', 'wb'))
#     print('saved '+s)

In [None]:
# warning: slow!
# Load the stored results for each systematic from 'agnostic_<systematic>_data.pkl'.
# NOTE: unpickling can execute arbitrary code; only load files that were
# written by this notebook.
badtest = {}
for s in which_systematics:
#     minitest[s] = cpkl.load(open(s+'_data.pkl', 'rb'))
    # 'with' closes the file as soon as it has been read.
    with open('agnostic_'+s+'_data.pkl', 'rb') as pkl_file:
        badtest[s] = cpkl.load(pkl_file)
    print('loaded '+s)

In [None]:
# Plot the results held in badtest; the output file name is prefixed with 'agnostic'.
complete_metric_plot(badtest, which_metrics, markerlist, fn='agnostic')

**TODO:** repeat this procedure many times to generate error bars.

In [None]:
# cells with a tag of "hideme" will not appear in html resulting from:
# jupyter nbconvert desc_note/main.ipynb --TagRemovePreprocessor.remove_cell_tags='["hideme"]'
# jupyter nbconvert desc_note/main.ipynb --TagRemovePreprocessor.remove_input_tags='["hidein"]'
