This notebook runs the tests and makes the plots for evaluating the mock classifiers.

In [None]:
import numpy as np
import pandas as pd
import pickle
np.random.seed(42)
import scipy.stats as sps

In [None]:
import matplotlib as mpl
# print(mpl.rcParams.items)
mpl.use('PS')
mpl.rcParams['text.usetex'] = False
mpl.rcParams['mathtext.rm'] = 'serif'
mpl.rcParams['font.family'] = 'serif'
mpl.rcParams["font.family"] = "serif"
mpl.rcParams["mathtext.fontset"] = "dejavuserif"
mpl.rcParams['font.serif'] = 'DejaVu Serif'
# mpl.rcParams['text.usetex'] = False
# mpl.rcParams['mathtext.rm'] = 'serif'
# mpl.rcParams['font.weight'] = 'light'
# mpl.rcParams['font.family'] = 'serif'
# mpl.rcParams['font.serif'] = ['Times New Roman']
# # mpl.rcParams['font.family'] = ['Times New Roman']
mpl.rcParams['axes.titlesize'] = 25
mpl.rcParams['axes.labelsize'] = 20
mpl.rcParams['xtick.labelsize'] = 15
mpl.rcParams['ytick.labelsize'] = 15
mpl.rcParams['savefig.dpi'] = 250
mpl.rcParams['figure.dpi'] = 250
mpl.rcParams['savefig.format'] = 'pdf'
mpl.rcParams['savefig.bbox'] = 'tight'
import matplotlib.pyplot as plt
%matplotlib inline
# print(mpl.rcParams.items)

import pylab
from mpl_toolkits.axes_grid1 import ImageGrid
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


In [None]:
import proclam
from proclam import *

In [None]:
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a new colormap from the ``[minval, maxval]`` slice of ``cmap``.

    ``cmap`` is sampled at ``n`` evenly spaced points between ``minval`` and
    ``maxval``; the samples are handed to
    ``LinearSegmentedColormap.from_list`` under the name
    ``trunc(<cmap.name>,<minval>,<maxval>)`` (limits shown to two decimals).
    """
    label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sample_points = np.linspace(minval, maxval, n)
    return mpl.colors.LinearSegmentedColormap.from_list(label, cmap(sample_points))

# Base palette is 'hot_r'; only its 0.35-1.0 portion is kept for the plots.
cmap = plt.get_cmap('hot_r')
fave_cmap = truncate_colormap(cmap, minval=0.35, maxval=1.0)

In [None]:
# Switch between a small, fast configuration and the full-size run.
quick = True
# (number of classes, order of magnitude of the number of mock objects)
M_classes, oom = (8, 2) if quick else (13, 6)

In [None]:
# Uniform per-class value, 1 / M.
flat_factor = 1. / M_classes

# Draw the true class of each of the 10**oom mock objects from proclam's
# LogUnbalanced simulator.
generator = proclam.simulators.LogUnbalanced()
N_objects = int(10 ** oom)
minitruth = generator.simulate(M_classes, N_objects, base=oom)

# Objects per class, then log10(count) / log10(N_objects); counts are floored
# at 1 so that an empty class maps to 0 instead of -inf.
pops = np.histogram(minitruth, bins=range(M_classes+1))[0]
minipops = np.log10(np.maximum(pops, 1.)) / np.log10(N_objects)

In [None]:
# Histogram of the true class labels on a log count axis, saved to
# fig/mock_counts.png.
fig, ax = plt.subplots(figsize=(5, 5))
# Bin width = smallest gap between distinct labels that actually occur.
# NOTE(review): np.diff(...) is empty (and .min() raises) if only one class
# was drawn -- confirm this cannot happen for the configured sizes.
d = np.diff(np.unique(minitruth)).min()
# Bin edges sit half a bin either side of the labels so each bar is centred
# on its class.
left_of_first_bin = minitruth.min() - float(d)/2
right_of_last_bin = minitruth.max() + float(d)/2
ax.hist(minitruth, np.arange(left_of_first_bin, right_of_last_bin + d, d), log=True, alpha=0.5, color=fave_cmap(1.))
# ax.plot(testvals)
# Classes are stored 0-based but labelled 1..M on the axis.
ax.set_xticks(range(M_classes))
ax.set_xticklabels(range(1, M_classes+1), fontweight='light')
# plt.hist(truth, log=True, alpha=0.5)
ax.set_xlim(-0.5, M_classes-0.5)
ax.set_ylabel('counts', fontsize=20, fontweight='light')
ax.set_xlabel('true class', fontsize=20, fontweight='light')
# The explicit .png extension decides the file type here, not the
# 'savefig.format' rcParam.
plt.savefig('fig/mock_counts.png')
plt.show()
plt.close()

In [None]:
def make_cm(start_cm, m, systematic, accomplice=-1):
    """Return a row-normalised copy of ``start_cm`` with class ``m`` altered.

    Parameters
    ----------
    start_cm : array-like, shape (M, M)
        Starting matrix (rows are plotted as true class, columns as predicted
        class elsewhere in this notebook).  It is NOT modified: the original
        implementation assigned into the caller's array.
    m : int
        Index of the class to alter.  Any value outside ``[0, M)`` means
        "alter nothing"; the matrix is only normalised.  The original test
        was ``m > M``, so ``m == M`` fell through and raised IndexError.
    systematic : str
        'uncertain'           -- row ``m`` becomes flat (all ones);
        'almost'              -- flat row with an extra 1.5 * flat_factor on
                                 the diagonal;
        'noisy'               -- flat row with an extra 0.5 * flat_factor on
                                 the diagonal;
        'perfect' or 'tunnel' -- row AND column ``m`` set to 1e-8, with
                                 flat_factor added on the diagonal;
        'subsumed' or 'cruise'-- row ``m`` becomes a copy of row
                                 ``accomplice``.
        Any other value leaves the matrix unchanged apart from normalisation.
    accomplice : int
        Row copied into row ``m`` for 'subsumed' / 'cruise' (default: last row).

    Returns
    -------
    numpy.ndarray, shape (M, M)
        New float array in which every row sums to 1.

    Notes
    -----
    Reads the module-level ``flat_factor`` for the 'almost', 'noisy' and
    'perfect'/'tunnel' cases.
    """
    # Float copy: protects the caller's array and avoids truncation if an
    # integer matrix is passed in.
    cm = np.array(start_cm, dtype=float)
    big_M = len(cm)
    if 0 <= m < big_M:
        if systematic == 'uncertain':
            cm[m] = np.ones(big_M)
        if systematic == 'almost':
            cm[m] = flat_factor * 0.5 * np.ones(big_M)
            cm[m][m] += 1.5 * flat_factor
        if systematic == 'noisy':
            cm[m] = flat_factor * 0.5 * np.ones(big_M)
            cm[m][m] += 0.5 * flat_factor
        if systematic == 'perfect' or systematic == 'tunnel':
            # The 1e-8 floor keeps every entry strictly positive.
            cm[m] = flat_factor * np.zeros(big_M) + np.asarray(1.e-8)
            cm[:, m] = flat_factor * np.zeros(big_M).T + np.asarray(1.e-8)
            cm[m][m] += 1. * flat_factor
        if systematic == 'subsumed' or systematic == 'cruise':
            cm[m] = cm[accomplice]
    # Normalise each row to unit sum.
    return cm / np.sum(cm, axis=1)[:, np.newaxis]

In [None]:
classifier = proclam.classifiers.FromCM()

# Result collectors, filled in by later cells.
mega_test = {}
alt_mega_test = {}
N_affected = range(M_classes)

# Classifier `delta` settings: 10**-1, and 10**-2 for the alternative
# (one_loop_naff uses the latter with FromCMDM).
delfact = -1
altdelfact = delfact * 2
minidelta, altminidelta = 10**delfact, 10**altdelfact

# Metric name -> plotting colour, and metric name -> evaluator instance
# looked up by that name in proclam.metrics.
metric_info = dict(LogLoss=fave_cmap(1.), Brier=fave_cmap(0.))
which_metrics = metric_info.keys()
met_evals = {met: getattr(proclam.metrics, met)() for met in which_metrics}

In [None]:
# Markers per systematic.  The *_bg set is used for the baseline a systematic
# was added to and the *_fg set for the added systematic itself (see
# complete_metric_plot); the tuples are matplotlib marker specifications.
all_shapes = dict(uncertain='s', tunnel='o', almost='P', noisy='X', subsumed='D', perfect='*')
all_shapes_bg = dict(perfect=(3, 0, 0), almost=(4, 0, 0), noisy=(4, 0, 45))
all_shapes_fg = dict(uncertain='.', almost=(4, 2, 0), noisy=(4, 2, 45), subsumed=(3, 2, 180))

In [None]:
# all_styles = {'per_class':'--', 'per_item':':'}
# which_weight_schemes = list(all_styles.keys())#['per_class', 'per_item']

In [None]:
# Eleven weights for class 0, evenly spaced from 0 to 1; whatever is left
# (1 - w) is shared equally among the other M_classes - 1 classes.
possible_weights = np.linspace(0., 1., 11)
other_share = (1. - possible_weights) / (M_classes - 1.)
weight_vecs = np.column_stack([possible_weights] + [other_share] * (M_classes - 1))
# Scheme name is the index of the weight, as a string: '0' ... '10'.
which_weight_schemes = dict((str(k), weight_vecs[k]) for k in range(len(possible_weights)))



In [None]:
def cm_mega_plot(info_dict, systematics, m, fn='', numrows=2, numcols=4):
    """Draw a grid of matrices, one panel per systematic, with one shared colorbar.

    Parameters
    ----------
    info_dict : dict
        ``info_dict[name]['cm'][m]`` must be the 2-D matrix to draw for the
        systematic ``name`` (the layout produced by ``one_loop_naff``).
    systematics : sequence of str
        Names to plot, one per panel, in grid order.  If there are fewer
        names than panels the remaining panels are left blank (the original
        code raised IndexError in that case).
    m : int
        Index into the stored stack of matrices; also shown in the title as
        the number of affected classes.
    fn : str
        Tag used in the title and in the output file name.
    numrows, numcols : int
        Grid shape.

    The figure is saved to ``fig/systest_<fn>_cm.png``.
    """
    fig = pylab.figure(figsize=(5.1, 5.1))
    # Invisible full-figure axes that only carry the shared title and labels.
    bigAxes = pylab.axes(frameon=False)
    bigAxes.set_xticks([])
    bigAxes.set_yticks([])
    bigAxes.set_title(fn+' with '+str(m)+' affected')
    grid = ImageGrid(fig, 111,          # as in plt.subplot(111)
                 nrows_ncols=(numrows,numcols),
                 axes_pad=0.,
                 share_all=True,
                 cbar_location="right",
                 cbar_mode="single",
                 cbar_size="5%",
                 cbar_pad=0.01,
                 )
    im = None
    for j in range(numcols * numrows):
        ax = grid[j]
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        # Nudge every panel slightly right and up.
        position = ax.get_position()
        position.x0 += 0.01
        position.y0 += 0.02
        position.x1 += 0.01
        position.y1 += 0.02
        ax.set_position(position)

        if j >= len(systematics):
            # More panels than systematics: nothing to draw here.
            continue
        testname = systematics[j]
        im = ax.imshow(info_dict[testname]['cm'][m], vmin=0., vmax=1., cmap=fave_cmap)
        ax.text(.5,.9,testname,horizontalalignment='center',transform=ax.transAxes, fontsize=16)
    if im is not None:
        # cbar_mode="single": one colorbar axes is shared by all panels.
        ax.cax.colorbar(im)
        ax.cax.toggle_label(True)
    bigAxes.set_ylabel(r'true class', fontsize=20)
    bigAxes.set_xlabel(r'predicted class', fontsize=20)
    pylab.tight_layout()
    pylab.savefig('fig/systest_'+fn+'_cm.png', bbox_inches='tight', pad_inches=0)

In [None]:
# cm_mega_plot(alt_mega_test['perfect'], which_systematics, m=12, fn='all_cm', numrows=2, numcols=4)

In [None]:
# "Almost perfect" baseline: flat 0.5 plus 1.5 on the diagonal, rows normalised.
starter = 0.5 * np.ones((M_classes, M_classes)) + 1.5 * np.eye(M_classes)
starter = starter / np.sum(starter, axis=1)[:, np.newaxis]

# Rows (true classes) hit by each systematic.  Negative entries count from
# the last class; the hard-coded 7 below needs M_classes >= 8.
cruise = [0, 1]
subsumed = [2, 3, 5, 6]
swapped = [-4, -5]
tunnel = [0, 7]
noisy_cls = [-2, -1]
uncertain = [-3]
afflicted = cruise + subsumed + tunnel + noisy_cls

# One label per class, written out for the 13-class configuration.
systematic_types = [
    'perfect',
    'almost perfect',
    'cruise control by 1',
    'cruise control by 1',
    'almost perfect',
    'subsumed by 2',
    'subsumed by 2',
    'tunnel vision',
    'mutually subsuming',
    'mutually subsuming',
    'uncertain',
    'noisy',
    'noisy'
]
# Only the first M_classes labels are kept (class i <-> systematic_types[i]),
# then reversed for the bottom-up twin axis.  Previously all 13 labels were
# used even when M_classes < 13, which does not match the M_classes ticks
# they are attached to (recent matplotlib versions raise on that mismatch).
# For M_classes == 13 this is unchanged.
plot_systematic_types = list(reversed(systematic_types[:M_classes]))

almost = 0.5 * np.ones((M_classes, M_classes)) + 1.5 * np.eye(M_classes)
almost = almost / np.sum(almost, axis=1)[:, np.newaxis]
# cm aliases `almost`; the row edits below are applied in this exact order
# because later rows copy from rows modified earlier.
cm = almost
perfect = np.eye(M_classes) + 1.e-8
cm[tunnel] = perfect[tunnel]
noisy = 0.5 * np.ones((M_classes, M_classes)) + 0.5 * np.eye(M_classes)
# Fix: normalise `noisy` by its OWN row sums.  It used to be divided by the
# row sums of `starter`, which are all 1, so it was never normalised.
noisy = noisy / np.sum(noisy, axis=1)[:, np.newaxis]
cm[noisy_cls] = noisy[noisy_cls]
cm[subsumed[:2]] = cm[cruise[0]]
cm[subsumed[2:]] = cm[cruise[1]]
cm[uncertain] = 1./float(M_classes) * np.ones(M_classes)
# Each of the two swapped classes gets the other's diagonal value in the
# off-diagonal cell that points at its partner.
cm[swapped[1], swapped[0]] = cm[swapped[0], swapped[0]]
cm[swapped[0], swapped[1]] = cm[swapped[1], swapped[1]]
cm[:, 7] = perfect[:, 7]

cm = cm / np.sum(cm, axis=1)[:, np.newaxis]
# Sanity check: every row should now sum to 1.
print(np.sum(cm, axis=1))

In [None]:
# Figure of the combined matrix `cm`, with class numbers on the left/bottom
# axes and the per-class systematic on a twin right-hand axis; saved to
# fig/combined.png.
fig = plt.figure(figsize=(5,5))
grid = ImageGrid(fig, 111,          # as in plt.subplot(111)
                 nrows_ncols=(1,1),
                 axes_pad=0.05,
                 share_all=True,
                 )
fig.subplots_adjust(wspace=0.5)
ax = grid[0]
im = ax.imshow(cm, vmin=0., vmax=1., cmap=fave_cmap)
# Classes are 0-based internally but labelled 1..M.
ax.set_xticks(range(M_classes))
ax.set_xticklabels(range(1, M_classes+1))
ax.set_yticks(range(M_classes))
ax.set_yticklabels(range(1, M_classes+1))
ax.set_ylabel('true class')
ax.set_xlabel('predicted class')
# Horizontal colorbar in its own axes above the image, ticks on top.
cbar_ax = fig.add_axes([0.1, 0.89, 0.8, 0.04])
cbar = fig.colorbar(im, cax=cbar_ax, orientation='horizontal', pad=0.05)
cbar_ax.xaxis.set_ticks_position("top")
ax.cax.toggle_label(True)
# Twin axis runs bottom-up while the image rows run top-down, hence the
# reversed label list.
# NOTE(review): the number of labels must equal M_classes here -- confirm
# plot_systematic_types has that length for the configured M_classes.
axp = ax.twinx()
axp.set_ylim(-0.5, M_classes-0.5)
axp.set_yticks(range(0, M_classes))
axp.set_yticklabels(plot_systematic_types)
axp.set_ylabel('systematic effect', rotation=270, labelpad=20)
ax.set_title('realistically complex \n conditional probability matrix', pad=50)
plt.savefig('fig/combined.png')
plt.show()
plt.close()

In [None]:
import random
from mpl_toolkits.axes_grid1 import ImageGrid

# Two example objects per class, in shuffled order.
howmanytoshow = 2 * M_classes
truth_here = np.array(list(range(M_classes)) + list(range(M_classes)))
# Fix: shuffle with a dedicated, seeded numpy generator.  The stdlib
# `random` module used before is not seeded anywhere in the visible
# notebook, so this figure changed on every run.  A private RandomState
# leaves the global numpy stream (seeded at the top) untouched.
np.random.RandomState(42).shuffle(truth_here)

# Classify the examples with the combined matrix `cm`.
classifier = FromCMDM()
res = classifier.classify(cm, truth_here, delta=0.01, other=False)

# Systematic label for each example, reversed because the twin axis below
# runs bottom-up while the image rows run top-down.
tickinfo = [systematic_types[i] for i in truth_here]
tickinfo = list(reversed(tickinfo))

fig = plt.figure(figsize=(5,10))
axs = ImageGrid(fig, 111, nrows_ncols=(1,1))
ax = axs[0]
ax.set_title('classification probabilities')
ax.set_xlabel('possible classes')
ax.set_xticks(range(M_classes))
ax.set_xticklabels(range(1, M_classes+1))

# One row per example, one column per class.
ax.imshow(res, vmin=0., vmax=1., cmap=fave_cmap)
ax.set_yticks(range(howmanytoshow))
ax.set_yticklabels(truth_here+1)
ax.set_ylabel('true class')
ax.tick_params(axis=u'both', which=u'both',length=0)
# Star marks the true class of each example.
for x, y in zip(truth_here, range(howmanytoshow)):
    ax.scatter(x, y, marker='*', color="darkorchid", s=100)

ax2 = ax.twinx()
ax2.set_yticks(range(howmanytoshow))
ax2.set_yticklabels(tickinfo)
ax2.set_ylabel('systematic effect', rotation=270, labelpad=20)
ax2.set_ylim(-0.5, howmanytoshow-0.5)
ax2.tick_params(axis=u'both', which=u'both',length=0)

plt.savefig('fig/examples.png', dpi=250)
plt.show()
# Release the figure, as the other plotting cells do.
plt.close()

# test number affected by each systematic relative to different baselines

In [None]:
def one_loop_naff(collector, name, which_systematics, starter, dirichlet=False, fn=None):
    """Evaluate the metrics as each systematic spreads over more and more classes.

    For every systematic ``s`` the matrix starts from ``starter`` and is
    passed through ``make_cm`` once per step: step 0 uses ``m = -1`` (only
    normalised), step ``k > 0`` additionally alters class ``k - 1``.  At each
    step only the objects whose true class was just altered are reclassified
    (all objects at step 0), then every metric is evaluated under every
    weight scheme.

    Parameters
    ----------
    collector : dict
        Updated in place: ``collector[name]`` is set to the results.
    name : str
        Key for this run (the baseline being perturbed).
    which_systematics : iterable of str
        Systematic names understood by ``make_cm``.
    starter : ndarray, shape (M_classes, M_classes)
        Baseline matrix.
    dirichlet : bool
        False -> ``FromCM`` with ``minidelta``; True -> ``FromCMDM`` with
        ``altminidelta``.
    fn : str or None
        If given, the whole collector is pickled to this path.

    Returns
    -------
    dict
        ``collector``; ``collector[name][s][metric][scheme]`` is an array of
        ``M_classes + 1`` metric values and ``collector[name][s]['cm']`` the
        matching stack of matrices.

    Reads the module-level ``M_classes``, ``N_objects``, ``minitruth``,
    ``which_metrics``, ``which_weight_schemes`` and ``met_evals``.
    """
    if dirichlet:
        classifier, delta = FromCMDM(), altminidelta
    else:
        classifier, delta = FromCM(), minidelta
    print('dirichlet='+str(dirichlet)+', delta='+str(delta))

    n_steps = M_classes + 1
    systest = {}
    for s in which_systematics:
        per_sys = {met: {w: np.empty(n_steps) for w in which_weight_schemes}
                   for met in which_metrics}
        per_sys['cm'] = np.empty((n_steps, M_classes, M_classes))
        systest[s] = per_sys

        temp_cm = starter
        temp_prob = classifier.classify(temp_cm, minitruth, delta=delta, other=False)
        for m in range(-1, M_classes):
            step = m + 1
            # Objects to reclassify: everything at the baseline step,
            # otherwise just the members of the class altered in this step.
            if m < 0:
                isolated = range(N_objects)
            else:
                isolated = np.where(minitruth == m)[0]
            temp_cm = make_cm(temp_cm, m, s)
            per_sys['cm'][step] = temp_cm
            temp_prob[isolated] = classifier.classify(temp_cm, minitruth[isolated], delta=delta, other=False)
            for met in which_metrics:
                evaluator = met_evals[met]
                for w, weights in which_weight_schemes.items():
                    per_sys[met][w][step] = evaluator.evaluate(temp_prob, minitruth, averaging=weights)
            # Count of objects whose highest-probability class is the true one.
            n_right = np.sum(np.ones(N_objects)[np.argmax(temp_prob, axis=1) == minitruth])
            print('how many would be right by MAP with systematic '+s+' and '+str(step)+' affected classes? '+
                    str(n_right)+' or '+str(n_right/float(N_objects)))
    collector[name] = systest
    if fn is not None:
        with open(fn, 'wb') as place:
            pickle.dump(collector, place)
    return collector

In [None]:
def load_collector(fn):
    """Return the object pickled at path ``fn`` (e.g. a collector written by ``one_loop_naff``)."""
    with open(fn, 'rb') as handle:
        return pickle.load(handle)

In [None]:
# connect lines along systematic, weighting, and affected class
def sys_only_plot(dataset, metric_info, shapes, styles, fn=''):
    """Plot two metrics against the number of affected classes, one panel per systematic.

    Parameters
    ----------
    dataset : dict
        ``dataset[systematic][metric][weight_scheme]`` is a sequence of
        ``M_classes + 1`` metric values (the layout built by
        ``one_loop_naff``).
    metric_info : dict
        Metric name -> colour.  The first key is drawn on the left axis, the
        second on the right-hand twin axis.
    shapes : dict
        Systematic name -> marker.
    styles : dict
        Weight-scheme name -> line style.
    fn : str
        Tag used in the title and in the output file name.

    Reads the module-level ``M_classes`` and ``which_weight_schemes`` and
    saves the figure to ``fig/systematics_only<fn>.png``.

    The x ticks and limits used to be hard-coded for 13 classes and the grid
    for at most 4 panels; both now follow ``M_classes`` and the number of
    systematics (identical output for 13 classes and up to 4 systematics).
    """
    met_inf = list(metric_info.keys())
    systematics = list(dataset.keys())
    # Two columns; at least the original 2 rows, more if there are > 4 panels.
    ncols = 2
    nrows = max(2, -(-len(systematics) // ncols))
    # A tick every 4 classes: [0, 4, 8, 12] when M_classes == 13.
    xticks = list(range(0, M_classes + 1, 4))

    fig = pylab.figure(figsize=(10.2, 10.))
    # Invisible full-figure axes (plus a twin) that only carry the shared
    # title and axis labels.
    bigAxes = pylab.axes(frameon=False)
    bigAxes.set_xticks([])
    bigAxes.set_yticks([])
    bigAxesP = bigAxes.twinx()
    bigAxesP.set_yticks([])
    bigAxes.set_ylabel(met_inf[0], fontsize=20, labelpad=25, color=metric_info[met_inf[0]])
    bigAxes.set_xlabel(r'number of classes affected', fontsize=20, labelpad=25)
    bigAxesP.set_ylabel(met_inf[1], rotation=270, fontsize=20,
                        labelpad=50, color=metric_info[met_inf[1]])
    bigAxes.set_title(fn+' plus systematics')
    for si, s in enumerate(systematics):
        ax = fig.add_subplot(nrows, ncols, si + 1)
        axp = ax.twinx()
        ax.set_xticks(xticks)
        ax.set_xticklabels(xticks)

        # Nudge the panel slightly right and up.
        position = ax.get_position()
        position.x0 += 0.01
        position.y0 += 0.02
        position.x1 += 0.01
        position.y1 += 0.02
        ax.set_position(position)

        ax.text(.5, .9, s,
                horizontalalignment='center',
                transform=ax.transAxes, fontsize=20)
        for w in which_weight_schemes:
            # The two metrics are offset by -/+0.1 in x so their markers do
            # not sit on top of each other.
            ax.plot(np.arange(M_classes+1)-0.1, dataset[s][met_inf[0]][w],
                  marker=shapes[s], linestyle=styles[w],
                  alpha=0.5, c=metric_info[met_inf[0]])
            axp.plot(np.arange(M_classes+1)+0.1, dataset[s][met_inf[1]][w],
                  marker=shapes[s], linestyle=styles[w],
                  alpha=0.5, c=metric_info[met_inf[1]])
        ax.set_xlim(-0.5, M_classes + 0.5)

    fig.tight_layout()
    fig.subplots_adjust(hspace=0.01)
    pylab.savefig('fig/systematics_only'+fn+'.png', dpi=250)
    pylab.show()
    return

### Dirichlet perturbation

In [None]:
# Re-run the setup for a two-class problem with 10**6 objects.
M_classes = 2
flat_factor = 1. / M_classes

oom = 6
generator = proclam.simulators.LogUnbalanced()
N_objects = int(10 ** oom)
minitruth = generator.simulate(M_classes, N_objects, base=oom)
pops = np.histogram(minitruth, bins=range(M_classes+1))[0]
# log10(count) / log10(N_objects) per class, with counts floored at 1.
minipops = np.empty(M_classes)
for m in range(M_classes):
    minipops[m] = np.log10(np.max((pops[m], 1.))) /np.log10(N_objects)

# Weight schemes rebuilt for the new M_classes.
possible_weights = np.linspace(0., 1., 11)
weight_vecs = np.array([[i] + [(1. - i) / (M_classes - 1.)] * (M_classes - 1) for i in possible_weights])
which_weight_schemes = {str(i): weight_vecs[i] for i in range(len(possible_weights))}

# Resume from earlier results if they exist.  On a fresh checkout the pickle
# has not been written yet (the cells below create it), so start from an
# empty collector instead of failing with FileNotFoundError.
try:
    alt_mega_test = load_collector('fig/test'+str(M_classes)+'_fromcmdm.pkl')
except FileNotFoundError:
    alt_mega_test = {}

## isolating the systematics with flat weights, relative to perfect

In [None]:
which_systematics = ['almost', 'noisy', 'uncertain', 'subsumed']
# "Perfect" baseline: identity plus a 1e-8 floor so no entry is exactly
# zero, rows normalised.
starter = np.eye(M_classes) + np.asarray(1.e-8)
starter /= starter.sum(axis=1, keepdims=True)
alt_mega_test = one_loop_naff(
    alt_mega_test, 'perfect', which_systematics, starter,
    dirichlet=True, fn='fig/test'+str(M_classes)+'_fromcmdm.pkl')
# alt_mega_test = load_collector('fig/test'+str(M_classes)+'_fromcmdm.pkl')

In [None]:
# cm_mega_plot(alt_mega_test['perfect'], which_systematics, m=6, fn='all_cmdm', numrows=2, numcols=2)
# cm_mega_plot(alt_mega_test['perfect'], which_systematics, m=4, fn='perfect_fromcmdn', numrows=2, numcols=2)

In [None]:
# sys_only_plot(alt_mega_test['perfect'], metric_info, all_shapes, all_styles, fn='perfect')

## isolating the systematics with flat weights, relative to almost perfect

In [None]:
which_systematics = ['noisy', 'uncertain', 'subsumed']#['tunnel', 'noisy', 'uncertain', 'subsumed']
# "Almost perfect" baseline: flat 0.5 plus 1.5 on the diagonal, rows normalised.
starter = 0.5 * np.ones((M_classes, M_classes)) + 1.5 * np.eye(M_classes)
starter /= starter.sum(axis=1, keepdims=True)
alt_mega_test = one_loop_naff(
    alt_mega_test, 'almost', which_systematics, starter,
    dirichlet=True, fn='fig/test'+str(M_classes)+'_fromcmdm.pkl')
# alt_mega_test = load_collector('fig/test_fromcmdm.pkl')
# mega_test = one_loop_naff(mega_test, 'almost', which_systematics, starter)

In [None]:
# cm_mega_plot(mega_test['almost'], which_systematics, m=4, fn='almost', numrows=2, numcols=2)

## isolating the systematics with flat weights, relative to noisy

In [None]:
which_systematics = ['uncertain', 'subsumed']#['tunnel', 'almost', 'uncertain', 'subsumed']
# "Noisy" baseline: flat 0.5 plus only 0.5 on the diagonal, rows normalised.
starter = 0.5 * np.ones((M_classes, M_classes)) + 0.5 * np.eye(M_classes)
starter /= starter.sum(axis=1, keepdims=True)
alt_mega_test = one_loop_naff(
    alt_mega_test, 'noisy', which_systematics, starter,
    dirichlet=True, fn='fig/test'+str(M_classes)+'_fromcmdm.pkl')
# alt_mega_test = load_collector('fig/test_fromcmdm.pkl')
# mega_test = one_loop_naff(mega_test, 'noisy', which_systematics, starter)

In [None]:
# cm_mega_plot(mega_test['noisy'], which_systematics, m=4, fn='noisy', numrows=2, numcols=2)

## isolating the systematics with flat weights, relative to uncertain

In [None]:
which_systematics = ['subsumed']#, 'tunnel']
# "Uncertain" baseline: every entry equal, rows normalised.
starter = np.ones((M_classes, M_classes)) + 1.e-8
starter /= starter.sum(axis=1, keepdims=True)
# mega_test = one_loop_naff(mega_test, 'uncertain', which_systematics, starter)
alt_mega_test = one_loop_naff(
    alt_mega_test, 'uncertain', which_systematics, starter,
    dirichlet=True, fn='fig/test'+str(M_classes)+'_fromcmdm.pkl')
# alt_mega_test = load_collector('fig/test_fromcmdm.pkl')

In [None]:
# cm_mega_plot(mega_test['uncertain'], which_systematics, m=4, fn='uncertain', numrows=2, numcols=2)

## isolating the systematics relative to cruise

In [None]:
# which_systematics = ['tunnel', 'almost', 'uncertain', 'noisy']
# starter = np.eye(M_classes)
# starter[1:] = np.zeros((M_classes-1, M_classes)) + starter[0, np.newaxis]
# starter = starter / np.sum(starter, axis=1)[:, np.newaxis]
# alt_mega_test = one_loop_naff(alt_mega_test, 'cruise', which_systematics, starter, dirichlet=True, fn='fig/test2_fromcmdm.pkl')
# # alt_mega_test = load_collector('fig/test_fromcmdm.pkl')
# # mega_test = one_loop_naff(mega_test, 'noisy', which_systematics, starter)

## summary plots

In [None]:
# def all_sys_only_plots(dataset, top_fn=''):
#     sys_only_plot(dataset['uncertain'], metric_info, all_shapes, fn=top_fn+'_uncertain')
#     sys_only_plot(dataset['perfect'], metric_info, all_shapes, fn=top_fn+'_perfect')
#     sys_only_plot(dataset['almost'], metric_info, all_shapes, fn=top_fn+'_almost')
#     sys_only_plot(dataset['noisy'], metric_info, all_shapes, fn=top_fn+'_noisy')

## Revamping this summary plot for new results

In [None]:
# Colour map for the 3-panel plot: the 0.4-1.0 slice of the base map
# (slightly narrower than the 0.35-1.0 slice used earlier).
fave_cmap = truncate_colormap(cmap, minval=0.4, maxval=1.0)

In [None]:
# Eleven weights for class 0, evenly spaced from 0 to 1; the remainder
# (1 - w) is shared equally among the other classes.
possible_weights = np.linspace(0., 1., 11)
other_share = (1. - possible_weights) / (M_classes - 1.)
weight_vecs = np.column_stack([possible_weights] + [other_share] * (M_classes - 1))
which_weight_schemes = dict((str(k), weight_vecs[k]) for k in range(len(possible_weights)))

#fave_cmap = mpl.cm.winter_r
# Markers: *_bg for the baseline, *_fg for the systematic added on top of it.
all_shapes = dict(uncertain='s', tunnel='o', almost='P', noisy='X', subsumed='D', perfect='*')
all_shapes_bg = dict(perfect=(3, 0, 0), almost=(4, 0, 0), noisy=(4, 0, 45))
all_shapes_fg = dict(uncertain='.', almost=(4, 2, 0), noisy=(4, 2, 45), subsumed=(3, 2, 180))

# Results of the two-class Dirichlet runs.
alt_mega_test = load_collector(fn='fig/test2_fromcmdm.pkl')

In [None]:
# Keep every baseline except the 'delta' and 'subsumed' entries.
minimal_data = {base: results for base, results in alt_mega_test.items()
                if base not in ('delta', 'subsumed')}

In [None]:
# Split the results into the three groups shown in the multi-panel figure.
# upper:  'subsumed' added to the 'perfect' baseline only
upper_data = {
    base: {added: res for added, res in minimal_data[base].items() if added == 'subsumed'}
    for base in minimal_data if base == 'perfect'
}
# middle: 'subsumed' added to every other baseline
middle_data = {
    base: {added: res for added, res in minimal_data[base].items() if added == 'subsumed'}
    for base in minimal_data if base != 'perfect'
}
# lower:  everything except 'subsumed', on all baselines but 'cruise' and 'uncertain'
lower_data = {
    base: {added: res for added, res in minimal_data[base].items() if added != 'subsumed'}
    for base in minimal_data if base not in ('cruise', 'uncertain')
}

In [None]:
def complete_metric_plot(dataset, metric_names, xlims=None, ylims=None, fn='test_combo'):
    """Scatter one metric against another for every (baseline, added systematic) pair.

    Parameters
    ----------
    dataset : dict
        ``dataset[base][added][metric][scheme]`` is a sequence of metric
        values; element 1 of each is plotted (the layout and indexing match
        what ``one_loop_naff`` stores, where element 1 follows the first
        altered class).
    metric_names : sequence of two str
        Metric on the x axis, then metric on the y axis.
    xlims, ylims : pair of float or None
        Optional axis limits.
    fn : str
        Output file stem; the figure is saved to ``fig/<fn>.png``.

    Each pair is drawn twice: a large faint "background" marker that encodes
    the baseline and a smaller "foreground" marker that encodes the added
    systematic.  Colour encodes the weight scheme.  The 'uncertain' baseline
    is skipped.  Both axes are logarithmic.

    Reads the module-level ``fave_cmap``, ``possible_weights``,
    ``all_shapes_bg`` and ``all_shapes_fg``.
    """

    fig, ax = plt.subplots(figsize=(5,5))

    fig.subplots_adjust(right=1.)
    bases, additions = [], []
    for base in dataset.keys():# this is the base upon which a systematic was added
        if base != 'uncertain':
            bases.append(base)
            for added in dataset[base].keys():# this is the systematic added on
                additions.append(added)
                # One point per weight scheme '0'..'10'.
                # NOTE(review): the 11 is hard-coded and must match
                # len(possible_weights).
                xvals = [dataset[base][added][metric_names[0]][str(i)][1] for i in range(11)]
                yvals = [dataset[base][added][metric_names[1]][str(i)][1] for i in range(11)]
                # NOTE(review): the background markers are coloured with the
                # weights flipped, the foreground ones with the weights in
                # order -- confirm this is intentional.
                ax.scatter(xvals, yvals,
                  c=fave_cmap(np.flip(possible_weights, 0)),
                  s=100.,
                  marker=all_shapes_bg[base],
                  alpha=0.25)
                ax.scatter(xvals, yvals,
                  c=fave_cmap(possible_weights),
                  marker=all_shapes_fg[added],
                  alpha=0.5)
    # De-duplicate before building the legends.
    bases = list(set(bases))
    additions = list(set(additions))
    b_handles, a_handles = [], []
    # Dummy black points at (-1, -1) exist only to provide legend handles.
    for base in bases:
        if base != 'uncertain':
            thing = plt.scatter(-1., -1., label=base, color='k', alpha=0.25, marker=all_shapes_bg[base])
            b_handles.append(thing)
    bg_legend = plt.legend(handles=b_handles, loc='upper left')
    for added in additions:
        thing = plt.scatter(-1., -1., label=added, color='k', alpha=0.75, marker=all_shapes_fg[added])
        a_handles.append(thing)
    # Re-attach the first legend as a plain artist so that creating the
    # second legend does not replace it.
    lax = plt.gca().add_artist(bg_legend)
    plt.legend(handles=a_handles, loc='lower right', fontsize='large')

    ax.set_xlabel(metric_names[0])
    ax.set_ylabel(metric_names[1])
    if xlims is not None:
        ax.set_xlim(xlims[0], xlims[1])
    if ylims is not None:
        ax.set_ylim(ylims[0], ylims[1])
    ax.semilogx()
    ax.semilogy()

    # Stand-alone vertical colorbar in its own axes at the right edge.
    cax = fig.add_axes([1., 0.125, 0.05, 0.745])#([0.05, 0.80, 0.9, 0.15])
    cb = mpl.colorbar.ColorbarBase(cax, cmap=fave_cmap,
                                norm=mpl.colors.Normalize(vmin=0., vmax=1.),
                                orientation='vertical')
    cb.set_label('Fractional Weight', rotation=270, labelpad=15.)
    plt.subplots_adjust()
    plt.savefig('fig/'+fn+'.png', dpi=250)
    plt.show()
    plt.close()
    return

In [None]:
# complete_metric_plot(minimal_data, ['Brier', 'LogLoss'], fn='all', xlims=(1.e-2, 1.05), ylims=(1.e-2, 19.5))

In [None]:
# complete_metric_plot(upper_data, ['Brier', 'LogLoss'], fn='upper_zoom', xlims=(0., 1.05), ylims=(0., 19.5))

In [None]:
# complete_metric_plot(middle_data, ['Brier', 'LogLoss'], fn='middle_zoom', xlims=(0., 0.675), ylims=(0., 1.75))

In [None]:
# complete_metric_plot(lower_data, ['Brier', 'LogLoss'], fn='lower_zoom', xlims=(0., 0.275), ylims=(0., 0.75))

In [None]:
# Reload the two-class Dirichlet results from disk.
alt_mega_test = load_collector(fn='fig/test2_fromcmdm.pkl')

In [None]:
# For every (baseline, added systematic) pair, print the LogLoss-vs-Brier
# slope between weight schemes '0' and '10' together with both end points.
for base in alt_mega_test.keys():
    for added in alt_mega_test[base].keys():
        logloss = alt_mega_test[base][added]['LogLoss']
        brier = alt_mega_test[base][added]['Brier']
        # Element 1 of each stored sequence is the value being compared.
        y_lo, y_hi = logloss['0'][1], logloss['10'][1]
        x_lo, x_hi = brier['0'][1], brier['10'][1]
        slope = (y_hi - y_lo) / (x_hi - x_lo)
        minimum = (round(x_lo, 4), round(y_lo, 4))
        maximum = (round(x_hi, 4), round(y_hi, 4))
        print(base+' plus '+added)
        print('slope = '+str(round(slope, 4)))
        print('extrema = '+str((minimum, maximum)))

In [None]:
# Configuration of the multi-panel summary figure, keyed by panel name.
order = ['upper', 'middle', 'lower']
# (x, y) values per panel, from which paneled_metric_plot derives its axis limits.
lims = {
    'upper': (1.05, 19.5),
    'middle': (0.675, 1.75),
    'lower': (0.275, 0.75),
    'all': (1.1, 20.),
}
# Colour associated with each panel.
boxcols = {
    'lower': '#D179F2',
    'middle': '#F27979',
    'upper': '#FFE17F',
    'all': 'None',
}
# Subset of the results shown in each panel.
multiscaledata = {
    'all': minimal_data,
    'upper': upper_data,
    'middle': middle_data,
    'lower': lower_data,
}
titles = {
    'upper': 'A) subsumed systematic on perfect baseline',
    'middle': 'B) subsumed systematic on non-perfect baselines',
    'lower': 'C) non-subsumed systematics on all baselines',
}

In [None]:
def paneled_metric_plot(full_dataset, metric_names, order, lims, boxcols, titles, fn='multipanel_res'):
    """Scatter one metric against another on three linked panels and save the figure.

    The layout is fixed: the first two scales in ``order`` share the top row and
    the third spans the two rows below. Every panel except the last also
    outlines the axis range of the panel that follows it and tags that outline
    with the first character of the following panel's title.

    Parameters
    ----------
    full_dataset : dict
        Maps a scale name to a nested dict that is read as
        ``full_dataset[scale][base][added][metric][str(j)][1]`` for
        ``j = 0..10``. Entries whose ``base`` is ``'uncertain'`` are skipped.
    metric_names : sequence of str
        ``metric_names[0]`` goes on the x axis, ``metric_names[1]`` on the y axis.
    order : sequence of str
        Scale names of the three panels; the first three entries are used.
    lims : dict
        Maps a scale name to ``(x_limit, y_limit)``. Each axis runs from
        ``-0.05 * limit`` to ``limit``.
    boxcols : dict
        Kept for signature compatibility; this layout does not read it.
    titles : dict
        Maps a scale name to its panel title.
    fn : str
        Stem of the output file, written to ``'fig/' + fn + '.png'``.

    Notes
    -----
    Relies on the notebook globals ``pylab``, ``plt``, ``mpl``, ``np``,
    ``fave_cmap``, ``possible_weights``, ``all_shapes_bg`` and ``all_shapes_fg``.
    """
    import matplotlib.patches as patches
    import matplotlib.gridspec as gridspec

    def _padded_limits(scale):
        # Axis range of a scale: a 5% margin below zero up to the stated limit.
        xmax, ymax = lims[scale][0], lims[scale][1]
        return (-0.05 * xmax, xmax), (-0.05 * ymax, ymax)

    gs = gridspec.GridSpec(3, 2)

    fig = pylab.figure(figsize=(12,12))
    # Invisible full-figure axes that only carry the shared axis labels.
    bigAxes = pylab.axes(frameon=False)
    bigAxes.set_xticks([])
    bigAxes.set_yticks([])
    bigAxes.set_xlabel(metric_names[0], fontsize=20, labelpad=25)
    bigAxes.set_ylabel(metric_names[1], fontsize=20, labelpad=30)

    axupper = fig.add_subplot(gs[0, 0])
    axmiddle = fig.add_subplot(gs[0, 1])
    axlower = fig.add_subplot(gs[1:, :])
    allmyaxes = [axupper, axmiddle, axlower]
    last = len(allmyaxes) - 1

    for i, ax in enumerate(allmyaxes):
        scale = order[i]
        dataset = full_dataset[scale]
        xlims, ylims = _padded_limits(scale)

        # Names are collected in first-seen order so the legend is reproducible
        # from run to run (set() iteration order is not).
        bases, additions = [], []
        for base in dataset.keys():# this is the base upon which a systematic was added
            if base == 'uncertain':
                continue
            bases.append(base)
            for added in dataset[base].keys():# this is the systematic added on
                if added not in additions:
                    additions.append(added)
                xvals = [dataset[base][added][metric_names[0]][str(j)][1] for j in range(11)]
                yvals = [dataset[base][added][metric_names[1]][str(j)][1] for j in range(11)]
                # Baseline marker underneath, coloured with the reversed weights...
                ax.scatter(xvals, yvals,
                           c=fave_cmap(np.flip(possible_weights, 0)),
                           s=350.,
                           marker=all_shapes_bg[base],
                           alpha=0.25)
                # ...and the systematic marker on top, coloured with the weights.
                ax.scatter(xvals, yvals,
                           c=fave_cmap(possible_weights),
                           s=350.,
                           marker=all_shapes_fg[added],
                           alpha=0.5)
        ax.set_title(titles[scale], fontsize=17.5)

        # Legend proxies: black markers drawn at (-1, -1), outside the axis range.
        b_handles = [ax.scatter(-1., -1., label=base + ' as baseline', color='k',
                                alpha=0.25, marker=all_shapes_bg[base])
                     for base in bases]
        a_handles = [ax.scatter(-1., -1., label=added + ' as systematic', color='k',
                                alpha=0.75, marker=all_shapes_fg[added])
                     for added in additions]

        if i != last:
            ax.legend(handles=b_handles+a_handles, loc='upper left', fontsize='x-large', markerscale=2)
        else:
            # The last panel also gets the shared colourbar, placed right of the figure.
            cax = fig.add_axes([1., 0.1, 0.025, 0.85])
            cb = mpl.colorbar.ColorbarBase(cax, cmap=fave_cmap,
                                 norm=mpl.colors.Normalize(vmin=0., vmax=1.),
                                 orientation='vertical')
            cax.set_ylabel('Fractional Weight', rotation=270, labelpad=15., fontsize=20.)
            ax.legend(handles=b_handles+a_handles, loc='lower right', fontsize='xx-large', markerscale=2.5)

        ax.set_xlim(xlims[0], xlims[1])
        ax.set_ylim(ylims[0], ylims[1])

        if i < last:
            # Outline the region covered by the next panel. Rectangle takes
            # (xy, width, height), so the extents are upper minus lower bound;
            # passing the upper bounds directly would leave the box 5% short
            # of the corner where the label is written.
            next_scale = order[i+1]
            next_xlims, next_ylims = _padded_limits(next_scale)
            rect = patches.Rectangle((next_xlims[0], next_ylims[0]),
                                     next_xlims[1] - next_xlims[0],
                                     next_ylims[1] - next_ylims[0],
                                     linewidth=3, edgecolor='black', alpha=0.09, facecolor='none')
            ax.add_patch(rect)
            ax.text(next_xlims[1], next_ylims[1], titles[next_scale][0])

    plt.subplots_adjust()

    fig.tight_layout()
    pylab.savefig('fig/'+fn+'.png', dpi=250)
    pylab.show()
    return

In [None]:
# Draw the three-panel Brier-vs-LogLoss figure; it is saved as fig/multipanel_res.png.
paneled_metric_plot(multiscaledata, ['Brier', 'LogLoss'], order, lims, boxcols, titles, fn='multipanel_res')

In [None]:
def paneled_metric_plot(full_dataset, metric_names, order, lims, boxcols, fn='multipanel_res'):
    """Scatter one metric against another on a 2x2 grid of nested scales and save it.

    Each panel shades the axis ranges of every smaller scale listed in
    ``order[1:]`` with that scale's colour, and tints its own legends with its
    own colour (all panels but the first). The second panel carries an inset
    colourbar.

    NOTE(review): this definition reuses the name of the three-panel variant
    that takes a ``titles`` argument; running this cell replaces that function,
    and the two signatures are not interchangeable.

    Parameters
    ----------
    full_dataset : dict
        Maps a scale name to a nested dict read as
        ``full_dataset[scale][base][added][metric][str(j)][1]`` for
        ``j = 0..10``. Entries whose ``base`` is ``'uncertain'`` are skipped.
    metric_names : sequence of str
        ``metric_names[0]`` goes on the x axis, ``metric_names[1]`` on the y axis.
    order : sequence of str
        Scale names, one per panel; at most the first four are drawn.
    lims : dict
        Maps a scale name to ``(x_limit, y_limit)``; axes run from 0 to the limit.
    boxcols : dict
        Maps a scale name to the colour used for its shaded box and legend frames.
    fn : str
        Stem of the output file, written to ``'fig/' + fn + '.png'``.

    Notes
    -----
    Relies on the notebook globals ``pylab``, ``plt``, ``mpl``, ``np``,
    ``inset_axes``, ``fave_cmap``, ``possible_weights``, ``all_shapes_bg`` and
    ``all_shapes_fg``.
    """
    fig = pylab.figure(figsize=(10, 10))
    # Invisible full-figure axes that only carry the shared axis labels.
    bigAxes = pylab.axes(frameon=False)
    bigAxes.set_xticks([])
    bigAxes.set_yticks([])
    bigAxes.set_xlabel(metric_names[0], fontsize=20, labelpad=25)
    bigAxes.set_ylabel(metric_names[1], fontsize=20, labelpad=25)

    # The grid has four slots; a shorter `order` simply leaves slots empty.
    for i, scale in enumerate(order[:4]):
        ax = fig.add_subplot(2, 2, i+1)

        dataset = full_dataset[scale]

        xlims = (0, lims[scale][0])
        ylims = (0, lims[scale][1])

        # Shade the footprint of every strictly smaller scale.
        for o in order[1:]:
            if lims[scale][0] > lims[o][0] and lims[scale][1] > lims[o][1]:
                ax.fill_between([0., lims[o][0]], 0., lims[o][1], color=boxcols[o], alpha=0.2)

        bases, additions = [], []
        for base in dataset.keys():# this is the base upon which a systematic was added
            if base != 'uncertain':
                bases.append(base)
                for added in dataset[base].keys():# this is the systematic added on
                    additions.append(added)
                    # `j`, not `i`: the panel index must survive this loop on
                    # interpreters where comprehension variables leak.
                    xvals = [dataset[base][added][metric_names[0]][str(j)][1] for j in range(11)]
                    yvals = [dataset[base][added][metric_names[1]][str(j)][1] for j in range(11)]
                    ax.scatter(xvals, yvals,
                      c=fave_cmap(np.flip(possible_weights)),
                      s=100.,
                      marker=all_shapes_bg[base],
                      alpha=0.25)
                    ax.scatter(xvals, yvals,
                      c=fave_cmap(possible_weights),
                      marker=all_shapes_fg[added],
                      alpha=0.5)

        bases = list(set(bases))
        additions = list(set(additions))

        # Two legends per panel: baselines (upper left) and systematics (lower
        # right). Handles are black proxy markers drawn at (-1, -1), off-axis.
        b_handles, a_handles = [], []
        for base in bases:
            thing = ax.scatter(-1., -1., label=base, color='k', alpha=0.25, marker=all_shapes_bg[base])
            b_handles.append(thing)
        bg_legend = ax.legend(handles=b_handles, loc='upper left', fontsize='large')
        if i > 0:
            frame = bg_legend.get_frame()
            frame.set_alpha(0.2)
            frame.set_facecolor(boxcols[scale])
        for added in additions:
            thing = ax.scatter(-1., -1., label=added, color='k', alpha=0.75, marker=all_shapes_fg[added])
            a_handles.append(thing)
        # Re-attach the first legend; the second legend() call would otherwise replace it.
        lax = ax.add_artist(bg_legend)
        fg_legend = ax.legend(handles=a_handles, loc='lower right', fontsize='large')
        if i > 0:
            frame = fg_legend.get_frame()
            frame.set_alpha(0.2)
            frame.set_facecolor(boxcols[scale])

        if xlims is not None:
            ax.set_xlim(xlims[0], xlims[1])
        if ylims is not None:
            ax.set_ylim(ylims[0], ylims[1])
        # Push tick labels to the outer edges of the grid.
        if i%2 == 1:
            ax.yaxis.tick_right()
        if i < 2:
            ax.xaxis.tick_top()
        if i == 1:
            # Inset colourbar for the weight colour scale, on the left of the second panel.
            axins = inset_axes(ax,
                    width="5%",
                    height="75%",
                    loc=6)
            mpl.colorbar.ColorbarBase(axins, cmap=fave_cmap,
                                norm=mpl.colors.Normalize(vmin=0., vmax=1.))
            axins.xaxis.set_ticklabels([])
            axins.set_ylabel('Fractional Weight', rotation=270, labelpad=15., fontsize=15.)
    plt.subplots_adjust()

    fig.subplots_adjust(hspace=0.01, wspace=0.01)
    pylab.savefig('fig/'+fn+'.png', dpi=250)
    pylab.show()
    return

# Propagating Dirichlet change: not done past here

# same thing, but as a function of the weight on that class rather than number of classes affected

Note: class 6 is specified because of weird behavior when a class has no true members.

In [None]:
# Result collectors for the relative- and absolute-weight tests, and the index
# of the single class whose weight is varied.
relwttest, abswttest = {}, {}
only_weight = 0

# Candidate weights for that class: 'rel' is relative to a unit weight on every
# other class, 'abs' is its share of a total weight of one.
weight_choices = {
    'rel': np.arange(0., 2.25, 0.25),
    'abs': np.arange(0., 1.1, 0.1),
}
big_W = {
    'rel': len(weight_choices['rel']),
    'abs': len(weight_choices['abs']),
}

# wts[style] has one row per candidate weight and one column per class;
# every row is normalised to sum to one.
wts = {}

wts['rel'] = np.ones((big_W['rel'], M_classes))
for wi, choice in enumerate(weight_choices['rel']):
    wts['rel'][wi][only_weight] = choice
wts['rel'] = wts['rel'] / np.sum(wts['rel'], axis=1)[:, np.newaxis]

wts['abs'] = np.empty((big_W['abs'], M_classes))
for wi, choice in enumerate(weight_choices['abs']):
    # the remaining weight is spread evenly over the other classes
    wts['abs'][wi] = np.full(M_classes, (1. - choice) / (M_classes - 1.))
    wts['abs'][wi][only_weight] = choice
wts['abs'] = wts['abs'] / np.sum(wts['abs'], axis=1)[:, np.newaxis]

# Starting matrices handed to make_cm, keyed by baseline name.
_ones = np.ones((M_classes, M_classes))
_eye = np.eye(M_classes)
which_baselines = {
    'uncertain': np.ones((M_classes, M_classes)),
    'perfect': np.eye(M_classes),
    'almost': 0.5 * _ones + 1.5 * _eye,
    'noisy': 0.5 * _ones + 0.5 * _eye,
}

In [None]:
def one_loop_wt(collector, name, which_systematics, style='rel'):# mega_test, 'reltoagnostic'
    """Evaluate every metric on one 'tunnel' classification under each weight row.

    Builds a confusion matrix with ``make_cm(starter, only_weight, 'tunnel')``
    from ``which_systematics[name]``, classifies ``minitruth`` with it once, and
    then scores those probabilities with every metric in ``which_metrics`` for
    each row of ``wts[style]``.

    Parameters
    ----------
    collector : dict
        Receives the result under ``collector[name]``; modified in place.
    name : str
        Key into ``which_systematics``.
    which_systematics : dict
        Maps names to starting matrices.
    style : str
        Key into ``big_W`` and ``wts`` (``'rel'`` by default).

    Returns
    -------
    dict
        The same ``collector`` object. The stored entry holds ``'cm'``,
        ``'probs'`` and, per metric, a list with one value per weight row.

    Notes
    -----
    Relies on the globals ``make_cm``, ``classifier``, ``minitruth``,
    ``only_weight``, ``which_metrics``, ``met_evals``, ``big_W`` and ``wts``.
    """
    starter = which_systematics[name]
    cm = make_cm(starter, only_weight, 'tunnel')
    probs = classifier.classify(cm, minitruth, other=False)

    # NOTE(review): 'cm' stores the starting matrix, not the matrix returned by
    # make_cm -- confirm that is intended.
    systest = {'cm': starter, 'probs': probs}
    systest.update((met, []) for met in which_metrics)

    for row in range(big_W[style]):
        for met in which_metrics:
            score = met_evals[met].evaluate(probs, minitruth, averaging=wts[style][row])
            systest[met].append(score)

    collector[name] = systest
    return collector

In [None]:
# Absolute-weight test: one entry per baseline, filled in the order listed.
for _baseline in ('uncertain', 'perfect', 'almost', 'noisy'):
    abswttest = one_loop_wt(abswttest, _baseline, which_baselines, style='abs')

In [None]:
# Relative-weight test: one entry per baseline, filled in the order listed.
for _baseline in ('uncertain', 'perfect', 'almost', 'noisy'):
    relwttest = one_loop_wt(relwttest, _baseline, which_baselines, style='rel')

In [None]:
# connect lines along systematic, weighting, and affected class
def wt_only_plot(dataset, metric_info, shapes, style='rel'):
    """Plot two metrics against the class weight, one twin-axis panel per baseline.

    Panels are laid out on a 2x2 grid in the order of ``dataset``'s keys. The
    first metric in ``metric_info`` is drawn on each panel's left axis and the
    second on its right axis, each in the colour ``metric_info`` assigns to it.
    The figure is saved as ``'fig/tunnel_' + style + '_weights_on_baselines.png'``.

    Parameters
    ----------
    dataset : dict
        ``dataset[baseline][metric_name]`` is the sequence of metric values,
        one per row of ``wts[style]``.
    metric_info : dict
        Maps a metric name to its plotting colour; the first two keys are used.
    shapes : dict
        Maps a baseline name to a marker.
    style : str
        Key into the globals ``wts`` and ``weight_choices``.

    Notes
    -----
    The x values are column 0 of ``wts[style]``.
    NOTE(review): that matches the varied class only while ``only_weight`` is 0.
    NOTE(review): a later cell defines another ``wt_only_plot`` with a
    different signature, which replaces this one once run.
    """
    left_ticks = [0.5, 1.5, 2.5, 3.5]
    right_ticks = [0.02, 0.04, 0.06, 0.08, 0.1]

    baselines = list(dataset.keys())
    fig = pylab.figure(figsize=(10.2, 10.))

    # Invisible full-figure axes (plus a twin) that carry the shared labels.
    bigAxes = pylab.axes(frameon=False)
    bigAxes.set_xticks([])
    bigAxes.set_yticks([])
    bigAxesP = bigAxes.twinx()
    bigAxesP.set_yticks([])

    plot_names = list(metric_info.keys())
    left_name, right_name = plot_names[0], plot_names[1]
    bigAxes.set_ylabel(left_name, fontsize=20, labelpad=25, color=metric_info[left_name])
    bigAxes.set_xlabel(style+r'. weight on class', fontsize=20, labelpad=25)
    bigAxesP.set_ylabel(right_name, rotation=270, fontsize=20,
                        labelpad=50, color=metric_info[right_name])
    bigAxes.set_title('tunnel on baselines')

    for si, s in enumerate(baselines):
        ax = fig.add_subplot(221+si)
        axp = ax.twinx()
        weights_on_class = wts[style].T[0]

        # Fix the tick positions everywhere, then label only the outer edges.
        ax.set_xticks(weights_on_class)
        ax.set_yticks(left_ticks)
        axp.set_yticks(right_ticks)
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        axp.set_yticklabels([])
        if si > 1:
            # bottom row: weight labels truncated to three characters
            ax.set_xticklabels([str(w)[:3] for w in weight_choices[style]])
        if si%2 == 1:
            axp.set_yticklabels(right_ticks)
        else:
            ax.set_yticklabels(left_ticks)

        # Nudge the panel slightly up and to the right.
        position = ax.get_position()
        position.x0 += 0.01
        position.y0 += 0.02
        position.x1 += 0.01
        position.y1 += 0.02
        ax.set_position(position)

        ax.text(.5, .9, s,
                horizontalalignment='center',
                transform=ax.transAxes, fontsize=20)
        ax.plot(weights_on_class, dataset[s][left_name],
                marker=shapes[s],
                alpha=0.5, c=metric_info[left_name])
        axp.plot(weights_on_class, dataset[s][right_name],
                 marker=shapes[s],
                 alpha=0.5, c=metric_info[right_name])
        ax.set_ylim(-0.05, 2.75)
        axp.set_ylim(-0.001, 0.081)

    fig.tight_layout()
    fig.subplots_adjust(hspace=0.01, wspace=0.01)
    pylab.savefig('fig/tunnel_'+style+'_weights_on_baselines.png', dpi=250)
    pylab.show()
    return

In [None]:
# wt_only_plot(abswttest, metric_info, all_shapes, style='abs')

In [None]:
# wt_only_plot(relwttest, metric_info, all_shapes, style='rel')

# probably not using below here

In [None]:
def make_wv(m, wt_kw, n_classes=None, pops=None):
    """Build a normalised per-class weight vector for one weighting scheme.

    Parameters
    ----------
    m : int
        Index of the class singled out by the ``'up'`` and ``'down'`` schemes;
        ignored by the others.
    wt_kw : str
        One of ``'by pop'`` (weights follow the class populations), ``'flat'``
        (equal weights), ``'up'`` (class ``m`` counts double) or ``'down'``
        (class ``m`` counts half).
    n_classes : int, optional
        Number of classes; defaults to the notebook global ``M_classes``.
    pops : array-like, optional
        Class populations for ``'by pop'``; defaults to the notebook global
        ``minipops``.

    Returns
    -------
    numpy.ndarray
        Weights that sum to one.

    Raises
    ------
    ValueError
        If ``wt_kw`` is not one of the four known schemes.
    """
    if wt_kw == 'by pop':
        wv = np.asarray(minipops if pops is None else pops, dtype=float)
    elif wt_kw in ('flat', 'up', 'down'):
        # The global is only read when no explicit size is given.
        wv = np.ones(M_classes if n_classes is None else n_classes)
        if wt_kw == 'up':
            wv[m] += 1.
        elif wt_kw == 'down':
            wv[m] /= 2.
    else:
        # Fail with a clear message rather than an unbound-variable error below.
        raise ValueError("unknown weighting scheme %r; expected 'by pop', 'flat', 'up' or 'down'" % (wt_kw,))
    wv = wv / np.sum(wv)
    assert np.isclose(np.sum(wv), 1.)
    return wv

In [None]:
# Weighting schemes understood by make_wv.
which_weighted = [
    'by pop',
    'flat',
    'up',
    'down',
]
# One colour sequence per scheme, listed in the same order as which_weighted:
# population-dependent colours for 'by pop', one constant colour per class otherwise.
wt_colors = [
    mpl.cm.winter_r(minipops),
    [mpl.cm.autumn_r(0.5)] * M_classes,
    [mpl.cm.autumn_r(1.)] * M_classes,
    [mpl.cm.autumn_r(0.)] * M_classes,
]
# Weight of a single class under equal weighting.
wt_const = 1. / float(M_classes)

# Metrics to evaluate.
which_metrics = ['Brier', 'LogLoss']

In [None]:
# miniweights[scheme, affected class] is the weight vector make_wv returns for
# that pair. Rows for classes outside which_affected stay uninitialised (np.empty).
miniweights = np.empty((len(which_weighted), M_classes, M_classes))
for m in which_affected:
    for wi, w in enumerate(which_weighted):
        miniweights[wi, m] = make_wv(m, w)

In [None]:
# warning: slow!!!
# For every systematic and every affected class: build the confusion matrix,
# classify minitruth with it, and score the result with each metric under each
# weighting scheme. Each systematic's results are pickled as soon as it finishes.
# NOTE(review): `cpkl` is not imported in the visible part of the notebook --
# presumably a pickle alias; confirm it is in scope.
for s in which_systematics:
    minitest[s] = {}
    for m in which_affected:
        entry = minitest[s][str(m)] = {}
        entry['cm'] = make_cm(starter, m, s)
        entry['probs'] = classifier.classify(entry['cm'], minitruth,
                                             delta=minidelta, other=False)
        entry['results'] = {}
        for met in which_metrics:
            entry[met] = {}
            for wi in range(len(which_weighted)):
                w = which_weighted[wi]
                D = getattr(proclam.metrics, met)()
                entry[met][w] = D.evaluate(entry['probs'], minitruth,
                                           averaging=miniweights[wi][m])
    print('finished '+s)
    # Close the file deterministically instead of leaving the handle to the garbage collector.
    with open(s+'_data.pkl', 'wb') as outfile:
        cpkl.dump(minitest[s], outfile)
    print('saved '+s)

In [None]:
# For each systematic: build a confusion matrix from the identity matrix with
# make_cm(..., 6, s), classify minitruth with it once, and evaluate every metric
# under each weight vector.
# NOTE(review): `weight_choices` and `wts` are indexed by integer position here,
# while the 'rel'/'abs' setup cell in this notebook builds both as dicts keyed by
# style -- confirm which definitions are expected to be live when this runs.
# NOTE(review): `wttest` is not created in this cell; presumably initialised elsewhere.
for s in which_systematics:
    wttest[s] = {}
    for met in which_metrics:
        # one slot per weight choice, filled in the loop below
        wttest[s][met] = np.empty(np.shape(weight_choices))
    temp_cm = make_cm(np.eye(M_classes), 6, s)
    wttest[s]['cm'] = temp_cm
#     print(wttest[s]['cm'])
    probs = classifier.classify(wttest[s]['cm'], minitruth, other=False)
    wttest[s]['probs'] = probs# systest[s]['probs'][1]
    for wi in range(len(weight_choices)):
        w = wts[wi]
        for met in which_metrics:
            D = met_evals[met]
            metval = D.evaluate(wttest[s]['probs'], minitruth, averaging=w)
            wttest[s][met][wi] = metval
    print(s)

In [None]:
def cm_mega_plot(info_dict, systematics, m, fn='', numrows=2, numcols=4):
    """Show one confusion matrix per systematic on a shared-colourbar image grid.

    Cell ``j`` of the grid displays ``info_dict[systematics[j]]['cm'][m]`` on a
    fixed 0-1 colour scale and is labelled with the systematic's name. The
    figure is saved to ``'fig/systest_' + fn + '_cm.png'``.

    Parameters
    ----------
    info_dict : dict
        Maps a systematic name to a dict whose ``'cm'`` entry is indexed by ``m``.
    systematics : sequence of str
        Names to plot, indexable by position; needs at least
        ``numrows * numcols`` entries.
    m : int or key
        Index into each ``'cm'`` entry; also shown in the title.
    fn : str
        Title prefix and part of the output file name.
    numrows, numcols : int
        Shape of the image grid.

    Notes
    -----
    Relies on the notebook globals ``pylab``, ``ImageGrid`` and ``fave_cmap``.
    """
    fig = pylab.figure(figsize=(5.1, 5.1))
    # Invisible full-figure axes that carry the title and shared axis labels.
    bigAxes = pylab.axes(frameon=False)
    bigAxes.set_xticks([])
    bigAxes.set_yticks([])
    bigAxes.set_title(fn+' with '+str(m)+' affected')

    grid_options = dict(
        nrows_ncols=(numrows,numcols),
        axes_pad=0.,
        share_all=True,
        cbar_location="right",
        cbar_mode="single",
        cbar_size="5%",
        cbar_pad=0.01,
    )
    grid = ImageGrid(fig, 111, **grid_options)

    for j in range(numcols * numrows):
        ax = grid[j]
        # no ticks or tick labels on the individual matrices
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])

        # nudge the cell slightly up and to the right
        position = ax.get_position()
        position.x0 += 0.01
        position.y0 += 0.02
        position.x1 += 0.01
        position.y1 += 0.02
        ax.set_position(position)

        testname = systematics[j]
        im = ax.imshow(info_dict[testname]['cm'][m], vmin=0., vmax=1., cmap=fave_cmap)
        ax.text(.5,.9,testname,horizontalalignment='center',transform=ax.transAxes, fontsize=16)

    # The colourbar is attached through the last cell drawn and its image.
    ax.cax.colorbar(im)
    ax.cax.toggle_label(True)
    bigAxes.set_ylabel(r'true class', fontsize=20)
    bigAxes.set_xlabel(r'predicted class', fontsize=20)
    pylab.tight_layout()
    pylab.savefig('fig/systest_'+fn+'_cm.png', bbox_inches='tight', pad_inches=0)

In [None]:
# connect lines along systematic, weighting, and affected class
def wt_only_plot(dataset, metric_names, shapes):
    """Plot two metrics against the weight grid for every systematic on one twin-axis panel.

    ``metric_names[1]`` is drawn in blue on the left axis and
    ``metric_names[0]`` in green on the right axis; each systematic gets its
    own marker and a legend entry.

    NOTE(review): an earlier cell defines another ``wt_only_plot`` that takes
    ``(dataset, metric_info, shapes, style)``; running this cell replaces it.

    Parameters
    ----------
    dataset : dict
        ``dataset[systematic][metric_name]`` is the sequence of metric values,
        one per entry of the global ``weight_choices``.
    metric_names : sequence of str
        The two metric names.
    shapes : mapping or iterable
        Markers for the systematics: either a mapping from systematic name to
        marker, or an iterable of markers in the order of ``dataset``'s keys.

    Notes
    -----
    Relies on the notebook globals ``pylab``, ``plt`` and ``weight_choices``.
    NOTE(review): ``weight_choices`` is used directly as the x values, so it is
    assumed to be a flat sequence here, not the per-style dict -- confirm.
    """
    # Materialise the keys: dict views cannot be indexed on Python 3.
    systematics = list(dataset.keys())
    if hasattr(shapes, 'keys'):
        markers = [shapes[s] for s in systematics]
    else:
        markers = list(shapes)

    fig = pylab.figure(figsize=(5, 5))
    ax = fig.add_subplot(111)
    axp = ax.twinx()
    for si, s in enumerate(systematics):
        marker = markers[si]
        axp.plot(weight_choices, dataset[s][metric_names[0]], marker=marker, color='g', alpha=0.25)
        ax.plot(weight_choices, dataset[s][metric_names[1]], marker=marker, color='b', alpha=0.25)
        # legend proxy at (-1, -1), outside the axis limits set below
        ax.scatter(-1., -1., c='k',
                   marker=marker,
                   alpha=0.25, label=s)
    ax.legend(loc='upper left')
    ax.set_xlabel('weight relative to unaffected classes')
    ax.set_ylabel(metric_names[1])
    axp.set_ylabel(metric_names[0])
    ax.set_xlim(-0.25, 2.25)
    ax.set_ylim(1., 1.7)
    axp.set_ylim(0.044, 0.055)
    plt.show()
    return

In [None]:
# Plot both metrics against the weight grid for every systematic stored in wttest.
# NOTE(review): `markerlist` is assigned in a cell further down this notebook --
# confirm it is already defined when this runs on a fresh kernel.
wt_only_plot(wttest, which_metrics, markerlist)

## first, when the systematics are added to perfect classifier

In [None]:
# Classifier that draws class probabilities from a confusion matrix.
classifier = proclam.classifiers.FromCM()

# Results of the per-class systematics test, filled by the evaluation loop below.
minitest = {}
# Every class takes a turn as the affected one.
which_affected = range(M_classes)
# Systematic name -> plot marker.
systematic_info = {'uncertain':'s', 'almost':'P', 'noisy':'X', 'subsumed': 'D'}
# Lists rather than dict views, because the plotting code indexes them by position.
which_systematics = list(systematic_info.keys())
markerlist = list(systematic_info.values())

minidelta = 0.1
# Identity confusion matrix with a small constant added to every entry.
starter = np.eye(M_classes) + 1.e-8# + minidelta * np.ones((M_classes, M_classes))
# starter = starter / np.sum(starter, axis=1)[:, np.newaxis]

In [None]:
# Only the population-based weighting is used here.
# (Other schemes make_wv accepts: 'flat', 'up', 'down'.)
which_weighted = [
    'by pop',
]
# One colour sequence per entry of which_weighted.
wt_colors = [
    mpl.cm.winter_r(minipops),
]
# Weight of a single class under equal weighting.
wt_const = 1. / float(M_classes)

# Metrics to evaluate.
which_metrics = ['Brier', 'LogLoss']

In [None]:
# miniweights[scheme, affected class] is the weight vector make_wv returns for
# that pair. Rows for classes outside which_affected stay uninitialised (np.empty).
miniweights = np.empty((len(which_weighted), M_classes, M_classes))
for m in which_affected:
    for wi, w in enumerate(which_weighted):
        miniweights[wi, m] = make_wv(m, w)

In [None]:
# warning: slow!!!
# For every systematic and every affected class: build the confusion matrix,
# classify minitruth with it, and score the result with each metric under each
# weighting scheme. Each systematic's results are pickled (with a '180718_'
# file-name prefix) as soon as it finishes.
# NOTE(review): `cpkl` is not imported in the visible part of the notebook --
# presumably a pickle alias; confirm it is in scope.
for s in which_systematics:
    minitest[s] = {}
    for m in which_affected:
        entry = minitest[s][str(m)] = {}
        entry['cm'] = make_cm(starter, m, s)
        entry['probs'] = classifier.classify(entry['cm'], minitruth,
                                             delta=minidelta, other=False)
        entry['results'] = {}
        for met in which_metrics:
            entry[met] = {}
            for wi in range(len(which_weighted)):
                w = which_weighted[wi]
                D = getattr(proclam.metrics, met)()
                entry[met][w] = D.evaluate(entry['probs'], minitruth,
                                           averaging=miniweights[wi][m])
    print('finished '+s)
    # Close the file deterministically instead of leaving the handle to the garbage collector.
    with open('180718_'+s+'_data.pkl', 'wb') as outfile:
        cpkl.dump(minitest[s], outfile)
    print('saved '+s)

In [None]:
# warning: slow!
# Reload previously saved per-systematic results from '<systematic>_data.pkl'.
# NOTE(review): the evaluation cell just above writes '180718_<systematic>_data.pkl';
# confirm that reading the un-prefixed files here is intended.
goodtest = {}
for s in which_systematics:
    # Unpickling can execute arbitrary code: only load files this notebook produced.
    with open(s+'_data.pkl', 'rb') as infile:
        goodtest[s] = cpkl.load(infile)
    print('loaded '+s)

In [None]:
# connect lines along systematic, weighting, and affected class
def complete_metric_plot(dataset, metric_names, shapes, fn='',
                         xlim=(0.035, 0.09), ylim=(0.9, 3.5)):
    """Scatter two metrics against each other for every isolated effect and save the plot.

    One point is drawn for each combination of affected class (global
    ``which_affected``), systematic (each key of ``dataset``) and weighting
    scheme (global ``which_weighted``).  Marker shape encodes the systematic,
    the colour is read from the global ``wt_colors`` and the marker area is
    ``50 * minipops[m] + 10`` using the global ``minipops``.  The figure is
    written to ``'fig/180718' + fn + 'all_effects_isolated.png'``; the ``fig``
    directory is not created here.

    Parameters
    ----------
    dataset : dict
        Results indexed as ``dataset[systematic][str(m)][metric_name][weighting]``.
    metric_names : sequence of str
        ``metric_names[0]`` goes on the x axis, ``metric_names[1]`` on the y axis.
    shapes : iterable
        Marker for each systematic, in the order in which ``dataset`` iterates
        over its keys.
    fn : str, optional
        Tag spliced into the output filename.
    xlim, ylim : (float, float), optional
        Axis limits; the defaults are the limits that were previously hard-coded.

    Returns
    -------
    None
    """
    # dict views cannot be indexed on Python 3, so materialize both lookups.
    # `shapes` is used (rather than the notebook-global marker list) so the
    # argument callers pass actually takes effect.
    systematics = list(dataset.keys())
    markers = list(shapes)

    fig, ax = plt.subplots(figsize=(5,5))

    # Free-text key for the size/colour encodings, in axes coordinates.
    ax.text(0.01, 0.8, r'size$\sim\log[N_{m}/N]$',
            verticalalignment='center', transform=ax.transAxes,
            fontsize=10)
    ax.text(0.01, 0.85, r'color$\sim\exp[w_{m}]$',
            verticalalignment='center', transform=ax.transAxes,
            fontsize=10)
    fig.subplots_adjust(right=1.)

    for m in which_affected:
        for si, s in enumerate(systematics):
            for wi, w in enumerate(which_weighted):
                ax.scatter(dataset[s][str(m)][metric_names[0]][w],
                           dataset[s][str(m)][metric_names[1]][w],
                           # `color=` rather than `c=` so a single RGBA row is
                           # never ambiguous with values to be colour-mapped.
                           color=wt_colors[wi][m],
                           s=50*minipops[m] + 10.,
                           marker=markers[si],
                           alpha=0.25)

    # Proxy points at the origin (outside the default axis limits) whose only
    # job is to give the legend one black entry per systematic.
    for si, s in enumerate(systematics):
        ax.scatter(0., 0., c='k',
                   marker=markers[si],
                   alpha=0.25, label=s)

    ax.set_xlabel(metric_names[0])
    ax.set_ylabel(metric_names[1])
    ax.set_ylim(*ylim)
    ax.set_xlim(*xlim)
    ax.legend(loc='lower right')

    # Small horizontal colorbar in the upper-left corner, ticked at each value
    # of `minipops`.
    # NOTE(review): the bar is drawn with `fave_cmap`, while the points take their
    # colours from `wt_colors` -- confirm the two are meant to match.
    popaxins = inset_axes(ax,
                          width="25%",  # width = 25% of parent_bbox width
                          height="5%",  # height : 5%
                          loc=2)
    mpl.colorbar.ColorbarBase(popaxins, cmap=fave_cmap,
                              norm=mpl.colors.Normalize(vmin=0., vmax=1.),
                              orientation='horizontal')
    tick_positions = np.concatenate((np.zeros(1), np.flip(minipops, axis=0), np.ones(1)))
    popaxins.xaxis.set_ticks(tick_positions)
    # Exactly one label per tick: only the two end ticks are named, the interior
    # ticks get empty strings so that '1' lands on the last tick.
    tick_labels = ['0'] + [''] * (len(tick_positions) - 2) + ['1']
    popaxins.xaxis.set_ticklabels(tick_labels, fontsize=10)

    # A second inset colorbar (autumn_r, labelled 'downweight'/'flat'/'upweight')
    # for the flat/up/down weighting schemes used to be drawn here; it is not
    # drawn while only the 'by pop' scheme is plotted.

    fig.savefig('fig/180718'+fn+'all_effects_isolated.png', dpi=250)
    return

In [None]:
# complete_metric_plot(goodtest, which_metrics, markerlist, fn='almost')
# Plot the freshly computed `minitest` results; output is tagged 'perfect'.
complete_metric_plot(dataset=minitest,
                     metric_names=which_metrics,
                     shapes=markerlist,
                     fn='perfect')

In [None]:
# Quick look at 20 * (1 + minipops[5]).
print(20 * (minipops[5] + 1.))

## When the systematics are added to an agnostic classifier

In [None]:
# Agnostic starting confusion matrix: all ones, then every row rescaled to sum to 1.
starter = np.ones((M_classes, M_classes))
starter /= starter.sum(axis=1, keepdims=True)

In [None]:
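# # NOTE: this cell is currently commented out. It is the code that writes the
# # 'agnostic_<systematic>_data.pkl' files read by the next cell; uncomment to regenerate them.
# # warning: slow!!!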
# for s in which_systematics:
#     minitest[s] = {}
#     for m in which_affected:
#         minitest[s][str(m)] = {}
#         minitest[s][str(m)]['cm'] = make_cm(starter, m, s)
#         minitest[s][str(m)]['probs'] = classifier.classify(minitest[s][str(m)]['cm'], minitruth, 
#                                                            delta=minidelta, other=False)
#         minitest[s][str(m)]['results'] = {}
#         for met in which_metrics:
#             minitest[s][str(m)][met] = {}
#             for wi in range(len(which_weighted)):
#                 w = which_weighted[wi]
#                 D = getattr(proclam.metrics, met)()
#                 minitest[s][str(m)][met][w] = D.evaluate(minitest[s][str(m)]['probs'], minitruth, 
#                                                          averaging=miniweights[wi][m])
#     print('finished '+s)
#     cpkl.dump(minitest[s], open('agnostic_'+s+'_data.pkl', 'wb'))
#     print('saved '+s)

In [None]:
# warning: slow!
# Load the pickled per-systematic results for the agnostic classifier into `badtest`.
# Only unpickle files you produced yourself: unpickling can execute arbitrary code.
badtest = {}
for s in which_systematics:
#     minitest[s] = cpkl.load(open(s+'_data.pkl', 'rb'))
    # Context manager closes the file handle as soon as the data are loaded.
    with open('agnostic_'+s+'_data.pkl', 'rb') as pkl_file:
        badtest[s] = cpkl.load(pkl_file)
    print('loaded '+s)

In [None]:
# Plot the agnostic-classifier results; output is tagged 'agnostic'.
complete_metric_plot(dataset=badtest,
                     metric_names=which_metrics,
                     shapes=markerlist,
                     fn='agnostic')

**TODO:** Repeat this procedure many times to generate error bars.

In [None]:
# cells with a tag of "hideme" will not appear in html resulting from:
# jupyter nbconvert desc_note/main.ipynb --TagRemovePreprocessor.remove_cell_tags='["hideme"]'
# jupyter nbconvert desc_note/main.ipynb --TagRemovePreprocessor.remove_input_tags='["hidein"]'
