In [5]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Authenticate with Weights & Biases without embedding the API key in the notebook.
# wandb.login() takes the key from the WANDB_API_KEY environment variable or an
# existing netrc entry, and otherwise prompts for it.
# NOTE(review): this cell previously contained a literal API key; treat that key as
# leaked and revoke/rotate it in the W&B account settings.
import wandb

wandb.login()

Appending key for api.wandb.ai to your netrc file: /Users/guydavidson/.netrc
Successfully logged in to Weights & Biases!


In [10]:
import numpy as np
import pandas as pd
import scipy
from scipy import stats
from scipy.special import factorial

from mpl_toolkits.mplot3d import Axes3D
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import patches
from matplotlib import path as mpath

import pickle
import tabulate
import wandb
from collections import namedtuple

import meta_learning_data_analysis as analysis
import meta_learning_analysis_plots as plots

In [11]:
# Let printed NumPy arrays use wider lines before wrapping, so large result
# tables stay readable in the cell output.
PRINT_LINE_WIDTH = 160
np.set_printoptions(linewidth=PRINT_LINE_WIDTH)

In [13]:
# Obtain the analysis cache from the project helper module and list its top-level
# keys so the reader can see which result sets are available.
# NOTE(review): presumably refresh_cache() reloads or rebuilds previously computed
# analyses — confirm its cost and side effects in meta_learning_data_analysis.
cache = analysis.refresh_cache()
print(cache.keys())

dict_keys(['six_replications_analyses', 'query_mod_replications'])


In [14]:
# Bind the two cached result sets to notebook-level names, looked up by their keys.
six_replications_analyses, query_mod_replications = (
    cache['six_replications_analyses'],
    cache['query_mod_replications'],
)

# No-query modulation, results by dimension

## Plot the number of examples by dimension

In [None]:
# Plot the example counts for the color, shape and texture dimensions on a shared
# y-range. The texture dimension is titled 'Material' in its plot.
# NOTE(review): neither `plot_processed_results` nor `first_replication_analyses` is
# defined or imported in the visible cells (the cache printed earlier only exposes
# 'six_replications_analyses' and 'query_mod_replications'). On a fresh kernel this
# cell would raise NameError — confirm where these names should come from, or
# whether the cell is stale.
ylim = (1000, 520000)

plot_processed_results(first_replication_analyses.color.examples, 'Color 10-run average', ylim)
plot_processed_results(first_replication_analyses.shape.examples, 'Shape 10-run average', ylim)
plot_processed_results(first_replication_analyses.texture.examples, 'Material 10-run average', ylim)

In [None]:
# Example counts per dimension from the six-replication analyses, all drawn with
# the same y-range.
ylim = (1000, 700000)

# (results, plot title) pairs; the texture dimension is titled 'Material'.
example_panels = (
    (six_replications_analyses.color.examples, 'Color 60-run average'),
    (six_replications_analyses.shape.examples, 'Shape 60-run average'),
    (six_replications_analyses.texture.examples, 'Material 60-run average'),
)
for panel_results, panel_title in example_panels:
    plot_processed_results(panel_results, panel_title, ylim)

## Plot the log of the number of examples to criterion, in each dimension, with error bars

In [None]:
# Log-examples-to-criterion per dimension, drawn with a shared y-range and the
# same plotting options for every panel.
ylim = (7.25, 14)

# Keyword options common to all three plots.
# presumably sem_n is the sample count used for the error band — TODO confirm.
log_plot_options = dict(log_x=True, log_y=True, sem_n=60, shade_error=True)

# (results, plot title) pairs; the texture dimension is titled 'Material'.
log_example_panels = (
    (six_replications_analyses.color.log_examples, 'Color 60-run average'),
    (six_replications_analyses.shape.log_examples, 'Shape 60-run average'),
    (six_replications_analyses.texture.log_examples, 'Material 60-run average'),
)
for panel_results, panel_title in log_example_panels:
    plot_processed_results(panel_results, panel_title, ylim, **log_plot_options)

## Plot the combined results over all 180 runs

In [None]:
# Combined log-examples plot over all dimensions.
# The y-range is set here explicitly (same value the preceding per-dimension cell
# uses) so this cell no longer depends on a `ylim` left behind by another cell.
ylim = (7.25, 14)

# NOTE(review): log_x is a tuple here, unlike the boolean passed in the other
# plotting cells — confirm what the helper does with a (True, False) pair.
plot_processed_results(six_replications_analyses.combined.log_examples, 'Combined 180-run average',
                       ylim, log_x=(True, False), log_y=True, sem_n=180, shade_error=True)

## Plot the absolute accuracy after introducing a new task

In [None]:
# Absolute accuracy per dimension; no explicit y-range is passed (ylim is None).
ylim = None

# Keyword options common to all three plots (linear axes on both dimensions).
accuracy_plot_options = dict(log_x=False, log_y=False, sem_n=60, shade_error=True)

# (results, plot title) pairs; the texture dimension is titled 'Material'.
accuracy_panels = (
    (six_replications_analyses.color.accuracies, 'Color 60-run average'),
    (six_replications_analyses.shape.accuracies, 'Shape 60-run average'),
    (six_replications_analyses.texture.accuracies, 'Material 60-run average'),
)
for panel_results, panel_title in accuracy_panels:
    plot_processed_results(panel_results, panel_title, ylim, **accuracy_plot_options)

## Plot the accuracy drop after introducing a new task

In [None]:
# Accuracy drop per dimension; no explicit y-range is passed (ylim is None).
ylim = None

# Keyword options common to all three plots (linear axes on both dimensions).
drop_plot_options = dict(log_x=False, log_y=False, sem_n=60, shade_error=True)

# (results, plot title) pairs; the texture dimension is titled 'Material'.
accuracy_drop_panels = (
    (six_replications_analyses.color.accuracy_drops, 'Color 60-run average'),
    (six_replications_analyses.shape.accuracy_drops, 'Shape 60-run average'),
    (six_replications_analyses.texture.accuracy_drops, 'Material 60-run average'),
)
for panel_results, panel_title in accuracy_drop_panels:
    plot_processed_results(panel_results, panel_title, ylim, **drop_plot_options)

# Scratch work

## Looking for outliers and analyzing the skewness of these distributions

In [None]:
from scipy.stats import skew, skewtest

In [None]:
# Process the runs stored at index 1 with debug=True and keep both returned values.
# NOTE(review): `process_multiple_runs` and `six_replications_by_dimension_runs` are
# not defined or imported in the visible cells — confirm their source (possibly the
# `analysis` module) before relying on this cell.
output, examples = process_multiple_runs(six_replications_by_dimension_runs[1], debug=True)

In [None]:
# Convert the collected examples into a NumPy array so they can be sliced and
# passed to the SciPy statistics functions.
ex = np.array(examples)

In [None]:
# 20-bin histogram of the values at fixed positions 5 and 9 of the second and third
# axes, taken across the whole first axis.
# presumably the first axis indexes individual runs — TODO confirm.
plt.hist(ex[:,5,9], bins=20)

In [None]:
# Show the sample skewness of `ex` formatted to three decimals. skew() is called
# without an axis argument, so SciPy's default axis applies.
# NOTE(review): `pretty_print_results` is not defined or imported in the visible
# cells — confirm where it should come from.
pretty_print_results(skew(ex),  floatfmt=".3f")

In [None]:
# Run SciPy's skewness test on `ex` and keep the result object for inspection.
res = skewtest(ex)

In [None]:
# Compare the skew-test p-value(s) against the 0.001 threshold; the comparison
# result is displayed as the cell output.
res.pvalue < 0.001

## Analyzing the average number of examples for each actual query/task

In [None]:
# Add up the second element (index 1) of every entry in six_replications_by_dimension.
# NOTE(review): `six_replications_by_dimension` is not defined in the visible cells —
# confirm where it should come from.
query_difficulty = sum(
    replication_result[1] for replication_result in six_replications_by_dimension
)

# Display names for the 30 queries, ten per dimension, in dimension order.
query_names = [
    # color
    'blue', 'brown', 'cyan', 'gray', 'green',
    'orange', 'pink', 'purple', 'red', 'yellow',
    # shape
    'cone', 'cube', 'cylinder', 'dodecahedron', 'ellipsoid',
    'octahedron', 'pyramid', 'rectangle', 'sphere', 'torus',
    # texture
    'chain_mail', 'marble', 'maze', 'metal', 'metal_weave',
    'polka', 'rubber', 'rug', 'tiles', 'wood_plank',
]

# Dimension labels, in the same order as the ten-name groups above.
dimension_names = [
    'color',
    'shape',
    'texture',
]

In [None]:
# One panel per dimension, stacked vertically; each panel draws one line per query
# showing its difficulty against the number of times it was trained.
fig, axes = plt.subplots(3, 1, figsize=(8, 18))
fig.suptitle('Average difficulty by query and number of times trained')

x_values = range(1, 11)

for i, ax in enumerate(axes):
    # Query ids are laid out in consecutive blocks of ten per dimension.
    first_query = i * 10
    for query_id in range(first_query, first_query + 10):
        ax.plot(
            x_values,
            query_difficulty[query_id, :],
            label=f'{query_names[query_id]} ({query_id})',
        )

    ax.set(
        title=dimension_names[i],
        xlabel='Number of times trained',
        ylabel='Average number of examples required',
    )
    ax.legend(loc='best')

plt.show()

In [None]:
# Scratch check: pair each element of `a` with the element of `b` at the same position.
a = (1, 2, 3)
b = (4, 5, 6)

# dict() consumes the (key, value) pairs produced by zip() directly, so no
# comprehension is needed; the resulting mapping is displayed as the cell output.
dict(zip(a, b))