In [1]:
# Set the isaac progress-bar flag first, before the remaining isaac modules are imported
# (same relative order as before).
import isaac.constants
isaac.constants.TQDM_DISABLE = True

import glob
import os

import joblib
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn import Softmax
from tqdm import tqdm

from isaac.constants import BASIC_TRAINING_COLS, MASS_CLASS_COLS, FORCE_CLASS_COLS, RTHETA_COLS, XY_RTHETA_COLS, XY_VXVY_RTHETA_COLS
from isaac.dataset import read_dataset, prepare_dataset
from isaac.evaluation import predict_with_a_group_of_saved_models, evaluate_saved_model
from isaac.models import MultiBranchModel, ComplexRNNModel
from isaac.utils import get_cuda_device_if_available


In [2]:
# Length of each rolling window handed to the models; also used further down to
# cut the replay clip that corresponds to a window.
SECONDS_PER_WINDOW = 5 # seconds
# Frames per second; converts window start seconds into row offsets when slicing trials.
FPS = 60
# Forwarded as `step_size` to the isaac prediction/evaluation helpers.
# NOTE(review): presumably a frame subsampling stride — confirm in isaac.
STEP_SIZE = 3
# PD_STEP_SIZE = 10
# Forwarded as `seq_end` to the isaac helpers.
# NOTE(review): 2700 == 45 * FPS, so this looks like a 45-second cut-off expressed in frames — confirm.
SEQ_END = 2700

In [3]:
# Pick the torch device via the isaac helper and show which one was selected.
# Note: `device` is not referenced again in the cells visible in this notebook.
device = get_cuda_device_if_available()
print(device)

cpu


In [4]:
# --- Model / dataset configuration shared by the helper functions below ---
# Not read by any cell visible in this notebook.
normalise_data = True
# Selects both the checkpoint folder (models/<model_name>/) and the scaler file.
model_name = "rtheta"
scaler_path = "scalers/passive_"+model_name+"_scaler.sk"
training_columns = RTHETA_COLS
# Passed positionally to the isaac helpers as the network description.
# NOTE(review): presumably (input size, hidden size, number of layers, dropout) — confirm in isaac.models.
network_dims = (len(training_columns), 25, 3, 0.5)
dataset_path = "../new_exp_data/exp7_passive_rtheta.h5"
# Two classification heads: index 0 = mass classes, index 1 = force classes
# (the helpers below rely on this order when selecting a question).
class_columns = [list(MASS_CLASS_COLS), list(FORCE_CLASS_COLS)]
multiclass = True
# Trials loaded once and reused for predictions and for the ground-truth labels.
DATASET = read_dataset(dataset_path)


def get_question_predictions_for_group_of_models(question_type):
    """Collect rolling-window predictions from every saved model of one question type.

    Globs ``models/<model_name>/best_<question_type>_model_seed_*.pt``, runs all
    matching checkpoints over ``DATASET`` with
    ``predict_with_a_group_of_saved_models`` (rolling windows of
    ``SECONDS_PER_WINDOW`` seconds) and stacks the per-model results.

    Args:
        question_type: ``"mass"`` selects index 0 of the question axis; any
            other value (``"force"`` in this notebook) selects index 1.

    Returns:
        torch.Tensor: the stacked predictions with the question axis removed.
        In the recorded run the stacked tensor has shape (25, 36, 40, 2, 3),
        i.e. the result is (25, 36, 40, 3); the first three axes appear to be
        (model, trial, window) and the last one the class scores.

    Raises:
        FileNotFoundError: if no checkpoint matches the glob pattern.
    """
    pattern = "models/"+model_name+"/best_"+question_type+"_model_seed_*.pt"
    models = sorted(glob.glob(pattern))
    if not models:
        # Fail early with the offending pattern instead of an opaque error from torch.stack([]).
        raise FileNotFoundError("No saved models match %r" % pattern)

    predictions = predict_with_a_group_of_saved_models(tqdm(models), network_dims, None,
                                                       training_columns=training_columns,
                                                       class_columns=class_columns, step_size=STEP_SIZE,
                                                       seq_end=SEQ_END, scaler_path=scaler_path,
                                                       arch=MultiBranchModel, multiclass=multiclass, trials=DATASET,
                                                       predict_rolling_windows=True, seconds_per_window=SECONDS_PER_WINDOW)

    predictions = torch.stack(predictions)

    print(predictions.shape)
    # class_columns is [mass, force], so the question axis uses the same order.
    question_index = 0 if question_type == "mass" else 1

    return predictions[:, :, :, question_index]

def get_question_accuracy_for_group_of_models(question_type):
    """Evaluate every saved model of one question type on ``dataset_path``.

    Args:
        question_type: ``"mass"`` picks column 0 of the results; anything else
            (``"force"`` in this notebook) picks column 1.

    Returns:
        tuple: ``(accuracies, predicted)`` where ``accuracies`` is the selected
        column of the stacked accuracies returned by ``evaluate_saved_model``
        and ``predicted`` is a list with one numpy array per entry of the
        helper's second return value, restricted to the same column.
    """
    checkpoint_pattern = "models/"+model_name+"/best_"+question_type+"_model_seed_*.pt"
    model_paths = tqdm(sorted(glob.glob(checkpoint_pattern)))

    accuracies, predicted = evaluate_saved_model(
        model_paths, network_dims, dataset_path,
        training_columns=training_columns, class_columns=class_columns,
        step_size=STEP_SIZE, seq_end=SEQ_END, scaler_path=scaler_path,
        arch=MultiBranchModel, multiclass=multiclass, trials=None)

    # class_columns is ordered [mass, force].
    question_index = 0 if question_type == "mass" else 1

    selected_accuracies = np.stack(accuracies)[:, question_index]
    selected_predictions = [model_output[:, question_index].numpy() for model_output in predicted]

    return selected_accuracies, selected_predictions

# T-test for MASS questions

## Load model's predictions

In [5]:
# Run every saved "mass" checkpoint over DATASET and keep the mass head's output.
print("MASS")
question_type = "mass"
group_mass_seq_prediction = get_question_predictions_for_group_of_models(question_type)

# Same for the "force" checkpoints, keeping the force head's output.
print("\nFORCE")
question_type = "force"
group_force_seq_prediction = get_question_predictions_for_group_of_models(question_type)

  0%|          | 0/25 [00:00<?, ?it/s]

MASS


100%|██████████| 25/25 [00:48<00:00,  1.94s/it]


torch.Size([25, 36, 40, 2, 3])

FORCE


100%|██████████| 25/25 [00:50<00:00,  2.01s/it]

torch.Size([25, 36, 40, 2, 3])





In [6]:
# Ground-truth class per trial: the name of the class column holding the row
# maximum, read from the first row of each trial.
mass_solutions = [
    trial[list(MASS_CLASS_COLS)].idxmax(axis=1).iloc[0]
    for trial in DATASET
]
force_solutions = [
    trial[list(FORCE_CLASS_COLS)].idxmax(axis=1).iloc[0]
    for trial in DATASET
]

In [7]:
# Normalise the model outputs along the last (class) axis.
# WARNING: this cell overwrites its own inputs, so running it a second time
# applies the softmax twice — re-run the prediction cell first.
s = Softmax(dim=-1)
group_mass_seq_prediction = s(group_mass_seq_prediction)
group_force_seq_prediction = s(group_force_seq_prediction)

In [8]:
# Ensemble average: collapse axis 0 (one entry per saved model).
avg_mass_seq_prediction = group_mass_seq_prediction.mean(dim=0)
avg_force_seq_prediction = group_force_seq_prediction.mean(dim=0)

In [9]:
avg_force_seq_prediction.shape

torch.Size([36, 40, 3])

In [10]:
# Flat index lists with one entry per (trial, window) pair, trial-major, so they
# line up with the row order of `reshape(n_trials*n_windows, 3)` used for the tables.
n_trials, n_windows = avg_force_seq_prediction.shape[:2]
# NOTE(review): windows are labelled 1..n_windows, and make_clip multiplies this
# label by FPS to find the first frame of a clip. This assumes the first rolling
# window starts at second 1 — confirm against predict_with_a_group_of_saved_models.
window_second_start = list(range(1, n_windows + 1)) * n_trials
trial_number = [trial_id for trial_id in range(n_trials) for _ in range(n_windows)]

In [11]:
# One row per (trial, window): the ensemble probability of each class plus the
# window label, the trial index and that trial's ground-truth class.
mass_df = pd.DataFrame(
    data=avg_mass_seq_prediction.reshape(n_trials*n_windows, 3).numpy(),
    columns=["rnn_%s" % cl for cl in MASS_CLASS_COLS],
).assign(
    window_second_start=window_second_start,
    trial_number=trial_number,
    solution=[mass_solutions[trial_id] for trial_id in trial_number],
)

force_df = pd.DataFrame(
    data=avg_force_seq_prediction.reshape(n_trials*n_windows, 3).numpy(),
    columns=["rnn_%s" % cl for cl in FORCE_CLASS_COLS],
).assign(
    window_second_start=window_second_start,
    trial_number=trial_number,
    solution=[force_solutions[trial_id] for trial_id in trial_number],
)

In [12]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(font_scale=3)
sns.set_style("white")

# Plot all Force probabilities throughout the trials

In [13]:
# One figure per trial: ensemble probability of each force class over the
# rolling windows. The true class stays solid, the two wrong classes are dashed.

# savefig does not create missing directories, and nothing before this cell
# creates "cogsci_images".
os.makedirs("cogsci_images", exist_ok=True)

# Plot order == order of the Line2D objects in ax.lines.
force_classes = ["attract", "repel", "none"]

for trial_i in range(force_df["trial_number"].max() + 1):
    trial_df = force_df.query("trial_number == %d" % trial_i)

    fig, ax = plt.subplots(figsize=(40, 6))

    for force_class in force_classes:
        sns.lineplot(y="rnn_" + force_class, x="window_second_start", data=trial_df,
                     ax=ax, linewidth=20, label=force_class)
    sns.despine(ax=ax)

    ax.set_ylabel("RNN probability", weight="bold")
    ax.set_yticks([0, 0.5, 1])
    ax.set_yticklabels(ax.get_yticks(), weight="bold")
    # NOTE(review): 45 presumably is the trial length in seconds (SEQ_END / FPS) — confirm.
    ax.set_xlim(0, 45)
    ax.set_xticklabels([])
    ax.set_xlabel("")
    ax.legend(loc=2, prop={'weight':'bold'})

    # Dash the wrong classes only after the legend has been built, as before,
    # so the legend entries are rendered exactly as they were.
    solution = trial_df.solution.unique()[0]
    for line, force_class in zip(ax.lines, force_classes):
        if force_class != solution:
            line.set_linestyle("--")

    fig.savefig("cogsci_images/RNN_rw_all_prob_force_plot_%d.png" % trial_i, bbox_inches='tight')
    plt.close(fig)

In [14]:
# One figure per trial: ensemble probability of each mass class over the
# rolling windows. The true class stays solid, the two wrong classes are dashed.

# savefig does not create missing directories, and nothing before the plotting
# cells creates "cogsci_images".
os.makedirs("cogsci_images", exist_ok=True)

# Plot order == order of the Line2D objects in ax.lines.
mass_classes = ["A", "B", "same"]

for trial_i in range(mass_df["trial_number"].max() + 1):
    trial_df = mass_df.query("trial_number == %d" % trial_i)

    fig, ax = plt.subplots(figsize=(40, 6))

    for mass_class in mass_classes:
        sns.lineplot(y="rnn_" + mass_class, x="window_second_start", data=trial_df,
                     ax=ax, linewidth=20, label=mass_class)
    sns.despine(ax=ax)

    ax.set_ylabel("RNN probability", weight="bold")
    ax.set_yticks([0, 0.5, 1])
    ax.set_yticklabels(ax.get_yticks(), weight="bold")
    # NOTE(review): 45 presumably is the trial length in seconds (SEQ_END / FPS) — confirm.
    ax.set_xlim(0, 45)
    ax.set_xticklabels([])
    ax.set_xlabel("")
    ax.legend(loc=2, prop={'weight':'bold'})

    # Dash the wrong classes only after the legend has been built, as before,
    # so the legend entries are rendered exactly as they were.
    solution = trial_df.solution.unique()[0]
    for line, mass_class in zip(ax.lines, mass_classes):
        if mass_class != solution:
            line.set_linestyle("--")

    fig.savefig("cogsci_images/RNN_rw_all_prob_mass_plot_%d.png" % trial_i, bbox_inches='tight')
    plt.close(fig)

# Obtain most confusing intervals
Most confusing = the intervals in which the RNN assigns the highest probability to a wrong class.

In [15]:
def make_probability_of_the_correct_rnn_column_zero(df):
    """Zero out, in place, the probability the RNN gave to the true class.

    For every distinct value ``v`` of ``df["solution"]`` the column
    ``"rnn_" + v`` is multiplied by a 0/1 mask that is 0 on the rows whose
    solution is ``v``. Probabilities of the wrong classes are left untouched.

    Args:
        df: frame with a ``solution`` column and one ``rnn_<class>`` column per
            class that occurs as a solution. It is modified in place.

    Returns:
        None.
    """
    true_labels = df["solution"]
    for label in true_labels.unique():
        column = "rnn_" + label
        keep_mask = true_labels.ne(label).astype(int)
        df[column] = df[column] * keep_mask

def get_confused_probabilities_df(df):
    """Reshape per-class probabilities to long format, keeping only wrong classes.

    For every class option a copy of the identifying columns is made and tagged
    with the option name (``rnn_option``) and the probability taken from the
    ``"rnn_" + option`` column (``rnn_value``). Rows where the option equals the
    true ``solution`` are dropped, so the result contains exactly the
    probabilities assigned to wrong classes.

    Options are the distinct values of ``df.solution`` (in order of first
    appearance) followed by any remaining ``rnn_``-prefixed column. The latter
    matters when a class never occurs as a ground truth: the RNN can still
    wrongly prefer it, and such confusions used to be silently omitted.
    Every column whose name starts with ``rnn_`` is therefore treated as a
    class-probability column.

    Args:
        df: frame with ``window_second_start``, ``trial_number``, ``solution``
            and one ``rnn_<class>`` column per class. It is not modified.

    Returns:
        pandas.DataFrame: columns ``window_second_start``, ``trial_number``,
        ``solution``, ``rnn_option`` and ``rnn_value``; the index values of
        ``df`` are kept (and therefore repeat across options).

    Raises:
        KeyError: if a solution value has no matching ``rnn_<solution>`` column.
    """
    id_columns = ["window_second_start", "trial_number", "solution"]
    prefix = "rnn_"

    options = list(df.solution.unique())
    for column in df.columns:
        if isinstance(column, str) and column.startswith(prefix):
            option = column[len(prefix):]
            if option not in options:
                options.append(option)

    if not options:
        # Nothing to reshape; pd.concat would raise on an empty list.
        return pd.DataFrame(columns=id_columns + ["rnn_option", "rnn_value"])

    long_frames = []
    for option in options:
        option_frame = df[id_columns].copy()
        option_frame["rnn_option"] = option
        option_frame["rnn_value"] = df[prefix + option]
        long_frames.append(option_frame)

    long_df = pd.concat(long_frames)
    return long_df[long_df["solution"] != long_df["rnn_option"]]
    
# NOTE: the zeroing helper mutates mass_df / force_df in place, so from this cell
# on those frames no longer hold the probability of the true class. Re-create
# them before re-running the plotting cells.
make_probability_of_the_correct_rnn_column_zero(mass_df)
# Long-format table of the probabilities assigned to wrong mass classes.
confused_mass_dfs = get_confused_probabilities_df(mass_df)
make_probability_of_the_correct_rnn_column_zero(force_df)
# Long-format table of the probabilities assigned to wrong force classes.
confused_force_dfs = get_confused_probabilities_df(force_df)

In [16]:
# Most confusing intervals first: highest probability given to a wrong class.
confused_mass_dfs = confused_mass_dfs.sort_values("rnn_value", ascending=False)
confused_force_dfs = confused_force_dfs.sort_values("rnn_value", ascending=False)

## Load human results

In [17]:
from isaac.visualization import make_frame_curried
import moviepy.editor as mpy
from scipy import misc


def make_clip(trial_data, window_second_start, solution, rnn_thinks_this_is, rnn_confidence):
    """Cut one window out of a trial and wrap it in a moviepy clip.

    Args:
        trial_data: DataFrame of a single trial; rows are sliced positionally,
            ``FPS`` rows per second.
        window_second_start: start of the window in seconds; the slice covers
            rows ``window_second_start * FPS`` up to (excluding)
            ``(window_second_start + SECONDS_PER_WINDOW) * FPS``.
        solution: unused; kept so existing callers keep working.
        rnn_thinks_this_is: unused; kept so existing callers keep working.
        rnn_confidence: unused; kept so existing callers keep working.

    Returns:
        tuple: ``(clip, trial_data)`` — the ``VideoClip`` whose duration is the
        number of sliced rows divided by ``FPS``, and the sliced frame data
        *including* the padding rows described below.

    Raises:
        IndexError: if the slice is empty (there is no last row to repeat).
    """
    first_row = window_second_start * FPS
    last_row = (window_second_start + SECONDS_PER_WINDOW) * FPS
    trial_data = trial_data.iloc[first_row:last_row]
    duration = len(trial_data)
    # Count the bodies o1..o4 that have an x-position column.
    n_bodies = sum("o"+str(i)+".x" in trial_data.columns for i in range(1, 5))

    # Repeat the last row until len(trial_data) + 1 is a multiple of FPS.
    # NOTE(review): presumably so the frame factory can be asked for frames at or
    # slightly past the end of the window without running out of rows — confirm.
    # Done with a single concat: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame once per padding row.
    n_padding_rows = (-(len(trial_data) + 1)) % FPS
    if n_padding_rows:
        padding = trial_data.iloc[[-1] * n_padding_rows]
        trial_data = pd.concat([trial_data, padding], ignore_index=True)

    make_frame = make_frame_curried(trial_data, n_bodies, None, None)
    clip = mpy.VideoClip(make_frame, duration=duration / FPS)
    return clip, trial_data

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [18]:
%load_ext autoreload
%autoreload 2

# Create videos for comparing evidence seen by RNN and by IO

In [19]:
import os
import json

# Output folder for this model's clips and physics data; created on import of this cell.
CONFUSING_DATA_PATH = "cogsci_images/confusing_videos/confusing_%s" % model_name
os.makedirs(CONFUSING_DATA_PATH, exist_ok=True)
# Placeholders, in order: question type, trial number, window start second,
# window end second, class the RNN preferred, true class.
FILENAME = "confusing_%s_interval_in_trial_%d_sec_%d_to_%d_rnn_thinks_%s_while_solution_is_%s"
VIDEO_PATH = os.path.join(CONFUSING_DATA_PATH, FILENAME + ".mp4")
# The remaining %s placeholder is filled with the question type.
JSON_PATH = os.path.join(CONFUSING_DATA_PATH, "confusing_"+model_name+"_%s_physics_data.json")

def write_confused_intervals(confused_df, question_type, max_replays=10,
                             replays_path="../new_exp_data/exp7_passive.h5"):
    """Export videos and physics data for the most confusing intervals.

    Walks ``confused_df`` in its current row order (callers sort it by
    ``rnn_value`` descending first), skips intervals that lie within
    ``SECONDS_PER_WINDOW`` seconds of an interval already written for the same
    trial, and for each remaining one writes an ``.mp4`` (``VIDEO_PATH``) and
    collects the frame data. All collected frame data is dumped to
    ``JSON_PATH % question_type`` at the end.

    Args:
        confused_df: frame with columns ``window_second_start``,
            ``trial_number``, ``solution``, ``rnn_option`` and ``rnn_value``.
        question_type: label used in the output file names ("mass"/"force").
        max_replays: stop after this many clips have been written.
        replays_path: dataset the replay frames are read from.
            NOTE(review): ``trial_number`` indexes this dataset although the
            predictions were made on ``DATASET`` — this assumes both files list
            the trials in the same order; confirm.

    Returns:
        None. Side effects: prints progress, writes video files and one JSON file.
    """
    replays = read_dataset(replays_path)
    print(len(replays))
    written_replays = {}
    number_of_written_replays = 0

    json_data = []

    columns = ["window_second_start", "trial_number", "solution", "rnn_option", "rnn_value"]
    for row in confused_df[columns].itertuples(index=False, name=None):
        if number_of_written_replays >= max_replays:
            break

        window_second_start, trial_number, solution, rnn_thinks_this_is, rnn_confidence = row
        window_second_end = window_second_start + SECONDS_PER_WINDOW

        print("RNN thinks the interval (%d, %d) in trial %d is %s with %.4f confidence. In reality, it is %s." % (
            window_second_start, window_second_end, trial_number, rnn_thinks_this_is, rnn_confidence, solution))

        # Skip windows that overlap (or touch) one already exported for this trial.
        written_starts = written_replays.setdefault(trial_number, [])
        if any(abs(start - window_second_start) <= SECONDS_PER_WINDOW for start in written_starts):
            print("Skipping trial %d and start %d" % (trial_number, window_second_start))
            continue
        written_starts.append(window_second_start)

        clip, trial_data = make_clip(replays[trial_number], window_second_start, solution, rnn_thinks_this_is, rnn_confidence)
        # NOTE(review): the value returned by ipython_display is not used here, so
        # this call renders the clip a second time without showing it from inside
        # the function — consider displaying the result or dropping the call.
        clip.ipython_display(fps=FPS)
        clip.write_videofile(VIDEO_PATH % (
            question_type, trial_number, window_second_start, window_second_end,
            rnn_thinks_this_is, solution), fps=FPS)

        trial_data = trial_data.to_dict(orient='list')
        # Simplify attributes whose values are unique throughout the list
        for key in ["trial_type", "condition_world_variant", "tM", "tR", "world_id"]:
            trial_data[key] = trial_data[key][0]
        json_data.append(trial_data)

        number_of_written_replays += 1

    with open(JSON_PATH % question_type, "w+") as f:
        json.dump(json_data, f)

In [20]:
write_confused_intervals(confused_force_dfs, "force")

36
RNN thinks the interval (12, 17) in trial 30 is attract with 0.6580 confidence. In reality, it is none.


100%|█████████▉| 300/301 [00:01<00:00, 151.13it/s]

[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_30_sec_12_to_17_rnn_thinks_attract_while_solution_is_none.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_30_sec_12_to_17_rnn_thinks_attract_while_solution_is_none.mp4



100%|█████████▉| 300/301 [00:02<00:00, 145.99it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_30_sec_12_to_17_rnn_thinks_attract_while_solution_is_none.mp4 

RNN thinks the interval (31, 36) in trial 35 is attract with 0.6459 confidence. In reality, it is repel.


100%|█████████▉| 300/301 [00:02<00:00, 149.92it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_35_sec_31_to_36_rnn_thinks_attract_while_solution_is_repel.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_35_sec_31_to_36_rnn_thinks_attract_while_solution_is_repel.mp4


100%|█████████▉| 300/301 [00:02<00:00, 146.22it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_35_sec_31_to_36_rnn_thinks_attract_while_solution_is_repel.mp4 






RNN thinks the interval (8, 13) in trial 17 is attract with 0.6427 confidence. In reality, it is repel.


100%|█████████▉| 300/301 [00:01<00:00, 153.04it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_17_sec_8_to_13_rnn_thinks_attract_while_solution_is_repel.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_17_sec_8_to_13_rnn_thinks_attract_while_solution_is_repel.mp4


100%|█████████▉| 300/301 [00:02<00:00, 148.19it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_17_sec_8_to_13_rnn_thinks_attract_while_solution_is_repel.mp4 






RNN thinks the interval (4, 9) in trial 25 is attract with 0.6340 confidence. In reality, it is repel.


100%|█████████▉| 300/301 [00:02<00:00, 148.84it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_25_sec_4_to_9_rnn_thinks_attract_while_solution_is_repel.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_25_sec_4_to_9_rnn_thinks_attract_while_solution_is_repel.mp4


100%|█████████▉| 300/301 [00:02<00:00, 138.34it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_25_sec_4_to_9_rnn_thinks_attract_while_solution_is_repel.mp4 

RNN thinks the interval (10, 15) in trial 5 is attract with 0.6205 confidence. In reality, it is none.


100%|█████████▉| 300/301 [00:02<00:00, 148.78it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_5_sec_10_to_15_rnn_thinks_attract_while_solution_is_none.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_5_sec_10_to_15_rnn_thinks_attract_while_solution_is_none.mp4


100%|█████████▉| 300/301 [00:02<00:00, 145.86it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_5_sec_10_to_15_rnn_thinks_attract_while_solution_is_none.mp4 

RNN thinks the interval (40, 45) in trial 32 is attract with 0.6016 confidence. In reality, it is none.


100%|█████████▉| 300/301 [00:02<00:00, 149.51it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_32_sec_40_to_45_rnn_thinks_attract_while_solution_is_none.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_32_sec_40_to_45_rnn_thinks_attract_while_solution_is_none.mp4


100%|█████████▉| 300/301 [00:02<00:00, 138.51it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_32_sec_40_to_45_rnn_thinks_attract_while_solution_is_none.mp4 






RNN thinks the interval (38, 43) in trial 23 is attract with 0.5989 confidence. In reality, it is none.


100%|█████████▉| 300/301 [00:02<00:00, 131.97it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_23_sec_38_to_43_rnn_thinks_attract_while_solution_is_none.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_23_sec_38_to_43_rnn_thinks_attract_while_solution_is_none.mp4


100%|█████████▉| 300/301 [00:02<00:00, 143.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_23_sec_38_to_43_rnn_thinks_attract_while_solution_is_none.mp4 






RNN thinks the interval (10, 15) in trial 7 is attract with 0.5977 confidence. In reality, it is repel.


100%|█████████▉| 300/301 [00:02<00:00, 145.47it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_7_sec_10_to_15_rnn_thinks_attract_while_solution_is_repel.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_7_sec_10_to_15_rnn_thinks_attract_while_solution_is_repel.mp4


100%|█████████▉| 300/301 [00:02<00:00, 144.04it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_7_sec_10_to_15_rnn_thinks_attract_while_solution_is_repel.mp4 






RNN thinks the interval (13, 18) in trial 15 is attract with 0.5730 confidence. In reality, it is repel.


100%|█████████▉| 300/301 [00:02<00:00, 149.81it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_15_sec_13_to_18_rnn_thinks_attract_while_solution_is_repel.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_15_sec_13_to_18_rnn_thinks_attract_while_solution_is_repel.mp4


100%|█████████▉| 300/301 [00:02<00:00, 144.99it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_15_sec_13_to_18_rnn_thinks_attract_while_solution_is_repel.mp4 

RNN thinks the interval (38, 43) in trial 4 is attract with 0.5704 confidence. In reality, it is none.


100%|█████████▉| 300/301 [00:02<00:00, 146.47it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_4_sec_38_to_43_rnn_thinks_attract_while_solution_is_none.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_4_sec_38_to_43_rnn_thinks_attract_while_solution_is_none.mp4


100%|█████████▉| 300/301 [00:02<00:00, 144.35it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_force_interval_in_trial_4_sec_38_to_43_rnn_thinks_attract_while_solution_is_none.mp4 






In [22]:
write_confused_intervals(confused_mass_dfs, "mass")

36
RNN thinks the interval (1, 6) in trial 24 is A with 0.9346 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 137.29it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_24_sec_1_to_6_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_24_sec_1_to_6_rnn_thinks_A_while_solution_is_same.mp4


100%|█████████▉| 300/301 [00:02<00:00, 129.17it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_24_sec_1_to_6_rnn_thinks_A_while_solution_is_same.mp4 

RNN thinks the interval (2, 7) in trial 24 is A with 0.9038 confidence. In reality, it is same.
Skipping trial 24 and start 2
RNN thinks the interval (40, 45) in trial 18 is A with 0.8881 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 147.11it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_18_sec_40_to_45_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_18_sec_40_to_45_rnn_thinks_A_while_solution_is_same.mp4


100%|█████████▉| 300/301 [00:02<00:00, 143.14it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_18_sec_40_to_45_rnn_thinks_A_while_solution_is_same.mp4 






RNN thinks the interval (39, 44) in trial 30 is A with 0.8745 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 128.58it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_30_sec_39_to_44_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_30_sec_39_to_44_rnn_thinks_A_while_solution_is_same.mp4


100%|█████████▉| 300/301 [00:02<00:00, 144.88it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_30_sec_39_to_44_rnn_thinks_A_while_solution_is_same.mp4 

RNN thinks the interval (26, 31) in trial 9 is A with 0.8724 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 147.10it/s]

[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_9_sec_26_to_31_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_9_sec_26_to_31_rnn_thinks_A_while_solution_is_same.mp4



100%|█████████▉| 300/301 [00:02<00:00, 144.32it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_9_sec_26_to_31_rnn_thinks_A_while_solution_is_same.mp4 






RNN thinks the interval (39, 44) in trial 9 is A with 0.8721 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 143.90it/s]

[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_9_sec_39_to_44_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_9_sec_39_to_44_rnn_thinks_A_while_solution_is_same.mp4



100%|█████████▉| 300/301 [00:02<00:00, 142.86it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_9_sec_39_to_44_rnn_thinks_A_while_solution_is_same.mp4 






RNN thinks the interval (33, 38) in trial 32 is A with 0.8678 confidence. In reality, it is B.


100%|█████████▉| 300/301 [00:02<00:00, 145.09it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_32_sec_33_to_38_rnn_thinks_A_while_solution_is_B.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_32_sec_33_to_38_rnn_thinks_A_while_solution_is_B.mp4


100%|█████████▉| 300/301 [00:02<00:00, 140.98it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_32_sec_33_to_38_rnn_thinks_A_while_solution_is_B.mp4 






RNN thinks the interval (32, 37) in trial 32 is A with 0.8672 confidence. In reality, it is B.
Skipping trial 32 and start 32
RNN thinks the interval (5, 10) in trial 1 is same with 0.8668 confidence. In reality, it is A.


100%|█████████▉| 300/301 [00:02<00:00, 143.18it/s]

[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_1_sec_5_to_10_rnn_thinks_same_while_solution_is_A.mp4





[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_1_sec_5_to_10_rnn_thinks_same_while_solution_is_A.mp4


100%|█████████▉| 300/301 [00:02<00:00, 140.62it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_1_sec_5_to_10_rnn_thinks_same_while_solution_is_A.mp4 






RNN thinks the interval (31, 36) in trial 32 is A with 0.8638 confidence. In reality, it is B.
Skipping trial 32 and start 31
RNN thinks the interval (31, 36) in trial 21 is A with 0.8550 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 141.24it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_21_sec_31_to_36_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_21_sec_31_to_36_rnn_thinks_A_while_solution_is_same.mp4


100%|█████████▉| 300/301 [00:02<00:00, 141.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_21_sec_31_to_36_rnn_thinks_A_while_solution_is_same.mp4 






RNN thinks the interval (21, 26) in trial 30 is B with 0.8503 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 143.89it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_30_sec_21_to_26_rnn_thinks_B_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_30_sec_21_to_26_rnn_thinks_B_while_solution_is_same.mp4


100%|█████████▉| 300/301 [00:02<00:00, 143.12it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_30_sec_21_to_26_rnn_thinks_B_while_solution_is_same.mp4 






RNN thinks the interval (6, 11) in trial 0 is A with 0.8457 confidence. In reality, it is same.


100%|█████████▉| 300/301 [00:02<00:00, 145.46it/s]


[MoviePy] >>>> Building video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_0_sec_6_to_11_rnn_thinks_A_while_solution_is_same.mp4
[MoviePy] Writing video cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_0_sec_6_to_11_rnn_thinks_A_while_solution_is_same.mp4


100%|█████████▉| 300/301 [00:02<00:00, 141.79it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cogsci_images/confusing_videos/confusing_rtheta/confusing_mass_interval_in_trial_0_sec_6_to_11_rnn_thinks_A_while_solution_is_same.mp4 




