# Tutorial notebook on how to search the barrier sequence database to get sequences with specific attributes

In [3]:
import pandas as pd
import pickle
import sys
# Add the parent directory (where hex_maze_utils lives) to the module search path,
# so it can be imported from this notebook's folder.
sys.path.append("..")

## 1. Load the barrier sequence database

In [2]:
# Read the pickled table of barrier sequences into a DataFrame and show it.
# NOTE: pickle files can execute arbitrary code when loaded; only open databases from a trusted source.
database_path = '../Barrier_Sequence_Databases/barrier_sequences_first1000.pkl'
barrier_df = pd.read_pickle(database_path)
display(barrier_df)

Unnamed: 0,barrier_sequence,sequence_length,reward_path_lengths,choice_points
0,"[(37, 7, 39, 41, 14, 46, 20, 23, 30), (37, 39,...",5,"[[15, 17, 21], [15, 21, 19], [15, 17, 19], [23...","[{13}, {26}, {26, 13, 29}, {29}, {13}]"
1,"[(34, 36, 37, 39, 10, 45, 14, 15, 20), (34, 37...",6,"[[17, 21, 19], [19, 21, 17], [19, 15, 17], [17...","[{21}, {30}, {24}, {16, 24, 26}, {24}, {35}]"
2,"[(34, 7, 41, 11, 46, 17, 20, 28, 31)]",1,"[[23, 19, 17]]",[{35}]
3,"[(34, 10, 42, 12, 45, 18, 23, 25, 31), (34, 37...",6,"[[15, 19, 21], [15, 19, 17], [21, 19, 15], [21...","[{17}, {17, 26, 35}, {35}, {24}, {24, 17, 26},..."
4,"[(39, 9, 10, 46, 21, 22, 23, 26, 30)]",1,"[[23, 17, 17]]",[{29}]
...,...,...,...,...
995,"[(32, 34, 45, 13, 14, 15, 18, 19, 25), (32, 34...",2,"[[19, 17, 15], [19, 15, 17]]","[{35}, {24}]"
996,"[(37, 7, 40, 13, 46, 45, 18, 25, 28), (37, 7, ...",2,"[[15, 19, 17], [17, 19, 15]]","[{26}, {36}]"
997,"[(36, 10, 11, 46, 15, 21, 25, 27, 29)]",1,"[[19, 15, 21]]",[{16}]
998,"[(32, 8, 9, 41, 15, 20, 21, 27, 29), (32, 8, 9...",2,"[[17, 21, 15], [17, 15, 21]]","[{31}, {13}]"


## 2. Filter the database based on certain criteria

In [5]:
# For example, keep only the sequences that contain at least MIN_SEQUENCE_LENGTH mazes.
# The threshold is named once so the filter and the printed summary cannot drift apart;
# change this single value to search for a different minimum length.
MIN_SEQUENCE_LENGTH = 5
filtered = barrier_df[barrier_df['sequence_length'] >= MIN_SEQUENCE_LENGTH]
print(f"There are {len(filtered)} barrier sequences in our database with at least {MIN_SEQUENCE_LENGTH} mazes:")
display(filtered)

There are 121 barrier sequences in our database with at least 5 mazes:


Unnamed: 0,barrier_sequence,sequence_length,reward_path_lengths,choice_points
0,"[(37, 7, 39, 41, 14, 46, 20, 23, 30), (37, 39,...",5,"[[15, 17, 21], [15, 21, 19], [15, 17, 19], [23...","[{13}, {26}, {26, 13, 29}, {29}, {13}]"
1,"[(34, 36, 37, 39, 10, 45, 14, 15, 20), (34, 37...",6,"[[17, 21, 19], [19, 21, 17], [19, 15, 17], [17...","[{21}, {30}, {24}, {16, 24, 26}, {24}, {35}]"
3,"[(34, 10, 42, 12, 45, 18, 23, 25, 31), (34, 37...",6,"[[15, 19, 21], [15, 19, 17], [21, 19, 15], [21...","[{17}, {17, 26, 35}, {35}, {24}, {24, 17, 26},..."
9,"[(34, 39, 42, 11, 12, 16, 21, 27, 30), (34, 39...",5,"[[17, 25, 19], [17, 15, 19], [21, 15, 19], [17...","[{31}, {24, 13, 31}, {24}, {13}, {31}]"
11,"[(32, 37, 8, 40, 9, 11, 16, 21, 28), (32, 37, ...",5,"[[19, 21, 15], [19, 17, 15], [17, 17, 23], [21...","[{36}, {13, 36, 29}, {13}, {29}, {13}]"
...,...,...,...,...
957,"[(39, 9, 10, 42, 21, 22, 23, 25, 30), (39, 9, ...",5,"[[23, 17, 17], [15, 21, 17], [15, 17, 19], [15...","[{29}, {31}, {17}, {17, 29, 31}, {29}]"
958,"[(34, 37, 40, 10, 11, 15, 20, 22, 26), (34, 37...",6,"[[19, 21, 17], [19, 15, 17], [19, 15, 21], [17...","[{30}, {24}, {16}, {17}, {24}, {36}]"
962,"[(34, 39, 8, 42, 12, 46, 21, 27, 30), (34, 39,...",5,"[[17, 15, 19], [17, 25, 19], [21, 15, 19], [17...","[{24, 13, 31}, {31}, {24}, {13}, {31}]"
977,"[(34, 37, 7, 41, 10, 18, 25, 28, 30), (34, 37,...",5,"[[15, 19, 21], [15, 19, 17], [21, 19, 15], [17...","[{17}, {17, 26, 35}, {35}, {17, 35, 36}, {17}]"


In [6]:
# Alternatively, keep only the sequences in which every maze has exactly one choice point
def every_maze_has_one_choice_point(choice_points_per_maze):
    """Return True when each entry of a sequence's choice-point list has length 1."""
    for maze_choice_points in choice_points_per_maze:
        if len(maze_choice_points) != 1:
            return False
    return True

single_choice_mask = barrier_df['choice_points'].apply(every_maze_has_one_choice_point)
filtered = barrier_df.loc[single_choice_mask]
print(f"There are {len(filtered)} barrier sequences in our database where each maze in the sequence has only 1 choice point:")
display(filtered)

# Note that this simply rules out every sequence in which any maze has more than one choice point.
# If a sequence only breaks this rule in its 5th maze (as many long sequences do),
# we could instead include the subsequence made up of the first 4 mazes, which does satisfy the criterion.

# Steph TODO: Add tutorial to search for subsequences!

There are 843 barrier sequences in our database where each maze in the sequence has only 1 choice point:


Unnamed: 0,barrier_sequence,sequence_length,reward_path_lengths,choice_points
2,"[(34, 7, 41, 11, 46, 17, 20, 28, 31)]",1,"[[23, 19, 17]]",[{35}]
4,"[(39, 9, 10, 46, 21, 22, 23, 26, 30)]",1,"[[23, 17, 17]]",[{29}]
5,"[(32, 36, 8, 9, 41, 44, 21, 23, 24)]",1,"[[17, 17, 23]]",[{13}]
6,"[(9, 10, 42, 44, 13, 18, 22, 25, 30)]",1,"[[15, 21, 17]]",[{31}]
7,"[(34, 37, 7, 45, 13, 22, 25, 27, 29)]",1,"[[17, 19, 15]]",[{36}]
...,...,...,...,...
995,"[(32, 34, 45, 13, 14, 15, 18, 19, 25), (32, 34...",2,"[[19, 17, 15], [19, 15, 17]]","[{35}, {24}]"
996,"[(37, 7, 40, 13, 46, 45, 18, 25, 28), (37, 7, ...",2,"[[15, 19, 17], [17, 19, 15]]","[{26}, {36}]"
997,"[(36, 10, 11, 46, 15, 21, 25, 27, 29)]",1,"[[19, 15, 21]]",[{16}]
998,"[(32, 8, 9, 41, 15, 20, 21, 27, 29), (32, 8, 9...",2,"[[17, 21, 15], [17, 15, 21]]","[{31}, {13}]"
