# Setup and Functions

The following script and [module](/sl_sequencing.py) were written by [Harrison Gietz](https://www.linkedin.com/in/harrison-gietz-24ab191b5/) with modifications from Will Decker.

In [2]:
import numpy as np
import random
from collections import Counter
import os
import pandas as pd
from itertools import permutations, chain
import sl_sequencing as sls

In [3]:
# Seed BOTH random number generators for replicability: the starting value of
# each attempt is drawn with np.random, but the individual sequence elements
# are drawn with the standard-library `random` module (random.choice).
np.random.seed(42)
random.seed(42)

# _____________________change these variables depending on what you want, then run the rest_________________________
total = 792  # number of elements (and therefore counted transitions) to generate
vector_size = 12  # number of distinct values; values are 1-indexed (1..vector_size)
# Each ordered pair (a, b) with a != b may occur this many times. There are
# vector_size * (vector_size - 1) such pairs (the 2-permutations of the values).
# True division is kept on purpose so the value stays a float, as before.
number_of_transitions_allowed = total / (vector_size * (vector_size - 1))
# ___________________________________________________________________________________________________________________

def get_random_except(excluded_numbers, numbers):
    """Pick one element of ``numbers`` at random, skipping excluded values.

    Args:
        excluded_numbers: container of values that must not be returned.
        numbers: iterable of candidate values.

    Returns:
        A randomly chosen element of ``numbers`` that is not in
        ``excluded_numbers``.

    Raises:
        ValueError: if every candidate is excluded (or ``numbers`` is empty).
    """
    remaining = []
    for candidate in numbers:
        if candidate in excluded_numbers:
            continue
        remaining.append(candidate)

    if len(remaining) == 0:
        raise ValueError("No numbers left to choose from after excluding the given numbers.")

    return random.choice(remaining)


def count_numbers(lst):
    """Print how often each value occurs in ``lst``, one ``value: count`` line
    per distinct value, in ascending order of value.

    Used at the end of the notebook to verify the number counts.
    """
    tallies = Counter(lst)
    for value in sorted(tallies):
        print(f"{value}: {tallies[value]}")

def count_sublist(main_list, sublist):
    """Count the (possibly overlapping) positions at which ``sublist`` occurs
    as a contiguous slice of ``main_list``.

    Used at the end of the notebook to verify the transition counts.
    """
    width = len(sublist)
    last_start = len(main_list) - width

    # One hit per start offset whose window equals the sublist.
    return sum(
        1
        for offset in range(last_start + 1)
        if main_list[offset:offset + width] == sublist
    )

# All values that may appear in the sequence: 1..vector_size inclusive (1-indexed).
possible_nums = range(1,vector_size+1)

# Actually Finding the Numbers

In [5]:
# Randomised search: repeatedly attempt to build a sequence of `total` values
# in which every ordered transition a -> b (a != b) is used at most
# `number_of_transitions_allowed` times. An attempt is abandoned as soon as the
# current value has no permitted successor left, and a fresh attempt is started.
try_count = 0
while True:
    big_long_answer = []
    # Value the walk starts from. NOTE: it is only the "from" side of the first
    # counted transition; it is never appended to big_long_answer itself.
    start = np.random.randint(1, vector_size + 1)  # upper bound is exclusive
    # dictionary for measuring the amount of times each transition has occurred:
    # tally_tracker[a][b] is the number of a -> b transitions used so far
    tally_tracker = {i: {j: 0 for j in range(1, vector_size+1) if j != i} for i in range(1, vector_size+1)}

    try_count += 1
    print(f'number of times tried: {try_count}')

    for _ in range(total):
        # never repeat the current value, and never use a transition
        # (e.g. 6 -> 10) that has already reached its allowed count
        excluded_numbers = [start]
        excluded_numbers.extend(
            target
            for target, times_used in tally_tracker[start].items()
            if times_used >= number_of_transitions_allowed
        )

        try:
            candidate_number = get_random_except(excluded_numbers, possible_nums)
        except ValueError:
            # we accidentally (randomly) exhausted all possible options for
            # transitions out of `start`; give up on this attempt
            break

        # update the dictionary so that we know how often this transition was used
        tally_tracker[start][candidate_number] += 1

        big_long_answer.append(candidate_number)
        start = candidate_number
    if len(big_long_answer) == total:
        break  # we got to the end length successfully, using the exact number of transitions

print('Holy shit here\'s the answer')
print(big_long_answer)

number of times tried: 1
number of times tried: 2
number of times tried: 3
number of times tried: 4
number of times tried: 5
number of times tried: 6
number of times tried: 7
number of times tried: 8
number of times tried: 9
Holy shit here's the answer
[8, 3, 1, 2, 8, 4, 10, 5, 11, 3, 9, 5, 6, 9, 10, 2, 5, 3, 12, 1, 10, 4, 1, 10, 5, 2, 4, 8, 5, 6, 5, 1, 7, 1, 2, 12, 4, 7, 9, 12, 2, 8, 3, 12, 9, 6, 7, 10, 9, 8, 4, 2, 3, 6, 5, 8, 2, 1, 10, 5, 11, 8, 1, 4, 11, 3, 5, 9, 6, 4, 9, 6, 2, 8, 11, 8, 5, 3, 1, 8, 5, 2, 6, 3, 11, 12, 9, 10, 9, 12, 11, 1, 12, 9, 3, 1, 2, 6, 4, 6, 1, 5, 7, 6, 3, 11, 5, 1, 6, 8, 1, 5, 2, 8, 2, 6, 2, 8, 2, 8, 12, 10, 1, 12, 2, 11, 2, 9, 3, 6, 11, 2, 6, 9, 2, 1, 4, 2, 12, 1, 11, 2, 1, 2, 4, 10, 5, 1, 4, 2, 7, 8, 11, 4, 9, 11, 6, 5, 1, 12, 8, 10, 12, 9, 7, 9, 3, 2, 4, 5, 8, 10, 4, 11, 9, 2, 6, 12, 6, 1, 9, 5, 10, 6, 8, 9, 1, 8, 9, 5, 9, 10, 5, 4, 11, 7, 8, 12, 8, 6, 11, 10, 3, 12, 11, 7, 3, 7, 8, 5, 9, 8, 10, 8, 12, 1, 3, 10, 8, 7, 3, 11, 10, 9, 4, 5, 2, 12, 9, 8, 3, 9,

# Verifying the Answers

In [6]:
# Report the sequence length, then the transition tallies kept while building
# the sequence, then an independent recount of every transition taken directly
# from the sequence, and finally how often each value occurs.
print(f'final length of list: {len(big_long_answer)}\n')

print('Checking transition counts: ')
for source in possible_nums:
    print(f'Transition counts from {source}: {tally_tracker[source]}')

print('\nDouble Checking transition counts (with different method): ')
for source in possible_nums:
    print(f'Transition counts for {source}: ')
    # Recount each source -> target pair by scanning the sequence itself.
    remade_dict_of_transitions = {
        target: count_sublist(big_long_answer, [source, target])
        for target in possible_nums
        if target != source
    }
    for target, hits in remade_dict_of_transitions.items():
        print(f'     There were {hits} transitions to {target}: ')

print('\nChecking number counts: ')
count_numbers(big_long_answer)

final length of list: 792

Checking transition counts: 
Transition counts from 1: {2: 6, 3: 6, 4: 6, 5: 6, 6: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 2: {1: 6, 3: 6, 4: 6, 5: 6, 6: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 3: {1: 6, 2: 6, 4: 6, 5: 6, 6: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 4: {1: 6, 2: 6, 3: 6, 5: 6, 6: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 5: {1: 6, 2: 6, 3: 6, 4: 6, 6: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 6: {1: 6, 2: 6, 3: 6, 4: 6, 5: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 7: {1: 6, 2: 6, 3: 6, 4: 6, 5: 6, 6: 6, 8: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 8: {1: 6, 2: 6, 3: 6, 4: 6, 5: 6, 6: 6, 7: 6, 9: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 9: {1: 6, 2: 6, 3: 6, 4: 6, 5: 6, 6: 6, 7: 6, 8: 6, 10: 6, 11: 6, 12: 6}
Transition counts from 10: {1: 6, 2: 6, 3: 6, 4: 6, 5: 6, 6: 6, 7: 6, 8: 6, 9: 6

# Note: Small Error to Manually Correct

## There is an error in the specified requirements (or maybe the algorithm) where one final transition is not included.


In one case, the end of the sequence was [..., 11, 7, 4] and the start of the sequence was "6".

For some reason, the final transition from 4 -> 6 is not added, meaning there are only 5 (not the required 6) transitions from 4 -> 6. You have to tack a 6 onto the end to meet the required number of transitions.

I'm pretty sure this is because of the constraints posed in Will's email; unfortunately, a sequence of n numbers only contains n-1 transitions, so the sequence actually has to be n+1 long in order to have the correct number of equal transitions: there is no number before the very first one to be "transitioning into" it.

(I could be wrong and confused though; in either case, the "simple fix" here is to manually add on the first value of your sequence to the ending, and then you have the required number of transitions for each pair of numbers)

## Trying with object

In [3]:
# import the module
# End-to-end example of the object-oriented interface in sl_sequencing.

# import the module
import sl_sequencing as sls

# create object: 132 elements over 4 values, i.e. 132 / (4 * 3) = 11
# occurrences of each ordered pair (matches the recorded output of this cell)
s = sls.Sequence(total=132, vector_size=4) # non-zero indexed vector size

# make sequence (the result is read back from s.valid_sequence below)
s.sequence()

# print sequence
print(s.valid_sequence)

# validate sequence
s.validate()

# save sequence as .csv file to current directory
s.save_csv('index.csv')

# matching/replacing indices in valid sequence with custom values
inpt = {
    1: 'a', # keys = values that occur in the sequence, dict values = their replacements
    2: 'b',
    3: 'c', 
    4: 'd'
}
s.match(inpt=inpt)
# the replaced sequence is read back from s.match_sequence
print(s.match_sequence)

# saving match sequence
s.save_match_csv(filename='match.csv')



number of times tried: 1
Sequence achieved
[2, 4, 1, 4, 3, 1, 2, 1, 4, 2, 1, 2, 3, 4, 3, 2, 3, 4, 2, 3, 2, 3, 4, 3, 1, 4, 1, 3, 1, 2, 1, 3, 2, 1, 4, 2, 4, 1, 4, 3, 1, 4, 3, 1, 3, 4, 1, 4, 2, 3, 2, 1, 4, 1, 2, 1, 4, 3, 4, 1, 2, 1, 2, 3, 4, 1, 3, 2, 1, 3, 2, 4, 3, 4, 1, 2, 4, 2, 4, 2, 4, 2, 3, 4, 1, 2, 1, 3, 2, 4, 3, 2, 1, 2, 3, 4, 3, 4, 1, 3, 2, 4, 1, 3, 4, 3, 1, 3, 2, 1, 2, 3, 2, 4, 3, 1, 3, 1, 4, 2, 3, 1, 3, 1, 4, 2, 4, 2, 3, 1, 2, 4]
total length of list: 132

Checking transition counts: 
Transition counts from 1: {2: 11, 3: 11, 4: 11}
Transition counts from 2: {1: 11, 3: 11, 4: 11}
Transition counts from 3: {1: 11, 2: 11, 4: 11}
Transition counts from 4: {1: 11, 2: 11, 3: 11}

Double Checking transition counts (with different method): 
Transition counts for 1: 
     There were 11 transitions to 2: 
     There were 11 transitions to 3: 
     There were 11 transitions to 4: 
Transition counts for 2: 
     There were 11 transitions to 1: 
     There were 11 transitions to 3: 
     Ther

In [7]:
# Show the last three and the first element of the sequence, then all of it.
print('The actual sequence:')
for label, window in (('End: ', big_long_answer[-3:]), ('Start: ', big_long_answer[:1])):
    print(label, window)
print(big_long_answer)

The actual sequence:
End:  [10, 11, 7]
Start:  [8]
[8, 3, 1, 2, 8, 4, 10, 5, 11, 3, 9, 5, 6, 9, 10, 2, 5, 3, 12, 1, 10, 4, 1, 10, 5, 2, 4, 8, 5, 6, 5, 1, 7, 1, 2, 12, 4, 7, 9, 12, 2, 8, 3, 12, 9, 6, 7, 10, 9, 8, 4, 2, 3, 6, 5, 8, 2, 1, 10, 5, 11, 8, 1, 4, 11, 3, 5, 9, 6, 4, 9, 6, 2, 8, 11, 8, 5, 3, 1, 8, 5, 2, 6, 3, 11, 12, 9, 10, 9, 12, 11, 1, 12, 9, 3, 1, 2, 6, 4, 6, 1, 5, 7, 6, 3, 11, 5, 1, 6, 8, 1, 5, 2, 8, 2, 6, 2, 8, 2, 8, 12, 10, 1, 12, 2, 11, 2, 9, 3, 6, 11, 2, 6, 9, 2, 1, 4, 2, 12, 1, 11, 2, 1, 2, 4, 10, 5, 1, 4, 2, 7, 8, 11, 4, 9, 11, 6, 5, 1, 12, 8, 10, 12, 9, 7, 9, 3, 2, 4, 5, 8, 10, 4, 11, 9, 2, 6, 12, 6, 1, 9, 5, 10, 6, 8, 9, 1, 8, 9, 5, 9, 10, 5, 4, 11, 7, 8, 12, 8, 6, 11, 10, 3, 12, 11, 7, 3, 7, 8, 5, 9, 8, 10, 8, 12, 1, 3, 10, 8, 7, 3, 11, 10, 9, 4, 5, 2, 12, 9, 8, 3, 9, 6, 12, 8, 4, 2, 11, 3, 1, 9, 3, 2, 3, 9, 7, 12, 4, 1, 7, 11, 3, 8, 5, 11, 2, 1, 9, 8, 11, 9, 8, 12, 11, 6, 10, 2, 3, 9, 1, 11, 5, 8, 5, 4, 1, 12, 10, 5, 12, 6, 11, 7, 3, 7, 2, 9, 10, 6, 11, 3, 5, 12, 5

## Saving sequence

In [5]:
# Write the generated sequence to index.csv inside the user's Box Sync folder.
user = os.getlogin()

path2save = f'/Users/{user}/Box Sync/willdecker/LSU Undergrad/Honors-Thesis/github/statistical_learning_sequencing/'
filename = 'index.csv'
# DataFrame.to_csv returns None when it is given a path, so its result is not
# bound to a name; os.path.join builds the target path from directory + file.
pd.DataFrame(big_long_answer).to_csv(os.path.join(path2save, filename))

## Appending syllables

In [None]:
# Translate the numeric sequence into syllables.
syls = ["di", "da", "du", "pi", "pa", "pu", "bi", "ba", "bu", "ti", "tu", "ta"]

# Sequence values are 1-based, so value k maps to syls[k - 1]. Values outside
# 1..len(syls) are skipped. A direct lookup replaces scanning every index of
# syls for each element of the sequence.
syllist = [syls[j - 1] for j in big_long_answer if 1 <= j <= len(syls)]


## Project specific sequences

In [9]:
import sl_sequencing as sls

# creating random sequence: 792 elements over 12 values
# NOTE(review): .match() is called on the value returned by .sequence(), so
# sequence() presumably returns the Sequence object itself - confirm in sl_sequencing
r = sls.Sequence(total=792, vector_size=12).sequence()
# sequence value -> audio file name that replaces it
inpt = {
    1: 'pi.wav',
    2: 'pu.wav',
    3: 'pa.wav',
    4: 'ti.wav',
    5: 'tu.wav',
    6: 'ta.wav',
    7: 'bi.wav',
    8: 'bu.wav',
    9: 'ba.wav',
    10: 'di.wav',
    11: 'du.wav',
    12: 'da.wav'
}

# replace the values using the mapping above, then write the result to randseq.csv
r.match(inpt=inpt)
r.save_match_csv('randseq.csv')

number of times tried: 1
Sequence achieved


In [4]:
# NOTE(review): chain is not referenced anywhere in this cell
from itertools import chain

# creating structured sequence: 264 elements over 4 values
# NOTE(review): as above, sequence() presumably returns the Sequence object - confirm
s = sls.Sequence(total=264, vector_size=4).sequence()
# each sequence value stands for a three-file group rather than a single file
inpt2 = {
    1: ['pi.wav', 'tu.wav', 'bi.wav'],
    2: ['bu.wav', 'pa.wav', 'da.wav'],
    3: ['di.wav', 'ba.wav', 'pu.wav'],
    4: ['ta.wav', 'ti.wav', 'du.wav']
    
}

# NOTE(review): unlist=True presumably flattens the per-value lists into one flat
# sequence of file names - verify against sl_sequencing.Sequence.match
s.match(inpt=inpt2, unlist=True)
s.save_match_csv('strseq.csv')

number of times tried: 1
Sequence achieved
