In [40]:
import sys
import numpy as np

# Assuming your directory has these files:
# L (contains edge labels in order)
# I (bitvector for incoming edges, probably as a string of '0' and '1')
# O (bitvector for outgoing edges)
# nodes (mapping edges to nodes or vice versa)
# graph (some metadata or original graph information, optional)

def load_L(filename):
    # Assume L is stored as a single line of characters
    with open(filename, 'r') as f:
        L_str = f.read().strip()  # one string of characters
    return L_str

def load_bitvector(filename):
    # Assume I and O are stored as a string of '0'/'1' characters in a single line
    with open(filename, 'r') as f:
        bit_str = f.read().strip()
    return bit_str

def load_nodes(filename):
    # Assume nodes file might contain one node per line or a mapping of edges -> node
    # This will depend on your format. Let's assume a simple format: each line gives a node id for the corresponding edge index.
    with open(filename, 'r') as f:
        nodes = [line.strip() for line in f]
    return nodes


In [276]:
class WheelerGraphIndex:
    def __init__(self, L, I, O):
        self.L = L
        self.I = I
        self.O = O
        self.n = len(L)
        self.F = "".join(sorted(L))
        print(self.F)
        
        
        # Build C array (like first in FM-index)
        # C stores indices where a new character starts in F
        indices = [0]
        for i in range(1, len(self.F)):
            string = self.F
            if string[i] != string[i - 1]:
                indices.append(i)
        self.C = dict(zip(sorted(set(L)),indices))
#         print(self.C)
        for i, key in enumerate(self.C.keys()):
            if i == len(self.C) -1:
                self.C[key] = [self.C[key]]
                self.C[key].append(len(L) -1)
            else:
                self.C[key] = [self.C[key]]
                self.C[key].append(indices[i + 1] -1)
            
        
        print(self.C)
        
        
        def zeros_ones_helper(string): 
            # Get indices for 1s and 0s of I and O
            zeros = []
            ones = []
            for i in range(len(string)):
                if string[i] == "0":
                    zeros.append(i)
                else: 
                    ones.append(i)
            return zeros, ones
            
        self.I_zeros, self.I_ones = zeros_ones_helper(self.I)
        self.O_zeros, self.O_ones = zeros_ones_helper(self.O)
        
#         print(self.I_zeros)
#         print(self.I_ones)
#         print(self.O_zeros)
#         print(self.O_ones)
        
    def i_select_0(self, low, high):
        # Get positions of 0's corresponding to the characters in F
        
        return self.I_zeros[low], self.I_zeros[high]
        
    def i_rank_1(self, low, high):
        low_1, high_1 = -1, -1
        for i in range(len(self.I_ones)):
            if self.I_ones[i] < low:
                continue
            elif self.I_ones[i] > low and low_1 == -1:
                low_1 = i
            elif self.I_ones[i] > high and self.I_ones[i] == high + 1:
                high_1 = i
                break
       
        return low_1, high_1
        
    def o_select_1(self, low, high): 
        return self.O_ones[low], self.O_ones[high]
    
    def o_rank_0(self, low, high):
        low_0, high_0 = -1, -1
        for i in range(len(self.O_zeros)):
            if self.O_zeros[i] < low:
                continue
            elif self.O_zeros[i] == low + 1:
                low_0 = i
            elif self.O_zeros[i] == high - 1:
                high_0 = i
                break
        print(low_0)
        return low_0, high_0
    
    def l_rank_c(self, low, high, char): 
        
        indices = []
        counter = 0 
        for i in range(high + 1):
            if self.L[i] == char:
                if i >= low:
                    indices.append(counter)
                counter += 1
                
        return indices
                
    
    def backward_search(self,P):
        indices = []
        for i in range(len(P) - 1, -1, -1):
            c = P[i]

            if i == len(P) -1: 
                start, stop = self.C[c][0], self.C[c][1]
            elif len(indices) == 1: 
                start, stop = self.C[c][0] + indices[0], self.C[c][0] + indices[0]
            elif len(indices) > 1:
                start, stop = self.C[c][0] + indices[0], self.C[c][0] + indices[-1]
                
#             print(start,stop)

            I_low, I_high = self.i_select_0(start,stop)
#             print(I_low, I_high)
            I_start, I_stop = self.i_rank_1(I_low,I_high)
            print(I_start, I_stop)

            if I_start > 0:
                O_low,O_high = self.o_select_1(I_start -1, I_stop)
            else: 
                print("error")
                O_low,O_high = self.o_select_1(-1, I_stop)
            print(O_low, O_high)
            O_start, O_stop = self.o_rank_0(O_low, O_high)
            print(O_start, O_stop)

            if i > 0:
                if O_stop == -1:
                    O_stop = O_start
                print(P[i:len(P) - 1] + " found")
                indices = self.l_rank_c(O_start, O_stop, P[i-1])
                print(indices)
                
                if len(indices) == 0: 
                    return False
                
                if i == 1 and len(indices)> 0: 
                    return True
                
                                                
        return False
        
            


In [277]:
def main():
    # Adjust filenames as needed
    L_file = "./out__graph/L.txt"       # contains the edge labels in order
    I_file = "./out__graph/I.txt"       # bitvector of incoming edges
    O_file = "./out__graph/O.txt"       # bitvector of outgoing edges
    nodes_file = r"./out__graph/nodes.txt"# node data (optional depending on use case)
    
    L_file = "./out__rowboat/L.txt"       # contains the edge labels in order
    I_file = "./out__rowboat/I.txt"       # bitvector of incoming edges
    O_file = "./out__rowboat/O.txt"       # bitvector of outgoing edges
    nodes_file = r"./out__rowboat/nodes.txt"# node data (optional depending on use case)
#     L_file = "./out__DMPK/L.txt"       # contains the edge labels in order
#     I_file = "./out__DMPK/I.txt"       # bitvector of incoming edges
#     O_file = "./out__DMPK/O.txt"       # bitvector of outgoing edges
#     nodes_file = r"./out__DMPK/nodes.txt"# node data (optional depending on use case)

    # Load data
    L_str = load_L(L_file)
    I_str = load_bitvector(I_file)
    O_str = load_bitvector(O_file)
    nodes = load_nodes(nodes_file)  # if needed
    
    print(L_str)
    print(I_str)
    print(O_str)
#     print(nodes)

#     # Construct the WheelerGraphIndex
    wgi = WheelerGraphIndex(L_str, I_str, O_str)
    print("_boat"[::-1])
    print(wgi.backward_search("_boat"))
#     print("GCTCCCTCTCCTAGGACCCTCCCCCCAAAAG"[::-1])
#     print(wgi.backward_search("GCTCCCTCTCCTAGGACCCTCCCCCCAAAAG"[::-1]))

#     print("CCCCCTAGGACCCCCACCCCCGACCCTCGCGAAAAAA"[::-1])
#     print(wgi.backward_search("CCCCTAGGACCCCCACCCCCGACCCTCGCGAAA"[::-1]))

if __name__ == "__main__":
    main()

brrytwoawuoao_r__o
01010101010101001010101010101010101
01010010101010101010100101101010101
___aaboooorrrtuwwy
{'_': [0, 2], 'a': [3, 4], 'b': [5, 5], 'o': [6, 9], 'r': [10, 12], 't': [13, 13], 'u': [14, 14], 'w': [15, 16], 'y': [17, 17]}
taob_
12 -1
25 34
-1
-1 17
 found
[0, 1]
3 4
6 10
4
4 5
a found
[]
False
