In [1]:
import Levenshtein
import numpy as np
from sentence_getter import SentenceGetter
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import pickle
from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
from keras import optimizers
import kenlm
import pandas as pd
import seaborn as sns
import pylab as pl
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import r2_score
import heapq
from collections import defaultdict 

def get_model(max_len, n_words, n_tags, embedding_mat):
    input = Input(shape=(max_len,))
    model = Embedding(input_dim=n_words, weights=[embedding_mat], output_dim=50, input_length=max_len)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)  # softmax output layer
    model = Model(input, out)
    return model


def get_embedding_matrix(embeddings_path, word2idx):
    embedding_vectors = {}
    with open(embeddings_path, 'r') as f:
        for line in f:
            line_split = line.strip().split(" ")
            vec = np.array(line_split[1:], dtype=float)
            char = line_split[0]
            embedding_vectors[char] = vec

    embedding_matrix = np.zeros((len(word2idx), 50))
    for char in word2idx:
        embedding_vector = embedding_vectors.get(char)
        if embedding_vector is not None:
            embedding_matrix[word2idx[char]] = embedding_vector
    return embedding_matrix


def get_word(X, y, words, tags):
    ans = ""
    for i, ch in enumerate(X):
        if tags[y[i]] == "C":
            ans += words[ch]
    return ans

def get_word2(word,tag_seq, words):
    ans = ""
    for i in range(len(word)):
        if tag_seq[i]=='C':
            ans+=words[word[i]]
    return ans

def score_candidate_length(c, example,length_model):
    import scipy.stats
    needed_length, needed_length_std=length_model.predict(np.array([[len(example)]]), return_std=True)
    needed_length=needed_length[0]
    needed_length_std=needed_length_std[0]
    clength=len(c)
    #print "NL:{} CL:{}".format(needed_length, clength)
    return scipy.stats.norm.logpdf(clength, loc=needed_length, scale=needed_length_std)-scipy.stats.norm.logpdf(needed_length, loc=needed_length, scale=needed_length_std)    

Using TensorFlow backend.


In [2]:
     
def updatestr(s,i,ch):  
    list1 = list(s)
    list1[i] = ch
    str1 = ''.join(list1)
    return str1

def getTopk(m,k):
    mapping = {0:'C',1:'D',2:'O'}
    r_mapping = {'C':0, 'D':1, 'O':2}
    best_seq = ""
    best_prob = 1.0
    best_idx = np.argmax(m, axis=-1)
    for i in range(30):
        best_seq += mapping[best_idx[i]]
        best_prob *= m[i][best_idx[i]]
    heap = [(-1*best_prob,best_seq)]
    heapq.heapify(heap)
    
    result = []
    added = set()
    while k>0:
        top = heapq.heappop(heap)
        result += [(top[0]*-1,top[1])]
        added.add(top[1])
        k-=1
        prob = -1*top[0]
        seq = top[1]
        curr_prob = prob
        curr_seq = seq
        for i in range(30):
            for j in range(3):
                curr_seq = updatestr(curr_seq,i,mapping[j])
                if curr_seq in added:
                    continue
                curr_prob = prob*m[i][j]/m[i][r_mapping[seq[i]]]
                heapq.heappush(heap,(-1*curr_prob,curr_seq))
                curr_seq = seq
    return result
    

In [3]:
def get_len(row):
    from collections import Counter
    return Counter(list(row))['C']
df = pd.read_csv('./data/components-blends-knight.csv',sep='\t',index_col=0)
df["slen"]=df.source.apply(len)
df["tlen"]=df.target.apply(get_len)
df["ratio"]=df["slen"]/df["tlen"]
len_model = BayesianRidge(verbose=True, compute_score=True)
X=df["slen"].values.reshape(-1,1)
y=df["tlen"].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(X, y)
len_model.fit(X_train, y_train)
# r2_score(y_test,len_model.predict(X_test))

Convergence after  1  iterations


BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=True, copy_X=True,
              fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
              normalize=False, tol=0.001, verbose=True)

In [4]:
kfold = 3
X_tr = []
X_te = []
y_tr = [] 
y_te = []

for i in range(kfold):
    data = pickle.load(open("./data/df_lstm.pkl", "rb"))

    embeddings_path = "./data/pretrained_char_emb.txt"

    words = list(set(data["Word"].values))
    words.append("$")
    n_words = len(words)
    tags = list(set(data["Tag"].values))
    tags.append("O")
    n_tags = len(tags)
    getter = SentenceGetter(data)
    sentences = getter.sentences
    max_len = 30
    word2idx = {w: i for i, w in enumerate(words)}
    tag2idx = {t: i for i, t in enumerate(tags)}

    embedding_mat = get_embedding_matrix(embeddings_path, word2idx)

    X = [[word2idx[w[0]] for w in s] for s in sentences]
    X = pad_sequences(maxlen=max_len, sequences=X, padding="post", value=n_words - 1)
    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len, sequences=y, padding="post", value=tag2idx["O"])
    y = [to_categorical(i, num_classes=n_tags) for i in y]
    X_tr_t, X_te_t, y_tr_t, y_te_t = train_test_split(X, y, test_size=0.1)
    X_tr += [X_tr_t]
    X_te += [X_te_t]
    y_tr += [y_tr_t]
    y_te += [y_te_t]

In [5]:
models = []
lmodel = kenlm.Model('./data/wordlist_english_filtered_threshold100-kenlm.arpa')
phoneme_model = kenlm.Model('./data/cmudict-ud.arpa')
def phoneme_score(s):
    return phoneme_model.score(s)
def phoneme_len(s):
    return len(s.split(' '))
df = pd.read_csv('./seq2seq_output/global_candidates.phonemes', sep='\t', skiprows=3, names=['word','phonemes'])
df=df.dropna()
df["score"]=df["phonemes"].apply(phoneme_score)
df["len"]=df["phonemes"].apply(phoneme_len)
df["nscore"]=df["score"]/(df["len"]**0.4)
df.sort_values(by='nscore', ascending=True)
phoneme_score_dict=dict(zip(df.word.values, df.nscore.values))

for i in range(kfold):
    model = get_model(max_len, n_words, n_tags, embedding_mat)
    model.compile(optimizer="rmsprop", loss="mse", metrics=["accuracy"])
    history = model.fit(X_tr[i], np.array(y_tr[i]), batch_size=32, epochs=10, validation_split=0.1, verbose=0)
    models += [model]
    print("model ",i+1," trained.")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


model  1  trained.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


model  2  trained.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


model  3  trained.


In [10]:
def getEditDistance(model,idx):
    preds = []
    true = []
    for i, test in enumerate(X_te[idx]):
        p = model.predict(np.array([X_te[idx][i]]))
        t = y_te[idx][i]
        predictions = getTopk(p[0],10)
#         print(predictions)
        candidates = [get_word2(X_te[idx][i],d[1], words) for d in predictions]
        m_scores=[lmodel.score(" ".join(c))/(float(len(" ".join(c)))) for c in candidates]
        phen_score = [phoneme_score_dict[c] if c in phoneme_score_dict else 0.0 for c in candidates]
        input_len = [p[1].index('O') if 'O' in p[1] else 30 for p in predictions]
        lstm_len = [(p[1].index('O') - p[1].count('D')) for p in predictions]
        len_score = [1/(1+((i-l)/i)) for l,i in zip(lstm_len,input_len)]
        for j in range(len(m_scores)):
            m_scores[j]= m_scores[j]/7 +  predictions[j][0] + len_score[j]/9
        
        max_idx=-1
        max_val = -99999
        for ele in enumerate(m_scores):
            if ele[1]>max_val:
                max_val = ele[1]
                max_idx = ele[0]
        preds.append(candidates[max_idx])
    #     p = np.argmax(p, axis=-1)
        t = np.argmax(t, axis=-1)
    #     preds.append(get_word(X_te[i], p[0], words, tags))
        true.append(get_word(X_te[idx][i], t, words, tags))

    distance = 0
    for i,word in enumerate(true):
        distance += Levenshtein.distance(word, preds[i])
    acc = distance / len(preds)
    return acc

In [11]:
avg_edit = 0
for i in range(kfold):
    res = getEditDistance(models[i],i)
    avg_edit += res
avg_edit/kfold

[(0.039537584143026876, 'CCCCDDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.031741111997178606, 'CCCCCDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.024797858160919947, 'CCCCDDDDDCCCCCCCOOOOOOOOOOOOOO'), (0.019907933431859166, 'CCCCCDDDDCCCCCCCOOOOOOOOOOOOOO'), (0.019907933431859162, 'CCCCCDDDDCCCCCCCOOOOOOOOOOOOOO'), (0.019009320240556026, 'CCCCDDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.01554295045639191, 'CCCDDDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.015260845492304538, 'CCCCCDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.015260845492304536, 'CCCCCDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.012478014069302835, 'CCCDCDDDDDCCCCCCOOOOOOOOOOOOOO')]
[(0.04924524395708485, 'CCCCDDDDDDCCCOOOOOOOOOOOOOOOOO'), (0.041415939634830236, 'CCCCDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.03432871735294118, 'CCCDDDDDDDCCCOOOOOOOOOOOOOOOOO'), (0.03387440444655178, 'CCCCCDDDDDCCCOOOOOOOOOOOOOOOOO'), (0.02887093191922374, 'CCCDDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.02887093191922374, 'CCCDDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.028488848404260456, 'CCCCCDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.028488848404260

[(0.024392303515333887, 'CCCCDDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.023069041420965185, 'CCCCDDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.021736831930168923, 'CCCCCDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.020557626951566797, 'CCCCCDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.020557626951566797, 'CCCCCDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.012013969622464002, 'CCCDDDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.011546051917292412, 'CCCCDCDDDDDCCCCCOOOOOOOOOOOOOO'), (0.01136222180396421, 'CCCDDDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.011362221803964208, 'CCCDDDDDDDCCCCCCOOOOOOOOOOOOOO'), (0.010919688243514676, 'CCCCDCDDDDCCCCCCOOOOOOOOOOOOOO')]
[(0.045864952235505366, 'CCCDDDDCCCCCCCCOOOOOOOOOOOOOOO'), (0.03137981634801458, 'CCCDDDCCCCCCCCCOOOOOOOOOOOOOOO'), (0.0301604914113407, 'CCCCDDDCCCCCCCCOOOOOOOOOOOOOOO'), (0.024744916127426565, 'CCCDDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.022615787807917447, 'CCDDDDDCCCCCCCCOOOOOOOOOOOOOOO'), (0.020635161170430332, 'CCCCDDCCCCCCCCCOOOOOOOOOOOOOOO'), (0.020635161170430332, 'CCCCDDCCCCCCCCCOOOOOOOOOOOOOOO'), (0.0169299407451

[(0.008628432800951363, 'CCCCCDDDDDDDDCCCCCCOOOOOOOOOOO'), (0.008061409323261146, 'CCCCCDDDDDDDCCCCCCCOOOOOOOOOOO'), (0.006232679902175469, 'CCCCCCDDDDDDDCCCCCCOOOOOOOOOOO'), (0.006154560800715444, 'CCCCDDDDDDDDDCCCCCCOOOOOOOOOOO'), (0.0058230949966672765, 'CCCCCCDDDDDDCCCCCCCOOOOOOOOOOO'), (0.0058230949966672765, 'CCCCCCDDDDDDCCCCCCCOOOOOOOOOOO'), (0.005750109546428249, 'CCCCDDDDDDDDCCCCCCCOOOOOOOOOOO'), (0.005750109546428248, 'CCCCDDDDDDDDCCCCCCCOOOOOOOOOOO'), (0.005683455185810367, 'CCCCCDDDDDDDDDCCCCCOOOOOOOOOOO'), (0.0053099629654850995, 'CCCCCDDDDDDDCDCCCCCOOOOOOOOOOO')]
[(0.13804286819960673, 'CCCDDCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.0890391011549094, 'CCDDDCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.07285343250591084, 'CCCDDDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.06256005292585769, 'CCCDCCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.04699122983301386, 'CCDDDDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.046991229833013855, 'CCDDDDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.040351891795434296, 'CCDDCCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.040351891795

[(0.07375586005363877, 'CCCCDDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.0524747710954008, 'CCCDDDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.03948508270956945, 'CCCCDDDDCCCOOOOOOOOOOOOOOOOOOO'), (0.032729616057516626, 'CCDCDDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.028092285485665676, 'CCCDDDDDCCCOOOOOOOOOOOOOOOOOOO'), (0.02809228548566567, 'CCCDDDDDCCCOOOOOOOOOOOOOOOOOOO'), (0.027763834004244196, 'CCCCDDCCCCCOOOOOOOOOOOOOOOOOOO'), (0.025579320601577582, 'CCCCCDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.02328600208050597, 'CCDDDDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.02328600208050597, 'CCDDDDDCCCCOOOOOOOOOOOOOOOOOOO')]
[(0.09030944961942303, 'CCCDDDCCCCCCCOOOOOOOOOOOOOOOOO'), (0.07084675005758191, 'CCCDDCCCCCCCCOOOOOOOOOOOOOOOOO'), (0.06048910578392463, 'CCDDDDCCCCCCCOOOOOOOOOOOOOOOOO'), (0.04745302486882457, 'CCDDDCCCCCCCCOOOOOOOOOOOOOOOOO'), (0.047453024868824564, 'CCDDDCCCCCCCCOOOOOOOOOOOOOOOOO'), (0.032156148127486545, 'CCCDCDCCCCCCCOOOOOOOOOOOOOOOOO'), (0.03110117379340202, 'CCCDDDDCCCCCCOOOOOOOOOOOOOOOOO'), (0.026519520673368855, '

[(0.043587253950390016, 'CCCDDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.041731469128643374, 'CCCDDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.03500485010087418, 'CCCCDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.033514472441876525, 'CCCCDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.033514472441876525, 'CCCCDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.02377888067968613, 'CCCDDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.022766463474103233, 'CCCDDDDDCDCCCCCOOOOOOOOOOOOOOO'), (0.02276646347410323, 'CCCDDDDDCDCCCCCOOOOOOOOOOOOOOO'), (0.019096778950708323, 'CCCCDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.019096778950708316, 'CCCCDDDDDDCCCCCOOOOOOOOOOOOOOO')]
[(0.06577441167737416, 'CCCCDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.05192066482539047, 'CCCCDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.04195502288389894, 'CCCCCDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.03311823892217088, 'CCCCCDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.033118238922170876, 'CCCCCDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.026208356252863017, 'CCCDDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.020688216677693456, 'CCCDDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.0206882166776934

[(0.10248229066246815, 'CCCDDDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.09550280777705841, 'CCCDDCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.06351548414023823, 'CCDDDDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.05918980765848043, 'CCDDDCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.05918980765848042, 'CCDDDCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.03806350909569244, 'CCCDCDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.03547122111525489, 'CCCDCCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.035471221115254886, 'CCCDCCCCCCCCOOOOOOOOOOOOOOOOOO'), (0.02879776400283969, 'CDCDDDCCCCCCOOOOOOOOOOOOOOOOOO'), (0.026836512944763004, 'CDCDDCCCCCCCOOOOOOOOOOOOOOOOOO')]
[(0.0408226768636745, 'CCCDDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.03890397626364502, 'CCCCDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.03759391944963285, 'CCCDDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.03582697319948002, 'CCCCDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.035826973199480014, 'CCCCDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.015778840686000772, 'CCCDDDDDCDCCCCOOOOOOOOOOOOOOOO'), (0.015442218195468833, 'CCCDDDDCCCCCCCOOOOOOOOOOOOOOOO'), (0.015037221727667865, 'C

[(0.036384992163531964, 'CCCCDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.03457024783376577, 'CCCCDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.025876529423061748, 'CCCDDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.02458590704685039, 'CCCDDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.024585907046850385, 'CCCDDDDDCCCCCCCOOOOOOOOOOOOOOO'), (0.019818857660636277, 'CCCCDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.018830368796864273, 'CCCCDDDDCDCCCCCOOOOOOOOOOOOOOO'), (0.018830368796864273, 'CCCCDDDDCDCCCCCOOOOOOOOOOOOOOO'), (0.01409491174498427, 'CCCDDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.01409491174498427, 'CCCDDDDDDDCCCCCOOOOOOOOOOOOOOO')]
[(0.004295498355945863, 'CCCCCDDDDDDDDDCCCCCCOOOOOOOOOO'), (0.003977774591993582, 'CCCCCDDDDDDDDCCCCCCCOOOOOOOOOO'), (0.003651826346757823, 'CCCCCDDDDDDDCDCCCCCCOOOOOOOOOO'), (0.0033817128660749715, 'CCCCCDDDDDDDCCCCCCCCOOOOOOOOOO'), (0.003381712866074971, 'CCCCCDDDDDDDCCCCCCCCOOOOOOOOOO'), (0.0027171920760411883, 'CCCCDDDDDDDDDDCCCCCCOOOOOOOOOO'), (0.0026982123032849256, 'CCCCCCDDDDDDDDCCCCCCOOOOOOOOOO'), (0.0025162103

[(0.01672547546915798, 'CCCCCDDDDDDDCCCCCCCOOOOOOOOOOO'), (0.014030299795859508, 'CCCCDDDDDDDDCCCCCCCOOOOOOOOOOO'), (0.012494237800754055, 'CCCCCDDDDDDCCCCCCCCOOOOOOOOOOO'), (0.010480892001461583, 'CCCCDDDDDDDCCCCCCCCOOOOOOOOOOO'), (0.010480892001461581, 'CCCCDDDDDDDCCCCCCCCOOOOOOOOOOO'), (0.010032503306499903, 'CCCCCCDDDDDDCCCCCCCOOOOOOOOOOO'), (0.008607684058435717, 'CCCCCDDDDDDDDCCCCCCOOOOOOOOOOO'), (0.008415846195386621, 'CCCCDCDDDDDDCCCCCCCOOOOOOOOOOO'), (0.00841584619538662, 'CCCCDCDDDDDDCCCCCCCOOOOOOOOOOO'), (0.007494464493963449, 'CCCCCCDDDDDCCCCCCCCOOOOOOOOOOO')]
[(0.015540911962600596, 'CCCCDDDDDCCCCCCCOOOOOOOOOOOOOO'), (0.01533088175916605, 'CCCDDDDDDCCCCCCCOOOOOOOOOOOOOO'), (0.014711843655606405, 'CCCCDDDCDCCCCCCCOOOOOOOOOOOOOO'), (0.014513018031774277, 'CCCDDDDCDCCCCCCCOOOOOOOOOOOOOO'), (0.014513018031774275, 'CCCDDDDCDCCCCCCCOOOOOOOOOOOOOO'), (0.013158086169777062, 'CCCCDDDDCCCCCCCCOOOOOOOOOOOOOO'), (0.012980259056304044, 'CCCDDDDDCCCCCCCCOOOOOOOOOOOOOO'), (0.012980259056

[(0.041635097510498625, 'CCCCDDDDDDDCCCOOOOOOOOOOOOOOOO'), (0.03271541909299563, 'CCCCDDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.027590800664126596, 'CCCCCDDDDDDCCCOOOOOOOOOOOOOOOO'), (0.02167989654907364, 'CCCCCDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.02167989654907364, 'CCCCCDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.018664380510298815, 'CCCDDDDDDDDCCCOOOOOOOOOOOOOOOO'), (0.014665824436981188, 'CCCDDDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.014665824436981187, 'CCCDDDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.014095789012880482, 'CCCCDCDDDDDCCCOOOOOOOOOOOOOOOO'), (0.013485397890272878, 'CCCCDDDDDDCCCCOOOOOOOOOOOOOOOO')]
[(0.026509459019208755, 'CCCDDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.023105050850119657, 'CCCDDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.015475024579994612, 'CCDDDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.014581369120978982, 'CCCCDDDDDCCCCCCOOOOOOOOOOOOOOO'), (0.013487684888950222, 'CCDDDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.01348768488895022, 'CCDDDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.012708794802657566, 'CCCCDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.0127087948026

[(0.03508872711399004, 'CCCDDDDDDCCCOOOOOOOOOOOOOOOOOO'), (0.03298597027145785, 'CCCCDDDDDCCCOOOOOOOOOOOOOOOOOO'), (0.028726112507574536, 'CCCDDDDDCCCCOOOOOOOOOOOOOOOOOO'), (0.02700464710820505, 'CCCCDDDDCCCCOOOOOOOOOOOOOOOOOO'), (0.02700464710820505, 'CCCCDDDDCCCCOOOOOOOOOOOOOOOOOO'), (0.024310926596321215, 'CCDDDDDDDCCCOOOOOOOOOOOOOOOOOO'), (0.022854049375251248, 'CCDCDDDDDCCCOOOOOOOOOOOOOOOOOO'), (0.022854049375251245, 'CCDCDDDDDCCCOOOOOOOOOOOOOOOOOO'), (0.019902643099608774, 'CCDDDDDDCCCCOOOOOOOOOOOOOOOOOO'), (0.019902643099608774, 'CCDDDDDDCCCCOOOOOOOOOOOOOOOOOO')]
[(0.046113930201986016, 'CCCDDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.04290197924372284, 'CCCDDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.031923723408918296, 'CCCCDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.029700155963127535, 'CCCCDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.029700155963127528, 'CCCCDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.025575330454457975, 'CCDDDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.023793944508795167, 'CCDDDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.02379394450879

[(0.04455003195367363, 'CCCCDDDDDDDDCCCCOOOOOOOOOOOOOO'), (0.03186347822528262, 'CCCCCDDDDDDDCCCCOOOOOOOOOOOOOO'), (0.02263833363012828, 'CCCCDDDDDDDDDCCCOOOOOOOOOOOOOO'), (0.018996227636832973, 'CCCDDDDDDDDDCCCCOOOOOOOOOOOOOO'), (0.016605660826756637, 'CCCCDDDDDDDCCCCCOOOOOOOOOOOOOO'), (0.016191594462387222, 'CCCCCDDDDDDDDCCCOOOOOOOOOOOOOO'), (0.016191594462387222, 'CCCCCDDDDDDDDCCCOOOOOOOOOOOOOO'), (0.013586654355223802, 'CCCDCDDDDDDDCCCCOOOOOOOOOOOOOO'), (0.013586654355223802, 'CCCDCDDDDDDDCCCCOOOOOOOOOOOOOO'), (0.013402871015019899, 'CCCCDDDDDDDDCDCCOOOOOOOOOOOOOO')]
[(0.03447744392121112, 'CCCDDDDDDDDCCCCOOOOOOOOOOOOOOO'), (0.03149267486907396, 'CCCDDDDDDDDDCCCOOOOOOOOOOOOOOO'), (0.020800203955701414, 'CCCDDDDDDDCCCCCOOOOOOOOOOOOOOO'), (0.020027479352260208, 'CCCCDDDDDDDCCCCOOOOOOOOOOOOOOO'), (0.019150110312099452, 'CCDDDDDDDDDCCCCOOOOOOOOOOOOOOO'), (0.018999496072976862, 'CCCDDDDDDDCDCCCOOOOOOOOOOOOOOO'), (0.018999496072976862, 'CCCDDDDDDDCDCCCOOOOOOOOOOOOOOO'), (0.01829366750995

[(0.008534685865001952, 'CCCCDDDDDDDDCCCCCCOOOOOOOOOOOO'), (0.00828353810965884, 'CCCCDDDDDDDCCCCCCCOOOOOOOOOOOO'), (0.006281580501886981, 'CCCDDDDDDDDDCCCCCCOOOOOOOOOOOO'), (0.006096734232439005, 'CCCDDDDDDDDCCCCCCCOOOOOOOOOOOO'), (0.006096734232439005, 'CCCDDDDDDDDCCCCCCCOOOOOOOOOOOO'), (0.005786808773678278, 'CCCCCDDDDDDDCCCCCCOOOOOOOOOOOO'), (0.0056165220100999206, 'CCCCCDDDDDDCCCCCCCOOOOOOOOOOOO'), (0.0056165220100999206, 'CCCCCDDDDDDCCCCCCCOOOOOOOOOOOO'), (0.005447215345837528, 'CCCCDDDDDDDDDCCCCCOOOOOOOOOOOO'), (0.0052869217007500595, 'CCCCDDDDDDDCDCCCCCOOOOOOOOOOOO')]
[(0.04719656836862697, 'CCCDDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.040530192661989956, 'CCDDDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.037675464574643665, 'CCCDDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.032353916621940794, 'CCDDDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.032353916621940794, 'CCDDDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.02419271539019496, 'CCCCDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.020775565886969945, 'CCDCDDDDDCCCCOOOOOOOOOOOOOOOOO'), (0.020775565

[(0.028979417278766365, 'CCCCDDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.025592653963857025, 'CCCCCDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.022038157514944378, 'CCCCDDDDDDDCCCOOOOOOOOOOOOOOOO'), (0.019462604573978394, 'CCCCCDDDDDDCCCOOOOOOOOOOOOOOOO'), (0.01946260457397839, 'CCCCCDDDDDDCCCOOOOOOOOOOOOOOOO'), (0.013580156484774423, 'CCCCDCDDDDDDCCOOOOOOOOOOOOOOOO'), (0.013043909075824832, 'CCCDDDDDDDDDCCOOOOOOOOOOOOOOOO'), (0.011993072267347396, 'CCCCCCDDDDDDCCOOOOOOOOOOOOOOOO'), (0.011993072267347395, 'CCCCCCDDDDDDCCOOOOOOOOOOOOOOOO'), (0.011519494960935595, 'CCCDCDDDDDDDCCOOOOOOOOOOOOOOOO')]
[(0.05124733840687443, 'CCCDDDDCCCCCCOOOOOOOOOOOOOOOOO'), (0.03668980214305271, 'CCCDDDCCCCCCCOOOOOOOOOOOOOOOOO'), (0.034907325358464206, 'CCCDDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.03314653847433932, 'CCDDDDDCCCCCCOOOOOOOOOOOOOOOOO'), (0.024991402491517812, 'CCCDDDCDCCCCCOOOOOOOOOOOOOOOOO'), (0.024991402491517812, 'CCCDDDCDCCCCCOOOOOOOOOOOOOOOOO'), (0.023730792196370108, 'CCDDDDCCCCCCCOOOOOOOOOOOOOOOOO'), (0.0237307921963

[(0.09567150466995553, 'CCDDDCCCCCOOOOOOOOOOOOOOOOOOOO'), (0.06858415177278136, 'CCDDDDCCCCOOOOOOOOOOOOOOOOOOOO'), (0.05777256368317899, 'CCCDDCCCCCOOOOOOOOOOOOOOOOOOOO'), (0.04141549032409157, 'CCCDDDCCCCOOOOOOOOOOOOOOOOOOOO'), (0.041415490324091565, 'CCCDDDCCCCOOOOOOOOOOOOOOOOOOOO'), (0.03792204192160377, 'CCDDCCCCCCOOOOOOOOOOOOOOOOOOOO'), (0.02893209967988311, 'DCDDDCCCCCOOOOOOOOOOOOOOOOOOOO'), (0.027185221844867838, 'CCDDCDCCCCOOOOOOOOOOOOOOOOOOOO'), (0.027185221844867834, 'CCDDCDCCCCOOOOOOOOOOOOOOOOOOOO'), (0.022899750447847277, 'CCCDCCCCCCOOOOOOOOOOOOOOOOOOOO')]
[(0.03390052628343686, 'CCCDDDDDDDCCCCOOOOOOOOOOOOOOOO'), (0.032772667708946976, 'CCCDDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.030442082232637325, 'CCCCDDDDDDCCCCOOOOOOOOOOOOOOOO'), (0.029429284874143775, 'CCCCDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.029429284874143775, 'CCCCDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.013873018570275168, 'CCDDDDDDDDCCCCOOOOOOOOOOOOOOOO'), (0.013411468126552797, 'CCDDDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.0134114681265527

[(0.008337193265180117, 'CCCCDDDDDDDDDCCCCCCCOOOOOOOOOO'), (0.007330551324175063, 'CCCCDDDDDDDDDDCCCCCCOOOOOOOOOO'), (0.00657167984454822, 'CCCCDDDDDDDDCCCCCCCCOOOOOOOOOO'), (0.005843442848402033, 'CCCDDDDDDDDDDCCCCCCCOOOOOOOOOO'), (0.005778207947715937, 'CCCCDDDDDDDDCDCCCCCCOOOOOOOOOO'), (0.005778207947715936, 'CCCCDDDDDDDDCDCCCCCCOOOOOOOOOO'), (0.0051378990923715145, 'CCCDDDDDDDDDDDCCCCCCOOOOOOOOOO'), (0.005137899092371514, 'CCCDDDDDDDDDDDCCCCCCOOOOOOOOOO'), (0.004844499553989143, 'CCCCCDDDDDDDDCCCCCCCOOOOOOOOOO'), (0.004606014802366881, 'CCCDDDDDDDDDCCCCCCCCOOOOOOOOOO')]
[(0.019224581075208566, 'CCCDDDDDCCCCCCCCCOOOOOOOOOOOOO'), (0.016245654247257258, 'CCCDDDDCCCCCCCCCCOOOOOOOOOOOOO'), (0.013044081169476024, 'CCDDDDDDCCCCCCCCCOOOOOOOOOOOOO'), (0.011022847874991607, 'CCDDDDDCCCCCCCCCCOOOOOOOOOOOOO'), (0.011022847874991606, 'CCDDDDDCCCCCCCCCCOOOOOOOOOOOOO'), (0.010871496580467184, 'CCCDDDDDCDCCCCCCCOOOOOOOOOOOOO'), (0.009823086561189317, 'CCCDDDDDDCCCCCCCCOOOOOOOOOOOOO'), (0.009186914

[(0.037553735564409836, 'CCCCDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.03550028106727171, 'CCCDDDDDDCCCCCOOOOOOOOOOOOOOOO'), (0.031244335307266342, 'CCCCDDDDDDCCCCOOOOOOOOOOOOOOOO'), (0.02953588154407796, 'CCCDDDDDDDCCCCOOOOOOOOOOOOOOOO'), (0.029535881544077956, 'CCCDDDDDDDCCCCOOOOOOOOOOOOOOOO'), (0.028531954067043973, 'CCCCDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.026971814482777544, 'CCCDDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.026971814482777544, 'CCCDDDDDCCCCCCOOOOOOOOOOOOOOOO'), (0.023738302633389517, 'CCCCDDDDCDCCCCOOOOOOOOOOOOOOOO'), (0.023738302633389514, 'CCCCDDDDCDCCCCOOOOOOOOOOOOOOOO')]
[(0.08333065068270577, 'CCCDDDDCCCOOOOOOOOOOOOOOOOOOOO'), (0.0782760137081879, 'CCCDDDDDCCOOOOOOOOOOOOOOOOOOOO'), (0.041933407985064446, 'CCDDDDDCCCOOOOOOOOOOOOOOOOOOOO'), (0.039389828249008954, 'CCDDDDDDCCOOOOOOOOOOOOOOOOOOOO'), (0.039389828249008954, 'CCDDDDDDCCOOOOOOOOOOOOOOOOOOOO'), (0.039379179628307184, 'CCCCDDDCCCOOOOOOOOOOOOOOOOOOOO'), (0.036990533244957484, 'CCCCDDDDCCOOOOOOOOOOOOOOOOOOOO'), (0.03699053324495

[(0.07130671072896722, 'CCCDDDDCCCCCCOOOOOOOOOOOOOOOOO'), (0.05674473590901993, 'CCCDDDCCCCCCCOOOOOOOOOOOOOOOOO'), (0.051730378489992374, 'CCCDDCDCCCCCCOOOOOOOOOOOOOOOOO'), (0.041166204917874476, 'CCCDDCCCCCCCCOOOOOOOOOOOOOOOOO'), (0.04116620491787447, 'CCCDDCCCCCCCCOOOOOOOOOOOOOOOOO'), (0.03281069780492548, 'CCCDDDDDCCCCCOOOOOOOOOOOOOOOOO'), (0.026110226693920633, 'CCCDDDCDCCCCCOOOOOOOOOOOOOOOOO'), (0.02611022669392063, 'CCCDDDCDCCCCCOOOOOOOOOOOOOOOOO'), (0.025331100002692942, 'CCDDDDDCCCCCCOOOOOOOOOOOOOOOOO'), (0.023802946435447513, 'CCCDDCDDCCCCCOOOOOOOOOOOOOOOOO')]
[(0.08061542558278491, 'CCCDDDCCCCCOOOOOOOOOOOOOOOOOOO'), (0.0792445128247998, 'CCDDDDCCCCCOOOOOOOOOOOOOOOOOOO'), (0.04978406621782896, 'CCCDDCCCCCCOOOOOOOOOOOOOOOOOOO'), (0.04893745887154248, 'CCDDDCCCCCCOOOOOOOOOOOOOOOOOOO'), (0.04893745887154247, 'CCDDDCCCCCCOOOOOOOOOOOOOOOOOOO'), (0.038271704371028274, 'CCCDDDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.037620871513510235, 'CCDDDDDCCCCOOOOOOOOOOOOOOOOOOO'), (0.03762087151351023, '

[(0.07592574622397992, 'CCDDDCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.070187235935301, 'CCCDDCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.053601797410112555, 'CCDDCCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.049550543636165914, 'CCCDCCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.04955054363616591, 'CCCDCCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.04683909856057395, 'CDDDDCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.04329897334126583, 'CDCDDCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.04329897334126583, 'CDCDDCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.03306730584523683, 'CDDDCCCCCOOOOOOOOOOOOOOOOOOOOO'), (0.03306730584523683, 'CDDDCCCCCOOOOOOOOOOOOOOOOOOOOO')]
[(0.017171595606487327, 'CCCCDDDDDDDCCCCCCOOOOOOOOOOOOO'), (0.012518317098113347, 'CCCCCDDDDDDCCCCCCOOOOOOOOOOOOO'), (0.011496965159142437, 'CCCCDDDDDDDDCCCCCOOOOOOOOOOOOO'), (0.010640581724440662, 'CCCCDDDDDDCCCCCCCOOOOOOOOOOOOO'), (0.008381437510311103, 'CCCCCDDDDDDDCCCCCOOOOOOOOOOOOO'), (0.008381437510311101, 'CCCCCDDDDDDDCCCCCOOOOOOOOOOOOO'), (0.00811053771080346, 'CCCCDDDDDCDCCCCCCOOOOOOOOOOOOO'), (0.008003230079941548, 

[(0.015730525131176756, 'CCCCCDDDDDDDDCCCCCOOOOOOOOOOOO'), (0.011480998814383539, 'CCCCCDDDDDDDDDCCCCOOOOOOOOOOOO'), (0.010685170981268227, 'CCCCDDDDDDDDDCCCCCOOOOOOOOOOOO'), (0.0078607241078512, 'CCCCCDDDDDDDCCCCCCOOOOOOOOOOOO'), (0.007798623017631506, 'CCCCDDDDDDDDDDCCCCOOOOOOOOOOOO'), (0.007798623017631506, 'CCCCDDDDDDDDDDCCCCOOOOOOOOOOOO'), (0.007270984624518465, 'CCCCCCDDDDDDDCCCCCOOOOOOOOOOOO'), (0.007140528684345059, 'CCCCCDDDDDDDDCDCCCOOOOOOOOOOOO'), (0.005947481247257798, 'CCCDCDDDDDDDDCCCCCOOOOOOOOOOOO'), (0.005737186992160157, 'CCCCCDDDDDDDCDCCCCOOOOOOOOOOOO')]
[(0.05450979104505616, 'CCCDDDDCCOOOOOOOOOOOOOOOOOOOOO'), (0.044857624604707116, 'CCCCDDDCCOOOOOOOOOOOOOOOOOOOOO'), (0.03803189932972234, 'CCCDDDCCCOOOOOOOOOOOOOOOOOOOOO'), (0.03129750876730662, 'CCCCDDCCCOOOOOOOOOOOOOOOOOOOOO'), (0.031297508767306616, 'CCCCDDCCCOOOOOOOOOOOOOOOOOOOOO'), (0.03013357775939328, 'CCDDDDDCCOOOOOOOOOOOOOOOOOOOOO'), (0.02518950311043657, 'CCCDDDDDCOOOOOOOOOOOOOOOOOOOOO'), (0.0247977600576440

ValueError: substring not found