In [132]:
import random
import copy

def _read(filename):
    with open(filename, 'r') as sqinput:
        sqinput = sqinput.read().splitlines()
    return sqinput

def Composition(k, text):
    return [text[i:k+i] for i in range(len(text)-k+1)]

def Overlap(strings):
    final = [strings[0]]
    final += [i[-1] for i in strings[1:]]
    return ''.join(final)

def OverlapGraph(strings):
    k = len(strings[0])
    prefs = [i[:k-1] for i in strings]
    suffs = [i[-(k-1):] for i in strings]

    dic = {}

    for z, suff in enumerate(suffs):
        dic[strings[z]] = []
        for x, pref in enumerate(prefs):
            if pref == suff:
                dic[strings[z]].append(strings[x])
    
    formatted = '\n'.join([f"{key} -> {','.join(values)}" for key, values in dic.items() if dic[key] != []])
    return dic, formatted
                
def DeBruijn(k, text):
    k -= 1
    all_mers = Composition(k-1, text)
    debruijn = {}

    for i in range(len(text)-k+1):
        preffix = text[i:k+i]
        if preffix in debruijn:
            debruijn[preffix] = [debruijn[preffix], text[i+1:i+k+1]]
        else:
            debruijn[preffix] = text[i+1:i+k+1]
    return debruijn

def MerDeBruijn(strings):
    preffs = [i[:-1] for i in strings]
    suffs = [i[1:] for i in strings]

    merdebruijn = {}
    for idx, i in enumerate(preffs):
        if i in merdebruijn:
            merdebruijn[i] = [merdebruijn[i], suffs[idx]]
        else:
            merdebruijn[i] = suffs[idx]
    return dict(sorted(merdebruijn.items()))


def EulerianCycle(strings, format=True):
    if format:
        graph = [i.split(' -> ') for i in strings]
        graph = dict(graph)
        for (key, val) in graph.items():
            val = val.split(',')
            graph[key] = val

    copy_graph = copy.deepcopy(dict(graph))
    
    l = 1
    values = []
    for val in copy_graph.values():
        for i in val:
            l += 1
            values.append(i)

    validate = True
    degrees = {key: [] for key in copy_graph.keys()}

    for key in degrees.keys():
        degrees[key].append(values.count(key))
        degrees[key].append(len(copy_graph[key]))
        degrees[key].append(degrees[key][1] - degrees[key][0])
    
    final_sequence = []
    cycle = []

    cn = random.choice([key for key in copy_graph.keys()])

    while len(final_sequence) != l:
        if copy_graph[cn] != []:
            cycle.append(cn)
            next_possibles = copy_graph[cn]
            new_cn = random.choice(next_possibles)
            copy_graph[cn].remove(new_cn)
            cn = new_cn
        elif copy_graph[cn] == []:
            final_sequence.insert(0, cn)
            if len(cycle) == 0:
                # final_sequence.append(final_sequence[0])
                break
            else:
                cn = cycle[-1]
                cycle.pop()
            
    return final_sequence


def EulerianPath(strings, format=True):
    if format:
        graph = [i.split(' -> ') for i in strings]
        graph = dict(graph)
        for (key, val) in graph.items():
            val = val.split(',')
            graph[key] = val
        copy_graph = copy.deepcopy(dict(graph))
    else:
        graph = strings
        copy_graph = copy.deepcopy(graph)
    
    l = 1
    values = []
    for val in copy_graph.values():
        for i in val:
            l += 1
            values.append(i)

    print(l)
    validate = True

    end = [val for val in values if val not in copy_graph.keys()]
    if end == []:
        pass
    else:
        copy_graph[end[0]] = []
    degrees = {key: [] for key in copy_graph.keys()}

    for key in degrees.keys():
        degrees[key].append(values.count(key))
        degrees[key].append(len(copy_graph[key]))
        degrees[key].append(degrees[key][1] - degrees[key][0])
    
    final_sequence = []
    cycle = []
    
    cn = [key for key in degrees.keys() if degrees[key][2] == abs(1)][0]

    while len(final_sequence) != l:
        if copy_graph[cn] != []:
            cycle.append(cn)
            next_possibles = copy_graph[cn]
            new_cn = random.choice(next_possibles)
            copy_graph[cn].remove(new_cn)
            cn = new_cn
        elif copy_graph[cn] == []:
            final_sequence.insert(0, cn)
            if len(cycle) == 0:
                break
            else:
                cn = cycle[-1]
                cycle.pop()
            
    return final_sequence


def DeBruijnPair(k, pairs):
    prefs = [str(i[0][:-1]) + str(i[1][:-1]) for i in pairs]
    suffs = [str(i[0][1:]) + str(i[1][1:]) for i in pairs]
    
    pairs = [[prefs[i], suffs[i]] for i in range(len(prefs))]
    pairdebruijn = {key[0]: [] for key in pairs}

    for pair in pairs:
        pairdebruijn[pair[0]].append(pair[1])
    
    for pref in prefs:
        for suff in suffs:
            if pref[1:int(len(pref)/2)] + pref[1+int(len(pref)/2):] == suff[:int(len(pref)/2)-1] + suff[int(len(pref)/2):-1]:
                if suff not in pairdebruijn[pref]:
                    pairdebruijn[pref].append(suff)

    return pairdebruijn

def Reconstruction(final, k, d):
    pref = ''.join([i[0] for i in final[:-1]]) + final[-1][:int(len(final[0])/2)]
    suff = ''.join([i[int(len(final[0])/2)] for i in final[:-1]]) + final[-1][int(len(final[0])/2):]

    return pref[:k+d] + suff



In [133]:
# txt = _read('dataset_203_99 (1).txt')
# final = EulerianCycle(txt)
# # print('->'.join(final))

# txt = _read('dataset_203_6 (1).txt')
# a = EulerianPath(txt)
# print('->'.join(a))

txt = _read('dataset_204_16 (2).txt')
params = txt[0].split(' ')
k = int(params[0])
d = int(params[1])
pairs = [i.split('|') for i in txt[1:]]
# pairs = Pairs(2, 1, txt)
pairdebruijn = DeBruijnPair(k, pairs)
paireulerian = EulerianPath(pairdebruijn, format=False)
final = Reconstruction(paireulerian, k, d)

with open('submission_paireulerianpath.txt', 'w') as f:
    f.write(final)


5702


In [74]:
def Pairs(k, d, text):
    pairs = []

    for i in range(len(text)-k+1):
        init = text[i:i+k]
        nex = text[i+k+d:i+(2*k)+d]
        if len(nex) != k:
            pass
        else:
            pairs.append([init, nex])

    return pairs

Pairs(3, 1, 'TAATGGGATGCCATGTT')

[['TAA', 'GGG'],
 ['AAT', 'GGA'],
 ['ATG', 'GAT'],
 ['TGG', 'ATG'],
 ['GGG', 'TGC'],
 ['GGA', 'GCC'],
 ['GAT', 'CCA'],
 ['ATG', 'CAT'],
 ['TGC', 'ATG'],
 ['GCC', 'TGT'],
 ['CCA', 'GTT']]

In [146]:
val = {'Terr': 12, 'Giga': 9, 'Mega':6, 'kilo':3, 'hecta':2, 'deca': 1, ' ': 0, 'deci': -1, 'centi': -2, 'milli':-3, 'micro':-6, 'nano':-9, 'pico':-12}
conversion_table = {'kilometers_miles': 0.62, 'kilometers_feet': 3280.8, 'centimeters_inch': 0.39, 'liters_gallons': 1.057, 'millimeters_cups': 0.0042, 'celsius_fahrenheit': (lambda x: x * (9/5)+32), 'kilogram_pounds': 2.2046}


intro = input('Hey user! Would you like to do a unit conversion (u) or a metric to imperial (i) conversion today?: ')

if intro == 'i':
    conversion = input(f'Please enter your preferred conversion. You have the following options: {", ".join(i for i in conversion_table.keys())}')
    direction = input('Would you like to convert from imperial to metric (i-m) or metric to imperial (m-i)?: ')
    constant = conversion_table[conversion]
    if direction == 'i-m': constant /= 1
    

elif intro == 'u':
    type_unit = input('Please enter your desired type of unit of measure (i.e. litres, meters, grams, etc.) *use an empty space for base unit: ')
    i = float(input('Please enter the absolute numerical value of your measurement (9, 15.0, 3.4): '))
    d = int(input('Please enter an integer representing your dimensions (2 is area, 3 is volume, etc.): '))

    print('Here are all of the possible unit prefixes: ', list(val.keys()))
    p1 = input('Please enter the unit it is in right now: ')
    p2 = input('Please enter the unit you want to convert to: ')


    def conversions(p1, p2, d, i):
        val = {'Terr': 12, 'Giga': 9, 'Mega':6, 'kilo':3, 'hecta':2, 'deca': 1, ' ': 0, 'deci': -1, 'centi': -2, 'milli':-3, 'micro':-6, 'nano':-9, 'pico':-12}
        p1_val = val[p1]
        p2_val = val[p2]
        return i*((10**(p1_val-p2_val))**d)

    print(f"{i}{p1}{type_unit}^{d} is {conversions(p1, p2, d, i)}{p2}{type_unit}^{d}")
else:
    print("Well, I guess that's bye from me.")

Here are all of the possible unit prefixes:  ['Terr', 'Giga', 'Mega', 'kilo', 'hecta', 'deca', ' ', 'deco', 'cent', 'milli', 'micro', 'nano', 'pico']
250.0centimeters^2 is 2.5000000000000004e-14Megameters^2
