In [1]:
import os
from Bio import SeqIO

In [1]:
def ucitaj_proteine(putanja_do_fajla):
    """Read a FASTA file and map each record's description to its sequence.

    Args:
        putanja_do_fajla: Path of the FASTA file, opened in text read mode.

    Returns:
        dict: ``description`` -> ``seq`` for every record yielded by
        ``SeqIO.parse(..., "fasta")``. If several records share the same
        description, the last one read overwrites the earlier ones.
    """
    with open(putanja_do_fajla, "r") as ulaz:
        return {zapis.description: zapis.seq for zapis in SeqIO.parse(ulaz, "fasta")}

In [3]:
# Load the proteins from the FASTA file into a description -> sequence dict.
# The path is relative, so it is resolved against the current working directory.
putanja_do_fajla = "podaci/proteini/rnk.fasta"
proteini_rnk = ucitaj_proteine(putanja_do_fajla)

In [4]:
def edit_rastojanje(str1, str2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element deletions,
    insertions and substitutions (each with cost 1) needed to turn ``str1``
    into ``str2``.

    Only two rows of the dynamic-programming table are kept at any time, so
    memory use is proportional to the shorter input instead of to the product
    of both lengths.

    Args:
        str1: First sequence; must support ``len()`` and integer indexing.
        str2: Second sequence; same requirements as ``str1``.

    Returns:
        int: The edit distance; 0 when the sequences are element-wise equal.
    """
    # With unit costs the distance is symmetric, so the arguments can be
    # swapped freely; keep the shorter sequence as the row dimension.
    if len(str1) < len(str2):
        str1, str2 = str2, str1

    duzina_str2 = len(str2)

    # Row for the empty prefix of str1: reaching str2[:j] takes j insertions.
    prethodni_red = list(range(duzina_str2 + 1))

    for i in range(1, len(str1) + 1):
        znak1 = str1[i - 1]
        # First column: reaching the empty prefix of str2 takes i deletions.
        tekuci_red = [i] + [0] * duzina_str2
        for j in range(1, duzina_str2 + 1):
            cena = 0 if znak1 == str2[j - 1] else 1
            tekuci_red[j] = min(prethodni_red[j] + 1,         # deletion
                                tekuci_red[j - 1] + 1,        # insertion
                                prethodni_red[j - 1] + cena)  # substitution
        prethodni_red = tekuci_red

    return prethodni_red[duzina_str2]

In [5]:
# Compute the edit distance between every pair of proteins.
# Keys are (description, description) tuples; both orderings of each pair are
# stored so a lookup works regardless of the order of the two descriptions.
edit_rastojanja = {}

# Materialize the dict items once: rebuilding the list inside the loops would
# repeat the same work on every iteration.
stavke_proteina = list(proteini_rnk.items())

for i, (protein1, rnk1) in enumerate(stavke_proteina):
    # Pair each entry only with the entries after it, so every unordered pair
    # is computed exactly once and an entry is never paired with itself.
    for protein2, rnk2 in stavke_proteina[i + 1:]:
        rastojanje = edit_rastojanje(rnk1, rnk2)
        edit_rastojanja[(protein1, protein2)] = rastojanje
        edit_rastojanja[(protein2, protein1)] = rastojanje
        # Assumes every description contains " | " and that the part after the
        # first separator is the readable name; otherwise [1] raises IndexError.
        print(f'{protein1.split(" | ")[1]} - {protein2.split(" | ")[1]} : {rastojanje}')

32 kDa non-structural protein [Bovine coronavirus] - hemmaglutinin-esterase [Bovine coronavirus] : 653
32 kDa non-structural protein [Bovine coronavirus] - spike structural protein [Bovine coronavirus] : 3255
32 kDa non-structural protein [Bovine coronavirus] - 4.9 kDa non-structural protein [Bovine coronavirus] : 747
32 kDa non-structural protein [Bovine coronavirus] - 4.8 kDa non-structural protein [Bovine coronavirus] : 699
32 kDa non-structural protein [Bovine coronavirus] - 12.7 kDa non-structural protein [Bovine coronavirus] : 530
32 kDa non-structural protein [Bovine coronavirus] - small membrane protein [Bovine coronavirus] : 586
32 kDa non-structural protein [Bovine coronavirus] - matrix protein [Bovine coronavirus] : 403
32 kDa non-structural protein [Bovine coronavirus] - nucleocapsid protein [Bovine coronavirus] : 709
32 kDa non-structural protein [Bovine coronavirus] - internal protein [Bovine coronavirus] : 425
32 kDa non-structural protein [Bovine coronavirus] - orf1ab p

In [6]:
# Write every stored pair as one "<protein1>, <protein2>, <distance>" line.
with open('edit_rastojanja.txt', 'w') as izlazni_fajl:
    for par, rastojanje in edit_rastojanja.items():
        protein1, protein2 = par
        izlazni_fajl.write(f"{protein1}, {protein2}, {rastojanje}\n")