In [2]:
from Bio import pairwise2
from Bio.Seq import Seq
from Bio import Entrez
from dataclasses import dataclass
import pandas as pd

@dataclass
class fastaseq:
    header: str
    seq: Seq

Entrez.email = 'adam.jasinski@mif.stud.vu.lt'

In [3]:
file = open("sequence.seq", "r")
ids = []
for id in file.readlines():
    ids.append(id.strip())
    
file.close()

sequences = []

file = open("sequences.fasta", "w")
for id in ids[:20]:
    handle = Entrez.efetch(db="nuccore", id=id, rettype="fasta", retmode="text")
    record = handle.read()
    file.write(record)
    sequences.append(fastaseq(record.partition('\n')[0], record.partition('\n')[2]))
    
file.close()

CODE  DESCRIPTION
x     No parameters. Identical characters have score of 1, else 0.
m     A match score is the score of identical chars, else mismatch
      score.
d     A dictionary returns the score of any pair of characters.
c     A callback function returns scores.

CODE  DESCRIPTION
x     No gap penalties.
s     Same open and extend gap penalties for both sequences.
d     The sequences have different open and extend gap penalties.
c     A callback function returns the gap penalties.

In [34]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option("display.precision", 2)
df = pd.DataFrame(None, columns=ids[:20], index=ids[:20])

max = 0
maxPos = (0, 0)
for i, seq1 in enumerate(sequences):
    for j, seq2 in enumerate(sequences):
        alignmentScore = pairwise2.align.globalms(seq1.seq, seq2.seq, 5, -4, -3, -0.5, score_only=True)
        if alignmentScore > max:
            max = alignmentScore
            maxPos = (i, j)
        df.at[ids[i],ids[j]] = alignmentScore
df=(df/max).round(2)
print(df)

            NR_177670.1 NR_177739.1 NR_177759.1 NR_177761.1 NR_177763.1 NR_177815.1 NR_177858.1 NR_112036.1 NR_108903.1 NR_025615.1 NR_147370.1 NR_041968.1 NR_041951.1 NR_114250.1 NR_114254.1 NR_114268.1 NR_114293.1 NR_114321.1 NR_114348.1 NR_114354.1
NR_177670.1  0.95003245  0.73062946   0.6358209  0.76573653  0.74140169  0.61187541   0.7476963  0.49967554  0.61875406  0.64263465  0.65107073  0.63341986  0.66683971  0.62167424  0.58332252  0.61959766  0.57637897  0.56125892  0.66073978  0.63523686
NR_177739.1  0.73062946  0.91369241  0.56917586  0.72096042  0.68565866  0.55107073  0.70233615  0.49039585  0.59740428  0.62342635  0.65541856  0.61550941  0.66690461  0.62589228  0.58961713  0.61083712   0.5724854  0.57670344  0.64613887  0.64114212
NR_177759.1   0.6358209  0.56917586  0.76378975  0.59338092  0.60304997  0.62719014  0.62998053  0.43160286   0.4889682  0.50447761  0.50324465  0.49876703  0.51680727  0.47884491  0.45658663  0.47663855  0.45561324   0.4541207  0.51849448  0.4