# Association of the gut microbiome with T2D¶

### https://docs.google.com/document/d/1Iv-LkPUQRKj3V3kI_Ax2DWC5QKs1xKsyeiAb06XNkP4/edit¶
### 1) бактерии, ассоциированные с ожирением из базы данных chocoplan
### 2) общее количество последовательностей для выбранной бактерии
### 3) фильтрация геномных последовательностей на основе их длины (от 100 до 150 нуклеотидов)
### 4) количество отфильтрованных последовательностей
### 5) проверка уникальности в BLAST и вывод уникальных последовательностей

## Bacteroides intestinalis

In [2]:
from Bio import SeqIO

input_file = "Bacteroides_intestinalis.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bacteroides_intestinalis: {count}")

Количество последовательностей Bacteroides_intestinalis: 9941


In [3]:
input_file = "Bacteroides_intestinalis.ffn"
output_file = "filtered_sequences_intestinalis_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bacteroides_intestinalis: {count}")

Количество последовательностей Bacteroides_intestinalis: 406


In [12]:
input_file = "Bacteroides_intestinalis.ffn"
output_file = "filtered_sequences_intestinalis_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [3, 10, 17, 29, 31, 55, 56, 57, 66, 87, 89, 107, 111, 116, 152, 185, 159, 194, 209, 251, 257, 259, 260, 261, 270, 298, 299, 300, 206, 318, 335, 344, 380, 396, 405]

# Считывание всех последовательностей из файла
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 3:
ID: 329854__B3C5A5__BACINT_00087|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_intestinalis|UniRef90_B3C5A5|UniRef50_B3C5A5|129
Sequence: TTGTATTATTGGGCAGTTGGAGCTAAAGTATATTGTGGGTATTTCTTTGCTGTGATATGTTTTTATGGCTTTTGTATGGAAAAAGAGGAGAATATTTGCATTTACTTTGAAGAGATTGGTTATCTTTGA

Последовательность номер 10:
ID: 329854__B3C5R7__BACINT_00250|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_intestinalis|UniRef90_B3C5R7|UniRef50_B3C5R7|141
Sequence: ATGAACTTTCCTCCTTACCACAGAAGCCTTTATACCCAAAAGGAAAGTACTTCAACGTTGAGAGGAGAACTTGTGGAAACAGATATTATTTTTCTTCTCCTTCATGGTATAGAAAGGAGAATAGAAGCAGATATACCTTAA

Последовательность номер 17:
ID: 329854__B3C6N0__BACINT_00566|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_intestinalis|UniRef90_B3C6N0|UniRef50_B3C6N0|150
Sequence: ATGAATTCACCAGCAATCTTTAATTCATTACTGTACC

## Bacteroides vulgatus

In [4]:
from Bio import SeqIO

input_file = "Bacteroides_vulgatus.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bacteroides_vulgatus: {count}")

Количество последовательностей Bacteroides_vulgatus: 17726


In [5]:
input_file = "Bacteroides_vulgatus.ffn"
output_file = "filtered_sequences_vulgatus_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bacteroides_vulgatus: {count}")

Количество последовательностей Bacteroides_vulgatus: 796


In [13]:
input_file = "Bacteroides_vulgatus.ffn"
output_file = "filtered_sequences_vulgatus_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [30, 62, 71, 81, 219, 232, 252, 271, 323, 324, 328, 333, 334, 335, 336, 406, 511, 592, 731, 732, 733, 734, 735, 736, 737, 738, 739, 771, 776, 777, 778]

# Считывание всех последовательностей из файла
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 30:
ID: 821__R9HQ68__C800_00097|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_R9HQ68|UniRef50_R9HQ68|144
Sequence: ATGGAAAGTCGCAAAGTAATCATAACTTGGATTGCCGTTACAGTAGCAGTGATGTTCGCATTGCCGTTTGCTGTGGCAGGATTCGCGTCGGAATGTTCGGGAATGGCTTTGTGTATGATATTGTTTCATGACTTCATAAGGTAA

Последовательность номер 62:
ID: 821__R9HIC6__C800_01903|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_R9HIC6|UniRef50_G1UMP5|123
Sequence: ATGTACCCAATTGTTTTGGGTTGTAAAAACACATTGGTTAACCTTAAAAAAAATTCCATTATTTATGCGACTATTTATTATTATGTTTTTATTTGCCGTCGGAGTAATGAGGGCGGCGGATAG

Последовательность номер 71:
ID: 821__R9HKN5__C800_02696|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_R9HKN5|UniRef50_R9HKN5|126
Sequence: ATGAAAATAGCTTCATTTAAAGTAAAAAGTAAGATGCTGATGTATAAATGTCAAGACATTGGTTGG

## Bifidobacterium adolescentis

In [7]:
input_file = "Bifidobacterium_adolescentis.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_adolescentis: {count}")

Количество последовательностей Bifidobacterium_adolescentis: 7524


In [8]:
input_file = "Bifidobacterium_adolescentis.ffn"
output_file = "filtered_sequences_adolescentis_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_adolescentis: {count}")

Количество последовательностей Bifidobacterium_adolescentis: 381


In [9]:
input_file = "Bifidobacterium_adolescentis.ffn"
output_file = "filtered_sequences_adolescentis_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [12, 18, 34, 60, 61, 62, 64, 210, 216, 254, 256, 258, 375, 376, 377]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 12:
ID: 1680__A0A1X2ZGM5__AD0028_1545|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_adolescentis|UniRef90_A0A1X2ZGM5|UniRef50_A0A1X2ZGM5|135
Sequence: ATGAGCCGCGACGACCACGAGAAGGACAGGTTCACCGACCCGGAGACGGAGAGGCGCTGGGGGCTCGGCATGGCCGCCGTGTTCGTCGTCGCCGTCGTCGCCGTGCCCATCTGCCGTCTGCTGGGATGGTCATGA

Последовательность номер 18:
ID: 1680__A0A1X2Z809__AD0028_2006|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_adolescentis|UniRef90_A0A1X2Z809|UniRef50_A0A1X2Z809|120
Sequence: ATGGATTCGACATCGGACAAGCGGATCGCCGCAATGCGCGGCTCATTGATGGACACCGCGCTGGAGGCGGCGGAATCCCGCGGGCAGAGGCTTGCGGGCTTGACTTCCTCCCCCGCCTGA

Последовательность номер 34:
ID: 1680__A0A1X3A1Z0__AL0467_1053|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_adolescentis|UniRef90_A0A1X3A1Z

## Bifidobacterium bifidum

In [15]:
input_file = "Bifidobacterium_bifidum.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_bifidum: {count}")

Количество последовательностей Bifidobacterium_bifidum: 7610


In [16]:
input_file = "Bifidobacterium_bifidum.ffn"
output_file = "filtered_sequences_bifidum_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_bifidum: {count}")

Количество последовательностей Bifidobacterium_bifidum: 481


In [17]:
input_file = "Bifidobacterium_bifidum.ffn"
output_file = "filtered_sequences_bifidum_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [17]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 17:
ID: 1681__A0A0H2PLD8__B0085_1756|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_bifidum|UniRef90_A0A0H2PLD8|UniRef50_A0A0H2PLD8|126
Sequence: ATGATTCACGAAGACAGATGGAAGAGCACCAGCGAGGCGCTGGATGGATTGGCTGACGAGCGGCACGGAAAGCTGCGCGAGCGCCTTGATGGCGCGACGGCGGATTGCGACGCCGAGGAGGGCTGA



## Bifidobacterium pseudocatenulatum

In [21]:
input_file = "Bifidobacterium_pseudocatenulatum.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_pseudocatenulatum: {count}")

Количество последовательностей Bifidobacterium_pseudocatenulatum: 4766


In [22]:
input_file = "Bifidobacterium_pseudocatenulatum.ffn"
output_file = "filtered_sequences_pseudocatenulatum_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_pseudocatenulatum: {count}")

Количество последовательностей Bifidobacterium_pseudocatenulatum: 201


In [20]:
input_file = "Bifidobacterium_pseudocatenulatum.ffn"
output_file = "filtered_sequences_pseudocatenulatum_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [168]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 168:
ID: 28026__A8SI74__BIFPSEUDO_04517|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_pseudocatenulatum|UniRef90_A8SI74|UniRef50_A8SI74|144
Sequence: ATGAAAGAACTCTATCAAAAAATGATGGAAGCAGGATATTCTTTCCACGACATTGACGAATCTGACTATTTCGGTGCTTTAGAAATGTTATCTTCTACACCTCGAAAAGTCATGTCCGCAGAAGATTTCTTCGATAGTATCTAA



## Bifidobacterium longum

In [23]:
input_file = "Bifidobacterium_longum.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_longum: {count}")

Количество последовательностей Bifidobacterium_longum: 12638


In [24]:
input_file = "Bifidobacterium_longum.ffn"
output_file = "filtered_sequences_longum_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bifidobacterium_longum: {count}")

Количество последовательностей Bifidobacterium_longum: 604


In [25]:
input_file = "Bifidobacterium_longum.ffn"
output_file = "filtered_sequences_longum_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [48, 106, 115]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 48:
ID: 216816__A0A2N0T4I9__APC1465_1922|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_longum|UniRef90_A0A2N0T4I9|UniRef50_A0A2N0T4I9|141
Sequence: ATGCTCAGTGTCACCGGCGACATCATGGGTGACAACAACCAGCAAGTGTCCTGTAAGGTGATTGTCGACGGACAGGAGAAAGTCTCGAAGGATGCCTCCGGTTCCGCTGGTACGGCCACTTGCAACGTCCCGTTGTTCTAG

Последовательность номер 106:
ID: 216816__A0A1V8QI33__B5780_0397|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_longum|UniRef90_A0A1V8QI33|UniRef50_A0A1V8QI33|141
Sequence: ATGTCTTCCGCCATTCACAATTCGCTCATGGGAGGTCTGCCGTTGCCGGACAGCAGAACATTGGGACTTCTTGAGGATTACGTCAACGGTCTTATCGGGGCTTCTGAACTGATGGAGCGTGGTCTCAATAGTTGCGTTTGA

Последовательность номер 115:
ID: 216816__A0A1V8QE10__B5780_2001|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_longum|Un

## Clostridium symbiosum

In [27]:
input_file = "Clostridium_symbiosum.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Clostridium_symbiosum: {count}")

Количество последовательностей Clostridium_symbiosum: 7021


In [28]:
input_file = "Clostridium_symbiosum.ffn"
output_file = "filtered_sequences_symbiosum_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Clostridium_symbiosum: {count}")

Количество последовательностей Clostridium_symbiosum: 398


In [29]:
input_file = "Clostridium_symbiosum.ffn"
output_file = "filtered_sequences_symbiosum_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [6, 32, 63, 145, 151, 152, 173, 192, 216, 217, 218, 219, 225, 244, 301, 302, 303, 324, 343, 364, 380, 384, 388, 389, 392, 396]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 6:
ID: 1512__U2BR77__CLOSYM_00103|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Lachnoclostridium.s__Clostridium_symbiosum|UniRef90_U2BR77|UniRef50_U2BR77|135
Sequence: GTGATAGAGATGACCAAAACACAACAGAGCCTGATTGCAGATAGAAACATTTTTAATAATGAATTGATGATTCCAGCAGACGATCTGTATATTTTACATAATGATTTTTTTTTCATGCTTTCAAACTGGCATTAA

Последовательность номер 32:
ID: 1512__U2DEU7__CLOSYM_00547|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Lachnoclostridium.s__Clostridium_symbiosum|UniRef90_U2DEU7|UniRef50_U2DEU7|141
Sequence: GTGGAGTTTATATTTATTTTTGCCGTAAACCTCTTTGAGGTCTTCTTTATCTACCTTTTTCAGGTTATGAAGGTAGTAGGGGCATTTGGGGTTGACGCACTTGTGGATGATGAAGTGCTTGCGGTCTTTCTTGGGGACTAG

Последовательность номер 63:
ID: 1512__U2DC80__CLOSYM_01142|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Lachnoclostridium.s__Clostridium_symbiosum|UniRef90_U2DC80|UniRef50_U2DC80|132
Sequence: GTGATCATCATCACGGCCGACGATGACCTTGTCAGGA

## Bacteroides stercoris

In [30]:
input_file = "Bacteroides_stercoris.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bacteroides_stercoris: {count}")

Количество последовательностей Bacteroides_stercoris: 7149


In [31]:
input_file = "Bacteroides_stercoris.ffn"
output_file = "filtered_sequences_stercoris_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bacteroides_stercoris: {count}")

Количество последовательностей Bacteroides_stercoris: 324


In [32]:
input_file = "Bacteroides_stercoris.ffn"
output_file = "filtered_sequences_stercoris_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [262, 276, 277, 283, 290, 293, 298]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 262:
ID: 46506__S3YUJ2__HMPREF1181_00850|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_stercoris|UniRef90_S3YUJ2|UniRef50_S3YUJ2|105
Sequence: ATGGATCTCATCCTTTTGACTGATGAGCTTTTCAAGAAACTTGAGCATACACAGTGCTTGAGCATCCTTATCTTTGAAGCCTTCCTTTTTCGTTTCTTGCAGTAA

Последовательность номер 276:
ID: 46506__S3ZHC3__HMPREF1181_02063|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_stercoris|UniRef90_S3ZHC3|UniRef50_S3ZHC3|150
Sequence: ATGAAATCCACATTCTTCCCGTTAAGTAATATCACAACTGTGTTTTCTGATAATTTATATTTGCAATATTCTGATTATTGCAATAAGGCAAAATTTATCGGTAGTAGTATTTTTTTATTAAAAAATCAAAACAAAACAGCTTTATATTGA

Последовательность номер 277:
ID: 46506__S3YER2__HMPREF1181_02085|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_stercoris|UniRef90_S3YER2|UniRef50_S3YER2|150
Sequence: ATGAATATCTTTTTTTTATCTTTTGTGTTTTGGGAAATCCGTGTGAGT

## Faecalibacterium prausnitzii

In [33]:
input_file = "Faecalibacterium_prausnitzii.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Faecalibacterium_prausnitzii: {count}")

Количество последовательностей Faecalibacterium_prausnitzii: 24458


In [34]:
input_file = "Faecalibacterium_prausnitzii.ffn"
output_file = "filtered_sequences_prausnitzii_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Faecalibacterium_prausnitzii: {count}")

Количество последовательностей Faecalibacterium_prausnitzii: 1010


In [35]:
input_file = "Faecalibacterium_prausnitzii.ffn"
output_file = "filtered_sequences_prausnitzii_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [14, 16, 19, 24, 75, 97, 99, 101, 106, 109, 110, 113, 118, 133, 134, 146, 147, 442, 457, 535, 540, 589, 590, 591, 594, 645, 646, 690, 762, 798, 868]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 14:
ID: 853__R6Q0T7__C4N22_11060|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Ruminococcaceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii|UniRef90_R6Q0T7|UniRef50_R7FXY9|123
Sequence: ATGCTGCATTTACTTTCGTGGGAGACCACGCTGCCCATCCTGCTCATCGGGATGCTGGGCATCTTCCTGGTCATCGGCGTTATTGTGCTGGCGGTGACCCTGCTGGGGAAGTTTACAAAGTAA

Последовательность номер 16:
ID: 853__A0A329UHF5__C4N23_06620|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Ruminococcaceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii|UniRef90_A0A329UHF5|UniRef50_A0A174U2Z8|132
Sequence: ATGGAATTTTTCAATCAGGCGGTTACTGTACTGCAAACCCTCGTTATCGCGCTCGGTGCTGGTCTTGGTATCTGGGGCGTGATTAACTTGCTCGAAGGGTACGGGAACGATAACCGTGCGATGCGTTCCTGA

Последовательность номер 19:
ID: 853__A0A329UIK5__C4N24_02235|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Ruminococcaceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii|UniRef90_A0A329UIK5|UniRef50_A0A329UIK5|144
Sequence: ATGCTTGTTACGGGTATTATC

## Eggerthella lenta

In [38]:
input_file = "Eggerthella_lenta.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Eggerthella_lenta: {count}")

Количество последовательностей Eggerthella_lenta: 5902


In [39]:
input_file = "Eggerthella_lenta.ffn"
output_file = "filtered_sequences_lenta_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Eggerthella_lenta: {count}")

Количество последовательностей Eggerthella_lenta: 29


In [40]:
input_file = "Eggerthella_lenta.ffn"
output_file = "filtered_sequences_lenta_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = []
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")
print (f"Нет уникальной последовательности в BLAST")

Нет уникальной последовательности в BLAST


## Bacteroides caccae

In [47]:
input_file = "Bacteroides_caccae.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Bacteroides_caccae: {count}")

Количество последовательностей Bacteroides_caccae: 8292


input_file = "Bacteroides_caccae.ffn"
output_file = "filtered_sequences_caccae_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Bacteroides_caccae: {count}")

In [49]:
input_file = "Bacteroides_caccae.ffn"
output_file = "filtered_sequences_caccae_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [14, 118, 164]
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")


Последовательность номер 14:
ID: 47678__A5ZBW8__BACCAC_00355|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae|UniRef90_A5ZBW8|UniRef50_A5ZBW8|108
Sequence: ATGAGCATAGCATATGGTATTAAAGGGAAGACTGCAGAAGAAGAGAAAGCATATAGCAAGAGAAAAATATGGAGTAGAGAGAAGAAAGAACAGGAAAGGGGGCTATGA

Последовательность номер 118:
ID: 47678__A5ZLE2__BACCAC_03742|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae|UniRef90_A5ZLE2|UniRef50_A5ZLE2|120
Sequence: ATGGTAATCAAAAAAAGTAATAATCCCTTTGCCGGAAAGGCAAAACCCGGAGTCGGAAAGAAAAAACTTCGGGAAAAAGAGTCGGGAAGTCGAAACAGGAAAAGGCTAATAATCAGTTGA

Последовательность номер 164:
ID: 47678__A0A174KNB8__ERS852494_01580|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae|UniRef90_A0A174KNB8|UniRef50_A0A174KNB8|108
Sequence: ATGGAAAAACAACAGTATCAAGTTACGATTCAGATTATGAGTACATTGATATTGTTTGGTAAAATGTGCACAGAGAAAATGA

## Roseburia intestinalis

In [50]:
input_file = "Roseburia_intestinalis.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Roseburia_intestinalis: {count}")

Количество последовательностей Roseburia_intestinalis: 6931


In [51]:
input_file = "Roseburia_intestinalis.ffn"
output_file = "filtered_sequences_intestinalis_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Roseburia_intestinalis: {count}")

Количество последовательностей Roseburia_intestinalis: 456


In [52]:
input_file = "Bacteroides_caccae.ffn"
output_file = "filtered_sequences_caccae_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [2, 30, 64, 88, 137, 204, 405]
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 2:
ID: 47678__A5ZB35__BACCAC_00060|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae|UniRef90_A5ZB35|UniRef50_A5ZB35|132
Sequence: TTGAGGGAAAAAGAAAAGGCAGGCAAAAACGATACACTCACAAAGTTTGTCGTTTTTGCTTTTTTAGGTACTTTTGTAATCGAAAAAACCAAAATGCAGTATCTGCAAAAGAATTATGAAACATCCTCTTGA

Последовательность номер 30:
ID: 47678__A5ZDV5__BACCAC_01063|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae|UniRef90_A5ZDV5|UniRef50_A5ZDV5|129
Sequence: ATGCATTTCCACCTGAACCGAGAAAAAAGAAATCAAGAGAAAGAGAAATATATTAAAAATGAAGATAAGAAAAACAATCCTCACGGCAGCTATTCTCATGGCTGCCGTTTGTTTGCCGGCACAGAATAA

Последовательность номер 64:
ID: 47678__A5ZGD1__BACCAC_01949|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae|UniRef90_A5ZGD1|UniRef50_A5ZGD1|144
Sequence: TTGCTGGGCGCAAAATTATATGTTTTTTCTGAAAGTACCCCTGTTTTTGTAGGGGTATTTTTGAAAA

## Roseburia inulinivorans

In [54]:
input_file = "Roseburia_inulinivorans.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Roseburia_inulinivorans: {count}")

Количество последовательностей Roseburia_inulinivorans: 7449


In [55]:
input_file = "Roseburia_inulinivorans.ffn"
output_file = "filtered_sequences_inulinivorans_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Roseburia_inulinivorans: {count}")

Количество последовательностей Roseburia_inulinivorans: 329


In [59]:
input_file = "Roseburia_inulinivorans.ffn"
output_file = "filtered_sequences_inulinivorans_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [5, 6, 7, 8, 9, 11, 12, 15, 16, 17, 21, 22, 25, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 67, 69, 72, 73, 74, 75, 76, 78, 79, 80, 83, 85, 89, 90, 91, 103, 104, 105, 106, 107, 108, 109, 114, 115, 116, 120, 122, 124, 125, 127, 128, 131, 132, 133, 134, 135, 135, 136, 137, 138, 140, 141, 142, 143, 144, 145]
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 5:
ID: 360807__A0A174DHK0__DW813_06725|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Roseburia.s__Roseburia_inulinivorans|UniRef90_A0A174DHK0|UniRef50_R9ML12|132
Sequence: ATGGGAACATTCGTTGTATTAGTTGTTTTGATATCCGTAGTTGCTCTGATCGTCAGAGGCATTGTCCGGGACAAAAAGAGTGGAAAATCTTCCTGCGGTGGGGACTGTTCACATTGCAGAGGCTGCCATTAG

Последовательность номер 6:
ID: 360807__C0FZ45__DW813_07645|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Roseburia.s__Roseburia_inulinivorans|UniRef90_C0FZ45|UniRef50_C0FZ45|150
Sequence: ATGAAAGGTAAGTTATCAAAAGCGGTTGCAAAGGGAATGGTATCAGTTTTGAATACATTTCTGCGTGCTGATGCAAATTCAGCGGCATGTGCTATTACATATCAACCCAAAGCACCAAAAGAATTAGCAAGATACAGGAGAACGAAATGA

Последовательность номер 7:
ID: 360807__A0A0M6WIF1__DW813_07950|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Roseburia.s__Roseburia_inulinivorans|UniRef90_A0A0M6WIF1|UniRef50_A0A173TPS9|126
Sequence: ATGAACAGTATCTTTCACAGAGTAAGTA

## Akkermansia muciniphila

In [56]:
input_file = "Akkermansia_muciniphila.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Akkermansia_muciniphila: {count}")

Количество последовательностей Akkermansia_muciniphila: 9121


In [57]:
input_file = "Akkermansia_muciniphila.ffn"
output_file = "filtered_sequences_muciniphila_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Akkermansia_muciniphila: {count}")

Количество последовательностей Akkermansia_muciniphila: 36


In [58]:
input_file = "Akkermansia_muciniphila.ffn"
output_file = "filtered_sequences_muciniphila_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [28, 32, 33, 34, 35]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 28:
ID: 239935__A0A2N8HZG7__CXU09_11590|k__Bacteria.p__Verrucomicrobia.c__Verrucomicrobiae.o__Verrucomicrobiales.f__Akkermansiaceae.g__Akkermansia.s__Akkermansia_muciniphila|UniRef90_A0A2N8HZG7|UniRef50_A0A2N8HZG7|122
Sequence: GATTAGGTAAAAAAGCCCATATTTTTCTCCATGTTAAAGAATGCGAATACCGCTTCAACCATAGAGGTGAAGACCTTTATACTCTTATTTTAACCAACCTGCGAAATTCTCCGATCAACTAG

Последовательность номер 32:
ID: 239935__A0A2N8HSB4__CXU17_09605|k__Bacteria.p__Verrucomicrobia.c__Verrucomicrobiae.o__Verrucomicrobiales.f__Akkermansiaceae.g__Akkermansia.s__Akkermansia_muciniphila|UniRef90_A0A2N8HSB4|UniRef50_B4VNM0|108
Sequence: TTGCTAGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGT

Последовательность номер 33:
ID: 239935__A0A2N8HQN2__CXU17_11555|k__Bacteria.p__Verrucomicrobia.c__Verrucomicrobiae.o__Verrucomicrobiales.f__Akkermansiaceae.g__Akkermansia.s__Akkermansia_muciniphila|UniRef90_A0A2N8HQN2|UniRef50_A0A1U3K2S1|121
Sequence: TGATAGGGGTG

## Acidaminococcus sp

In [60]:
input_file = "Acidaminococcus_sp.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Acidaminococcus_sp: {count}")

Количество последовательностей Acidaminococcus_sp: 1846


In [61]:
input_file = "Acidaminococcus_sp.ffn"
output_file = "filtered_sequences_acidaminococcus_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Acidaminococcus_sp: {count}")

Количество последовательностей Acidaminococcus_sp: 16


In [62]:
input_file = "Acidaminococcus_sp.ffn"
output_file = "filtered_sequences_acidaminococcus_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [1, 3, 5, 7, 8, 10, 11, 13, 15, 16]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 1:
ID: 1262687__R7LTX7__BN701_00041|k__Bacteria.p__Firmicutes.c__Negativicutes.o__Acidaminococcales.f__Acidaminococcaceae.g__Acidaminococcus.s__Acidaminococcus_sp_CAG_542|UniRef90_R7LTX7|UniRef50_D2RJA9|123
Sequence: ATGATGAATCTGATGAACGTGGAACCGGTATTCTGGATCGGAATGGGAGCCGTGGCGGCAGTGATCTGCCTGATGCAGGCAGTCTGCTGGACCCGGGAACCGAAGATGCGGAAGAATGGATGA

Последовательность номер 3:
ID: 1262687__R7M008__BN701_00090|k__Bacteria.p__Firmicutes.c__Negativicutes.o__Acidaminococcales.f__Acidaminococcaceae.g__Acidaminococcus.s__Acidaminococcus_sp_CAG_542|UniRef90_R7M008|UniRef50_R7M008|144
Sequence: ATGACTCCTTCCAGAAAAATTTCCTTTTCTCTTTTCCTGGCGGTGCTGGGAATGGTTTCTTCCGGGTTCCGGCCTTTGCCGGCTCTGGCCCGGGAGGATGCCGCCAAAATCTCCCACGCCCGGAAAAGCTGGCCAACAATGTGA

Последовательность номер 5:
ID: 1262687__R7M073__BN701_00231|k__Bacteria.p__Firmicutes.c__Negativicutes.o__Acidaminococcales.f__Acidaminococcaceae.g__Acidaminococcus.s__Acidaminococcus_sp_CAG_542|UniRef90_R7M073|UniRef50_R7M073|147
Sequence: ATG

## Blautia wexlerae

In [63]:
input_file = "Blautia_wexlerae.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Blautia_wexlerae: {count}")

Количество последовательностей Blautia_wexlerae: 5413


In [64]:
input_file = "Blautia_wexlerae.ffn"
output_file = "filtered_sequences_wexlerae_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Blautia_wexlerae: {count}")

Количество последовательностей Blautia_wexlerae: 104


In [67]:
input_file = "Blautia_wexlerae.ffn"
output_file = "filtered_sequences_wexlerae_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [14, 16, 41, 60, 61, 62, 69, 83, 84, 85, 86, 96, 97, 102]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 14:
ID: 418240__A0A174CFY6__ERS852478_01853|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Blautia.s__Blautia_wexlerae|UniRef90_A0A174CFY6|UniRef50_A0A174CFY6|120
Sequence: ATGGAAAGAACCTGTGTACCAATTCCATCAAGTGATATAAATGCAATTATGCACCTGGAAATCTGTGCACCGGAAAATAAGAATAATCTCAAAGGACTCATCAGATCTGTCATGGTATGA

Последовательность номер 16:
ID: 418240__A0A174W4T9__ERS852478_01888|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Blautia.s__Blautia_wexlerae|UniRef90_A0A174W4T9|UniRef50_R5ZDS4|138
Sequence: ATGGAAGCAGTACAGAGAATTCGATTGATCCGAATCATAGAAAAAATGGAGAAAAATCCGGAATTCTGTAATAAGTTTGGAATAAAGAATACATCTGAATATATAGCAGAAAAAGAATCAAAAAAAGGCATATATTAG

Последовательность номер 41:
ID: 418240__A0A174WAG5__ERS852478_03570|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Blautia.s__Blautia_wexlerae|UniRef90_A0A174WAG5|UniRef50_A0A174WAG5|138
Sequence: ATGATGCAGGGAGGGAAGGATATGAGTAAAGAAGAGGTGCTGCTGGAAAGCA

## Dorea longicatena

In [68]:
input_file = "Dorea_longicatena.ffn"

count = sum(1 for record in SeqIO.parse(input_file, "fasta"))
print(f"Количество последовательностей Dorea_longicatena: {count}")

Количество последовательностей Dorea_longicatena: 14095


In [69]:
input_file = "Dorea_longicatena.ffn"
output_file = "filtered_sequences_longicatena_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    for record in SeqIO.parse(in_handle, "fasta"):
        if 100 <= len(record.seq) <= 150:
            SeqIO.write(record, out_handle, "fasta")
count = sum(1 for record in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Dorea_longicatena: {count}")

Количество последовательностей Dorea_longicatena: 417


In [70]:
input_file = "Dorea_longicatena.ffn"
output_file = "filtered_sequences_longicatena_100_150.ffn"
# Номера уникальных последовательностей, проверенных в BLAST
sequence_numbers = [147, 185, 189, 201, 202, 206, 209, 210, 232, 233, 236, 237, 238, 239, 240, 241, 243, 246, 247, 249, 250, 254, 256, 257, 258, 261, 262, 263, 264, 265, 268, 269, 270, 272, 273, 276, 279, 280, 281, 282, 284, 285, 286, 287, 288, 290, 291, 293, 301, 303, 305, 307, 308, 309, 310, 311, 315, 320, 322, 323, 328, 330, 337, 339, 347, 354, 355, 359, 360, 361, 362, 364, 368, 369, 371, 380, 389, 393, 400, 404, 405, 406, 414, 417]

sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    sequence_index = num - 1 
    if sequence_index < len(sequences):
        print(f"Последовательность номер {num}:")
        print(f"ID: {sequences[sequence_index].id}")
        print(f"Sequence: {str(sequences[sequence_index].seq)}\n")

Последовательность номер 147:
ID: 88431__C5EQA4__DW690_13830|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_longicatena|UniRef90_C5EQA4|UniRef50_A0A174IKU1|150
Sequence: ATGGCATCCTATGTTTCCCCGAAAATACGGGACAAATTTGAAAGTCTTTCCATAGACCTGAAGAATGACATATTGAAGCGCAATGTGCACCTGGAAACCTTGCAGGATTTAATTCAGGTTCTGGAAAAGATCGTGAAAGAAGGCAGCTGA

Последовательность номер 185:
ID: 88431__A0A173XWM2__ERS852423_00844|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_longicatena|UniRef90_A0A173XWM2|UniRef50_A0A173XWM2|120
Sequence: ATGAAGACGAAGAAAGAAAAAGAGATAAAACTGATATTTGTATTGATTGCAATTCTTTTTGCGGCATTTCTGATTATTCCGGTTGTCAGACTTCTGGGAAAATCTGTGATTGGTGAATAG

Последовательность номер 189:
ID: 88431__A0A174AQ39__ERS852423_01721|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_longicatena|UniRef90_A0A174AQ39|UniRef50_A0A174AQ39|144
Sequence: ATGAGCAATGATCTTCTGAGACCGGACTGTTATTTTCTGTTAAAAGATAAT