# Association of the gut microbiome with obesity

#### https://docs.google.com/document/d/1Iv-LkPUQRKj3V3kI_Ax2DWC5QKs1xKsyeiAb06XNkP4/edit
#### 1) бактерии, ассоциированные с ожирением, из базы данных ChocoPhlAn
#### 2) общее количество последовательностей для выбранной бактерии
#### 3) фильтрация геномных последовательностей на основе их длины (от 100 до 150 нуклеотидов)
#### 4) количество отфильтрованных последовательностей
#### 5) проверка уникальности в BLAST и вывод уникальных последовательностей

## Bacteroides vulgatus

In [16]:
from Bio import SeqIO

# FASTA file whose records are counted below.
input_file = "Bacteroides_vulgatus.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Bacteroides_vulgatus: {count}")

Количество последовательностей Bacteroides_vulgatus: 17726


In [76]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Bacteroides_vulgatus.ffn"
output_file = "filtered_sequences_vulgatus_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Bacteroides_vulgatus: {count}")

Количество последовательностей Bacteroides_vulgatus: 796


In [81]:
# Print the ID and sequence of the selected records from the filtered file.
# The file name is set here so the cell does not depend on a variable left
# over from an earlier cell.
output_file = "filtered_sequences_vulgatus_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [30, 62, 71, 81, 219, 232, 252, 271, 323, 324, 328, 333, 334, 335, 336, 406, 511, 592, 731, 732, 733, 734, 735, 736, 737, 738, 739, 771, 776, 777, 778]

# Read all sequences from the file so they can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 30:
ID: 821__R9HQ68__C800_00097|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_R9HQ68|UniRef50_R9HQ68|144
Sequence: ATGGAAAGTCGCAAAGTAATCATAACTTGGATTGCCGTTACAGTAGCAGTGATGTTCGCATTGCCGTTTGCTGTGGCAGGATTCGCGTCGGAATGTTCGGGAATGGCTTTGTGTATGATATTGTTTCATGACTTCATAAGGTAA

Последовательность номер 62:
ID: 821__R9HIC6__C800_01903|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_R9HIC6|UniRef50_G1UMP5|123
Sequence: ATGTACCCAATTGTTTTGGGTTGTAAAAACACATTGGTTAACCTTAAAAAAAATTCCATTATTTATGCGACTATTTATTATTATGTTTTTATTTGCCGTCGGAGTAATGAGGGCGGCGGATAG

Последовательность номер 71:
ID: 821__R9HKN5__C800_02696|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_R9HKN5|UniRef50_R9HKN5|126
Sequence: ATGAAAATAGCTTCATTTAAAGTAAAAAGTAAGATGCTGATGTATAAATGTCAAGACATTGGTTGG

## Bacteroides thetaiotaomicron

In [82]:
# FASTA file whose records are counted below.
input_file = "Bacteroides_thetaiotaomicron.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Bacteroides_thetaiotaomicron: {count}")

Количество последовательностей Bacteroides_thetaiotaomicron: 15679


In [83]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Bacteroides_thetaiotaomicron.ffn"
output_file = "filtered_sequences_thetaiotaomicron_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Bacteroides_thetaiotaomicron: {count}")

Количество последовательностей Bacteroides_thetaiotaomicron: 612


In [84]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Bacteroides_thetaiotaomicron.ffn"
output_file = "filtered_sequences_thetaiotaomicron_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [90, 177, 184, 186, 187, 188, 210, 213, 214, 216, 224, 225, 248, 255, 347, 353, 368, 373, 377, 390, 403, 474]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 90:
ID: 818__I0PZJ7__BSIG_5835|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_thetaiotaomicron|UniRef90_I0PZJ7|UniRef50_I0PZJ7|132
Sequence: ATGCTTATTCGTAAAGAAATCGCGTTTAGATGGAATAAAAATACTGTGTTACATTCCCTTCTCTACATAAAAGAAATCCTCAAAATATTTATAACAGTTGCCTTTCTTATTAAAGAAAGAAACCACTATTAG

Последовательность номер 177:
ID: 818__R9HGK3__C799_00136|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_thetaiotaomicron|UniRef90_R9HGK3|UniRef50_R9HGK3|108
Sequence: ATGTTTGTTGAGTTTTATTTAAGCGTATCGATAAGCAGAGTTTTCATTAAGGGATTAATTCCTGTTGGTGCTGCAAAGGGGAAAGAAGTGGAAGAATGGATTGTGTGA

Последовательность номер 184:
ID: 818__R9HD23__C799_01045|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_thetaiotaomicron|UniRef90_R9HD23|UniRef50_R9HD23|105
Sequence: ATGTATCTTTTCTATTTCTACAATGCAAAGATAAACGCATCATTAGACAATGCGTTTACTCAAAAATG

## Staphylococcus aureus

In [85]:
# FASTA file whose records are counted below.
input_file = "Staphylococcus_aureus.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Staphylococcus_aureus: {count}")

Количество последовательностей Staphylococcus_aureus: 40610


In [86]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Staphylococcus_aureus.ffn"
output_file = "filtered_sequences_aureus_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Staphylococcus_aureus: {count}")

Количество последовательностей Staphylococcus_aureus: 1645


In [87]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Staphylococcus_aureus.ffn"
output_file = "filtered_sequences_aureus_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (the list is empty here)
sequence_numbers = []

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))
total = len(sequences)

for num in sequence_numbers:
    sequence_index = num - 1  # listed numbers are 1-based
    if sequence_index >= total:
        # Number points past the end of the filtered file.
        print(f"Последовательность номер {num} не найдена.")
        continue
    hit = sequences[sequence_index]
    print(f"Последовательность номер {num}:")
    print(f"ID: {hit.id}")
    print(f"Sequence: {str(hit.seq)}\n")

## Lactobacillus reuteri

In [88]:
# FASTA file whose records are counted below.
input_file = "Lactobacillus_reuteri.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Lactobacillus_reuteri: {count}")

Количество последовательностей Lactobacillus_reuteri: 10761


In [89]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Lactobacillus_reuteri.ffn"
output_file = "filtered_sequences_reuteri_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Lactobacillus_reuteri: {count}")

Количество последовательностей Lactobacillus_reuteri: 274


In [90]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Lactobacillus_reuteri.ffn"
output_file = "filtered_sequences_reuteri_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [18, 36, 37, 185, 189, 190, 236, 242, 244, 252]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 18:
ID: 1598__A0A256VKE2__CBF81_03425|k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Lactobacillus.s__Lactobacillus_reuteri|UniRef90_A0A256VKE2|UniRef50_A0A256VKE2|126
Sequence: ATGTCAAACAATAGATATAAAATTTTATCAAAACAAGATTTATCTAAGATTGTTGGAGGATGGGGATACTCACTTGTATTTAGAAATGGATGGACAAATTTCTTAAATAAGTGGAAAAAAAGATGA

Последовательность номер 36:
ID: 1598__A0A317GH97__DKZ23_05845|k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Lactobacillus.s__Lactobacillus_reuteri|UniRef90_A0A317GH97|UniRef50_A0A317GH97|132
Sequence: ATGACAAGTTTTACTTTACGATTGCCGGATAACTTATATAAAGGAGTCCAACAATTATCAGATAGAGACGAAATTACTAAAACCCAAGTAATTAGAGATGCAATTAAGGAACACTTAGGGAGGGAGAACTAA

Последовательность номер 37:
ID: 1598__A0A317GH48__DKZ23_05850|k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Lactobacillus.s__Lactobacillus_reuteri|UniRef90_A0A317GH48|UniRef50_V6DHY9|147
Sequence: ATGGCAGAAACAAAAAACTTTACTATGAGAATGCCTG

## Akkermansia muciniphila

In [91]:
# FASTA file whose records are counted below.
input_file = "Akkermansia_muciniphila.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Akkermansia_muciniphila: {count}")

Количество последовательностей Akkermansia_muciniphila: 9121


In [92]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Akkermansia_muciniphila.ffn"
output_file = "filtered_sequences_muciniphila_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Akkermansia_muciniphila: {count}")

Количество последовательностей Akkermansia_muciniphila: 36


In [93]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Akkermansia_muciniphila.ffn"
output_file = "filtered_sequences_muciniphila_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [28, 32, 33, 34, 35]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 28:
ID: 239935__A0A2N8HZG7__CXU09_11590|k__Bacteria.p__Verrucomicrobia.c__Verrucomicrobiae.o__Verrucomicrobiales.f__Akkermansiaceae.g__Akkermansia.s__Akkermansia_muciniphila|UniRef90_A0A2N8HZG7|UniRef50_A0A2N8HZG7|122
Sequence: GATTAGGTAAAAAAGCCCATATTTTTCTCCATGTTAAAGAATGCGAATACCGCTTCAACCATAGAGGTGAAGACCTTTATACTCTTATTTTAACCAACCTGCGAAATTCTCCGATCAACTAG

Последовательность номер 32:
ID: 239935__A0A2N8HSB4__CXU17_09605|k__Bacteria.p__Verrucomicrobia.c__Verrucomicrobiae.o__Verrucomicrobiales.f__Akkermansiaceae.g__Akkermansia.s__Akkermansia_muciniphila|UniRef90_A0A2N8HSB4|UniRef50_B4VNM0|108
Sequence: TTGCTAGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGTAAGAGGGGT

Последовательность номер 33:
ID: 239935__A0A2N8HQN2__CXU17_11555|k__Bacteria.p__Verrucomicrobia.c__Verrucomicrobiae.o__Verrucomicrobiales.f__Akkermansiaceae.g__Akkermansia.s__Akkermansia_muciniphila|UniRef90_A0A2N8HQN2|UniRef50_A0A1U3K2S1|121
Sequence: TGATAGGGGTG

## Bifidobacterium adolescentis

In [94]:
# FASTA file whose records are counted below.
input_file = "Bifidobacterium_adolescentis.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Bifidobacterium_adolescentis: {count}")

Количество последовательностей Bifidobacterium_adolescentis: 7524


In [95]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Bifidobacterium_adolescentis.ffn"
output_file = "filtered_sequences_adolescentis_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Bifidobacterium_adolescentis: {count}")

Количество последовательностей Bifidobacterium_adolescentis: 381


In [96]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Bifidobacterium_adolescentis.ffn"
output_file = "filtered_sequences_adolescentis_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [12, 18, 34, 60, 61, 62, 64, 210, 216, 254, 256, 258, 375, 376, 377]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 12:
ID: 1680__A0A1X2ZGM5__AD0028_1545|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_adolescentis|UniRef90_A0A1X2ZGM5|UniRef50_A0A1X2ZGM5|135
Sequence: ATGAGCCGCGACGACCACGAGAAGGACAGGTTCACCGACCCGGAGACGGAGAGGCGCTGGGGGCTCGGCATGGCCGCCGTGTTCGTCGTCGCCGTCGTCGCCGTGCCCATCTGCCGTCTGCTGGGATGGTCATGA

Последовательность номер 18:
ID: 1680__A0A1X2Z809__AD0028_2006|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_adolescentis|UniRef90_A0A1X2Z809|UniRef50_A0A1X2Z809|120
Sequence: ATGGATTCGACATCGGACAAGCGGATCGCCGCAATGCGCGGCTCATTGATGGACACCGCGCTGGAGGCGGCGGAATCCCGCGGGCAGAGGCTTGCGGGCTTGACTTCCTCCCCCGCCTGA

Последовательность номер 34:
ID: 1680__A0A1X3A1Z0__AL0467_1053|k__Bacteria.p__Actinobacteria.c__Actinobacteria.o__Bifidobacteriales.f__Bifidobacteriaceae.g__Bifidobacterium.s__Bifidobacterium_adolescentis|UniRef90_A0A1X3A1Z

## Blautia hydrogenotrophica

In [97]:
# FASTA file whose records are counted below.
input_file = "Blautia_hydrogenotrophica.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Blautia_hydrogenotrophica: {count}")

Количество последовательностей Blautia_hydrogenotrophica: 4178


In [98]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Blautia_hydrogenotrophica.ffn"
output_file = "filtered_sequences_hydrogenotrophica_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Blautia_hydrogenotrophica: {count}")

Количество последовательностей Blautia_hydrogenotrophica: 276


In [99]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Blautia_hydrogenotrophica.ffn"
output_file = "filtered_sequences_hydrogenotrophica_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [16, 32, 165]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 16:
ID: 53443__R5BW37__ERS852414_01867|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Blautia.s__Blautia_hydrogenotrophica|UniRef90_R5BW37|UniRef50_R5BW37|111
Sequence: ATGATGCTTGATATTGTTCTTTCAAATCATTTTAAGAAAGGCATCAAACGCGCTAAAAAATGCGGCTTGAATCTGGACATTTTGGAAAACGTTGTCGATCAGCTTGCATAA

Последовательность номер 32:
ID: 53443__A0A173UR74__ERS852414_03234|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Blautia.s__Blautia_hydrogenotrophica|UniRef90_A0A173UR74|UniRef50_A0A173UR74|132
Sequence: ATGCGTTTCATCAAAAACCTTACAGAAACAGCAAAAGAGCTTCAAAAGTTGCTGGAACAACTAGAGAAGCTCGTGATCAAGCTGATATCCATTGTCGGATGGATATACATATTGATTAAGTTGTTTGAGTAA

Последовательность номер 165:
ID: 53443__C0CMW7__RUMHYD_02205|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Blautia.s__Blautia_hydrogenotrophica|UniRef90_C0CMW7|UniRef50_C0CMW7|129
Sequence: ATGTCAGTTCAAAAATATTTGATTAATTTATTGGAATTTTATATTTTTGTCATGGAAATATTATA

## Prevotella copri

In [100]:
# FASTA file whose records are counted below.
input_file = "Prevotella_copri.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Prevotella_copri: {count}")

Количество последовательностей Prevotella_copri: 11818


In [101]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Prevotella_copri.ffn"
output_file = "filtered_sequences_copri_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Prevotella_copri: {count}")

Количество последовательностей Prevotella_copri: 1151


In [102]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Prevotella_copri.ffn"
output_file = "filtered_sequences_copri_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [8, 13, 16, 21, 22, 37, 38, 39, 63, 64, 107, 192, 216, 217, 219, 238, 239, 242, 243, 244, 280, 293, 295, 314, 332, 337, 355, 356, 357, 363, 369, 379, 408, 412, 414, 418, 474, 505, 527, 539, 566, 577, 578, 595, 600, 601, 608, 612, 613, 636, 655, 687, 732, 740, 788, 796, 798, 822, 873, 890, 1023, 1067]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 8:
ID: 165179__A0A3E5EB72__DXB41_01380|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_copri|UniRef90_A0A3E5EB72|UniRef50_A0A3E5EB72|108
Sequence: ATGATGCTAGCAAGTTCTTCTATGGTGACTTGGAAGGCAAGGGCAACTACCGTGTGGAGATGGCTAACATTTGGGGCTGCGGCCACAATGACAGCTGGAACGGTTTGA

Последовательность номер 13:
ID: 165179__A0A3E5DY95__DXB41_10815|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_copri|UniRef90_A0A3E5DY95|UniRef50_A0A3E5DY95|114
Sequence: ATGAATGTTACTTTGATTCTAGGAAATGGTTTTGACCTTAACATGGGGTTGCCTACAGCCTATTCTGATTTCTATAAGTATTATATGTTAGTTGATTCTGTAAACCACCAATAG

Последовательность номер 16:
ID: 165179__A0A3E5DT69__DXB41_13615|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_copri|UniRef90_A0A3E5DT69|UniRef50_A0A3E5DT69|129
Sequence: GTGTTTGCAGATTCTTTCCCGGATGGCGTTCTGCTTTGGTTGATTATTGTTCTGTTCTTTCATTTCTTTCTTCTTTTTA

## Faecalibacterium prausnitzii

In [103]:
# FASTA file whose records are counted below.
input_file = "Faecalibacterium_prausnitzii.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Faecalibacterium_prausnitzii: {count}")

Количество последовательностей Faecalibacterium_prausnitzii: 24458


In [105]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Faecalibacterium_prausnitzii.ffn"
output_file = "filtered_sequences_prausnitzii_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Faecalibacterium_prausnitzii: {count}")

Количество последовательностей Faecalibacterium_prausnitzii: 1010


In [106]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Faecalibacterium_prausnitzii.ffn"
output_file = "filtered_sequences_prausnitzii_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [14, 16, 19, 24, 75, 97, 99, 101, 106, 109, 110, 113, 118, 133, 134, 146, 147, 442, 457, 535, 540, 589, 590, 591, 594, 645, 646, 690, 762, 798, 868]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 14:
ID: 853__R6Q0T7__C4N22_11060|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Ruminococcaceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii|UniRef90_R6Q0T7|UniRef50_R7FXY9|123
Sequence: ATGCTGCATTTACTTTCGTGGGAGACCACGCTGCCCATCCTGCTCATCGGGATGCTGGGCATCTTCCTGGTCATCGGCGTTATTGTGCTGGCGGTGACCCTGCTGGGGAAGTTTACAAAGTAA

Последовательность номер 16:
ID: 853__A0A329UHF5__C4N23_06620|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Ruminococcaceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii|UniRef90_A0A329UHF5|UniRef50_A0A174U2Z8|132
Sequence: ATGGAATTTTTCAATCAGGCGGTTACTGTACTGCAAACCCTCGTTATCGCGCTCGGTGCTGGTCTTGGTATCTGGGGCGTGATTAACTTGCTCGAAGGGTACGGGAACGATAACCGTGCGATGCGTTCCTGA

Последовательность номер 19:
ID: 853__A0A329UIK5__C4N24_02235|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Ruminococcaceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii|UniRef90_A0A329UIK5|UniRef50_A0A329UIK5|144
Sequence: ATGCTTGTTACGGGTATTATC

## Lactobacillus plantarum

In [107]:
# FASTA file whose records are counted below.
input_file = "Lactobacillus_plantarum.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Lactobacillus_plantarum: {count}")

Количество последовательностей Lactobacillus_plantarum: 18818


In [108]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Lactobacillus_plantarum.ffn"
output_file = "filtered_sequences_plantarum_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Lactobacillus_plantarum: {count}")

Количество последовательностей Lactobacillus_plantarum: 811


In [109]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Lactobacillus_plantarum.ffn"
output_file = "filtered_sequences_plantarum_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [358, 359, 378, 411, 491, 492, 495, 503, 504, 505, 508, 512, 517, 520, 524, 530, 534, 540, 552, 642, 717]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 358:
ID: 1590__A0A165RVA4__Lp19_1176|k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Lactobacillus.s__Lactobacillus_plantarum|UniRef90_A0A165RVA4|UniRef50_A0A165RVA4|141
Sequence: ATGGCCCCATATCGCCACAATGTCGCCAATACATGGTGCTTTATTTTGCTTCATGGCAGTATGCGAACAGTTTATTTTTATTTTGGTGTGATCAAGTACATTTTCTATTTCTATTTTGGAATCGGAATTATTTTCTGTTAA

Последовательность номер 359:
ID: 1590__A0A165RVD5__Lp19_1180|k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Lactobacillus.s__Lactobacillus_plantarum|UniRef90_A0A165RVD5|UniRef50_A0A165RVD5|123
Sequence: ATGCATCAAGTGGATTTTGATAAAGATATTCAACTTTCACGCTATAGCCATGATCCAACGTTCTTTTTTACATTTAAGCGTGCTTTAGATGATGCAAAAGTAGCGTCTAACAAGAAAGAGTAG

Последовательность номер 378:
ID: 1590__A0A165QVP2__Lp19_3589|k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Lactobacillus.s__Lactobacillus_plantarum|UniRef90_A0A165QVP2|UniRef50_A0A165QVP2|114
Sequence: ATGTTCGAGCGCAAAAAGTTTATC

## Butyrivibrio fibrisolvens

In [110]:
# FASTA file whose records are counted below.
input_file = "Butyrivibrio_fibrisolvens.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Butyrivibrio_fibrisolvens: {count}")

Количество последовательностей Butyrivibrio_fibrisolvens: 7741


In [111]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Butyrivibrio_fibrisolvens.ffn"
output_file = "filtered_sequences_fibrisolvens_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Butyrivibrio_fibrisolvens: {count}")

Количество последовательностей Butyrivibrio_fibrisolvens: 85


In [113]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Butyrivibrio_fibrisolvens.ffn"
output_file = "filtered_sequences_fibrisolvens_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 24, 28, 29, 30, 32, 36, 37, 39, 43, 44, 48, 49, 51, 52, 55, 57, 58, 59, 60, 62, 67, 69, 73, 75, 77, 78, 80, 81, 82, 83, 84]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 3:
ID: 831__A0A317FV95__CPT75_02160|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Butyrivibrio.s__Butyrivibrio_fibrisolvens|UniRef90_A0A317FV95|UniRef50_A0A317FV95|148
Sequence: TGATCAGAATAATACGGGCAGGACGCCGCTTATGTTAGCTGTCCAATGTAACAATGAAGAGGCGGTTGACTATTTGATAAATAATGGTGCGCATATTAGTATCACTGATTATTCTGGGAACAATTTGTTTTATTATACTATGCAATGA

Последовательность номер 4:
ID: 831__A0A317FWG9__CPT75_02165|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Butyrivibrio.s__Butyrivibrio_fibrisolvens|UniRef90_A0A317FWG9|UniRef50_A0A011LVT2|107
Sequence: TTGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAGACAGGCAG

Последовательность номер 7:
ID: 831__A0A317G5P3__CPT75_09280|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Butyrivibrio.s__Butyrivibrio_fibrisolvens|UniRef90_A0A317G5P3|UniRef50_V2XPP2|141
Sequence: ATGGATTTCTTCAATTCAGCAGTAACAATACTTGAAA

## Parabacteroides distasonis

In [114]:
# FASTA file whose records are counted below.
input_file = "Parabacteroides_distasonis.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Parabacteroides_distasonis: {count}")

Количество последовательностей Parabacteroides_distasonis: 13339


In [115]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Parabacteroides_distasonis.ffn"
output_file = "filtered_sequences_distasonis_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Parabacteroides_distasonis: {count}")

Количество последовательностей Parabacteroides_distasonis: 578


In [116]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Parabacteroides_distasonis.ffn"
output_file = "filtered_sequences_distasonis_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [67, 78, 92, 103, 114, 132, 149, 159, 180, 197, 204, 257, 319, 530, 531, 532, 554]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 67:
ID: 823__A0A174G162__ERS852380_02554|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Tannerellaceae.g__Parabacteroides.s__Parabacteroides_distasonis|UniRef90_A0A174G162|UniRef50_A0A174G162|135
Sequence: ATGGGAACAACGTTGTTCCCTATCATGGGAGAGCCCCTGTTGACAGGCGTGAGAACCGAGGCTCTCCCATACCTGTCAACAGGGGCTTTCCCATATATGCCAACTGAGGCTCTCCCATATATGGTATTATCGTGA

Последовательность номер 78:
ID: 823__A0A174IFZ2__ERS852380_03295|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Tannerellaceae.g__Parabacteroides.s__Parabacteroides_distasonis|UniRef90_A0A174IFZ2|UniRef50_A0A174IFZ2|138
Sequence: ATGGAATCGGGCGTTTCAGAAACTATATCAAACCTAACCTCTGACACGAGGGACAAGTTAGGGACAAACGGAAGAAACACGAGGGACAAAAAATATCCCTTGAATTTCAACACTTCGTTTTTCAAAGGGTCTTATTAG

Последовательность номер 92:
ID: 823__A0A173RT10__ERS852429_00685|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Tannerellaceae.g__Parabacteroides.s__Parabacteroides_distasonis|UniRef90_A0A173RT10|UniRef50_A0A1

## Alistipes putredinis

In [117]:
# FASTA file whose records are counted below.
input_file = "Alistipes_putredinis.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Alistipes_putredinis: {count}")

Количество последовательностей Alistipes_putredinis: 3223


In [118]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Alistipes_putredinis.ffn"
output_file = "filtered_sequences_putredinis_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Alistipes_putredinis: {count}")

Количество последовательностей Alistipes_putredinis: 186


In [119]:
# Print the ID and sequence of the selected records from the filtered file.
input_file = "Alistipes_putredinis.ffn"
output_file = "filtered_sequences_putredinis_100_150.ffn"
# Numbers of the unique sequences checked in BLAST (1-based positions in output_file)
sequence_numbers = [3, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 31, 36, 39, 41, 42, 46, 47, 48, 50, 51, 52, 56, 57, 65, 66, 69, 70, 77, 78, 79, 83, 84, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 107, 108, 111, 112, 114, 115, 118, 121, 124, 125, 126, 127, 128, 129, 130, 131, 133, 134, 137, 138, 139, 140, 142, 143, 148, 149, 150, 151, 153, 155, 156, 157, 159, 160, 161, 162, 163, 164, 166, 167, 168, 169, 170, 171, 172, 181, 182, 183, 184, 185]

# Load every filtered record once so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Numbers outside 1..len(sequences) are reported instead of being skipped
    # silently; values below 1 would otherwise wrap around via negative indexing.
    if not 1 <= num <= len(sequences):
        print(f"Последовательность номер {num} не найдена.")
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {str(record.seq)}\n")

Последовательность номер 3:
ID: 28117__B0MTH1__ALIPUT_00059|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Rikenellaceae.g__Alistipes.s__Alistipes_putredinis|UniRef90_B0MTH1|UniRef50_B0MTH1|135
Sequence: ATGTTTCCTAATACACAACACGCTCTGGGGTATTGCCACCGCAAAGTTCCACAAAACAAAGTTTTTCTGCAACGGCCGACGGCCGGATCGAAAATTATATTCGTTGTAATTTGGGTGAAAAGTTTTTGTGTGTAA

Последовательность номер 9:
ID: 28117__B0MTM2__ALIPUT_00130|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Rikenellaceae.g__Alistipes.s__Alistipes_putredinis|UniRef90_B0MTM2|UniRef50_B0MTM2|123
Sequence: GTGGAGTATTGCCGCCGCAAAGATCGTGGAAAAGTTCTAAAATACCAATGGTACGACGCGTGCTCTTCCTTTATTACGGATGCATCCCGTCCGTTTTGCTCCGATCGGATACCTCTTCCCTGA

Последовательность номер 10:
ID: 28117__B0MTT7__ALIPUT_00148|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Rikenellaceae.g__Alistipes.s__Alistipes_putredinis|UniRef90_B0MTT7|UniRef50_B0MTT7|129
Sequence: GTGCAAAACAAAAATACCGTATCGATTGGTATCGGCAAGGGACATGTTAAGATGCGGGGACCGGATAAATCCGG

## Dorea formicigenerans

In [120]:
# FASTA file whose records are counted below.
input_file = "Dorea_formicigenerans.ffn"

# Walk the parser lazily and tally the records instead of loading them all.
count = 0
for _entry in SeqIO.parse(input_file, "fasta"):
    count += 1
print(f"Количество последовательностей Dorea_formicigenerans: {count}")

Количество последовательностей Dorea_formicigenerans: 8215


In [121]:
# Copy the records whose length lies inside the target window into a new
# FASTA file and report how many were kept.
input_file = "Dorea_formicigenerans.ffn"
output_file = "filtered_sequences_formicigenerans_100_150.ffn"
min_length, max_length = 100, 150  # inclusive bounds on len(record.seq)

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    selected = (
        record
        for record in SeqIO.parse(in_handle, "fasta")
        if min_length <= len(record.seq) <= max_length
    )
    # SeqIO.write returns the number of records written, so the output file
    # does not have to be parsed a second time just to count them.
    count = SeqIO.write(selected, out_handle, "fasta")
print(f"Количество последовательностей Dorea_formicigenerans: {count}")

Количество последовательностей Dorea_formicigenerans: 265


In [122]:
input_file = "Dorea_formicigenerans.ffn"
output_file = "filtered_sequences_formicigenerans_100_150.ffn"
# 1-based positions (within the filtered file) of the unique sequences verified in BLAST
sequence_numbers = [107, 158, 164, 170, 184, 197, 203, 212, 234, 237, 240, 241, 247]

# Load every record of the filtered FASTA file so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Valid positions are 1..len(sequences). Out-of-range entries are reported
    # instead of being skipped silently, and a value <= 0 can no longer wrap
    # around to the end of the list through negative indexing.
    if not 1 <= num <= len(sequences):
        print(
            f"Последовательность номер {num} отсутствует в файле {output_file} "
            f"(всего записей: {len(sequences)})\n"
        )
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {record.seq!s}\n")

Последовательность номер 107:
ID: 39486__B0G7Y0__DORFOR_02385|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_formicigenerans|UniRef90_B0G7Y0|UniRef50_B0G7Y0|120
Sequence: GTGGTTCTTGAAATTTTAAATAAAATTGCACAAATTTTAAATAAGAAAAAATGGGCAAAAACCGAAGAAAATGTTAATAATATAACGAAAATTATAAAAAATTTTAAAATTAACCAATAA

Последовательность номер 158:
ID: 39486__B0GAE4__DORFOR_03260|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_formicigenerans|UniRef90_B0GAE4|UniRef50_B0GAE4|147
Sequence: ATGCGAAAAGAAAGATATAGAAGTAATAGAAAGAAATATCAGAAAATAAAGAAAAAGATAGAATCAGAGACTGAAAACATGAGAAAAGGGGGGCAAAAAAGTTCGGAAAAAGTATTGAAAATGGTTGACACACACGGGTCAGTATAA

Последовательность номер 164:
ID: 39486__G1WQ90__DWV67_01645|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_formicigenerans|UniRef90_G1WQ90|UniRef50_R5FLJ5|144
Sequence: ATGGTTATTACTGGAATCATCATTTTTGGAATTACTGTTTATGCAGTATGGGCTGTCCGAAAAGTTCGACGGGATC

## Dorea longicatena

In [123]:
# Count all FASTA records in the unfiltered Dorea longicatena file.
input_file = "Dorea_longicatena.ffn"

fasta_records = SeqIO.parse(input_file, "fasta")
count = sum(1 for _rec in fasta_records)
print(f"Количество последовательностей Dorea_longicatena: {count}")

Количество последовательностей Dorea_longicatena: 14095


In [124]:
# Copy to a separate FASTA file only those records whose sequence length is
# between 100 and 150 (both bounds inclusive), then report how many were kept.
input_file = "Dorea_longicatena.ffn"
output_file = "filtered_sequences_longicatena_100_150.ffn"

with open(input_file, "r") as in_handle, open(output_file, "w") as out_handle:
    kept_records = (
        rec
        for rec in SeqIO.parse(in_handle, "fasta")
        if 100 <= len(rec.seq) <= 150
    )
    # A single write call emits every record yielded by the generator.
    SeqIO.write(kept_records, out_handle, "fasta")

# Re-read the freshly written file to obtain the number of filtered records.
count = sum(1 for _rec in SeqIO.parse(output_file, "fasta"))
print(f"Количество последовательностей Dorea_longicatena: {count}")

Количество последовательностей Dorea_longicatena: 417


In [125]:
input_file = "Dorea_longicatena.ffn"
output_file = "filtered_sequences_longicatena_100_150.ffn"
# 1-based positions (within the filtered file) of the unique sequences verified in BLAST
sequence_numbers = [147, 185, 189, 201, 202, 206, 209, 210, 232, 233, 236, 237, 238, 239, 240, 241, 243, 246, 247, 249, 250, 254, 256, 257, 258, 261, 262, 263, 264, 265, 268, 269, 270, 272, 273, 276, 279, 280, 281, 282, 284, 285, 286, 287, 288, 290, 291, 293, 301, 303, 305, 307, 308, 309, 310, 311, 315, 320, 322, 323, 328, 330, 337, 339, 347, 354, 355, 359, 360, 361, 362, 364, 368, 369, 371, 380, 389, 393, 400, 404, 405, 406, 414, 417]

# Load every record of the filtered FASTA file so it can be addressed by position.
sequences = list(SeqIO.parse(output_file, "fasta"))

for num in sequence_numbers:
    # Valid positions are 1..len(sequences). Out-of-range entries are reported
    # instead of being skipped silently, and a value <= 0 can no longer wrap
    # around to the end of the list through negative indexing.
    if not 1 <= num <= len(sequences):
        print(
            f"Последовательность номер {num} отсутствует в файле {output_file} "
            f"(всего записей: {len(sequences)})\n"
        )
        continue
    record = sequences[num - 1]
    print(f"Последовательность номер {num}:")
    print(f"ID: {record.id}")
    print(f"Sequence: {record.seq!s}\n")

Последовательность номер 147:
ID: 88431__C5EQA4__DW690_13830|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_longicatena|UniRef90_C5EQA4|UniRef50_A0A174IKU1|150
Sequence: ATGGCATCCTATGTTTCCCCGAAAATACGGGACAAATTTGAAAGTCTTTCCATAGACCTGAAGAATGACATATTGAAGCGCAATGTGCACCTGGAAACCTTGCAGGATTTAATTCAGGTTCTGGAAAAGATCGTGAAAGAAGGCAGCTGA

Последовательность номер 185:
ID: 88431__A0A173XWM2__ERS852423_00844|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_longicatena|UniRef90_A0A173XWM2|UniRef50_A0A173XWM2|120
Sequence: ATGAAGACGAAGAAAGAAAAAGAGATAAAACTGATATTTGTATTGATTGCAATTCTTTTTGCGGCATTTCTGATTATTCCGGTTGTCAGACTTCTGGGAAAATCTGTGATTGGTGAATAG

Последовательность номер 189:
ID: 88431__A0A174AQ39__ERS852423_01721|k__Bacteria.p__Firmicutes.c__Clostridia.o__Clostridiales.f__Lachnospiraceae.g__Dorea.s__Dorea_longicatena|UniRef90_A0A174AQ39|UniRef50_A0A174AQ39|144
Sequence: ATGAGCAATGATCTTCTGAGACCGGACTGTTATTTTCTGTTAAAAGATAAT