# Часть 1: Материалы

In [None]:
import os

from pymatgen.ext.matproj import MPRester

# The Materials Project API key is read from the environment instead of being
# hardcoded, so the secret never ends up in the saved notebook.
# NOTE(review): the key that was previously committed in this cell should be
# treated as leaked and revoked.
API_KEY = os.environ.get("MP_API_KEY") or os.environ.get("PMG_MAPI_KEY")
if not API_KEY:
    raise RuntimeError(
        "Materials Project API key not found: set the MP_API_KEY "
        "(or PMG_MAPI_KEY) environment variable."
    )

# Only the query needs the REST session; the analysis below works on the
# returned objects after the session has been closed.
with MPRester(API_KEY) as mpr:
    structures = mpr.get_structures("MnFe2O4")

# Fail with a clear message instead of a bare IndexError when nothing matches.
if not structures:
    raise ValueError("No structures returned for formula 'MnFe2O4'.")
structure = structures[0]  # first entry, in whatever order the API returned

composition = structure.composition
elements = list(composition.elements)

print("Состав:", composition)
print("Элементы:", elements)

# Lattice lengths (a, b, c) and angles (alpha, beta, gamma) of the cell.
lattice = structure.lattice
print("a =", lattice.a, "Å")
print("b =", lattice.b, "Å")
print("c =", lattice.c, "Å")
print("α =", lattice.alpha, "°")
print("β =", lattice.beta, "°")
print("γ =", lattice.gamma, "°")

# The density value prints with its own unit attached (the recorded output
# showed "g cm^-3 g/cm³"), so convert it to a plain float before labelling it.
print("Плотность:", float(structure.density), "g/cm³")


Состав: Mn10 Fe20 O40
Элементы: [Element Mn, Element Fe, Element O]
a = 6.150493600289411 Å
b = 6.100716953321962 Å
c = 30.396005395963694 Å
α = 60.279723329094324 °
β = 59.99807904718926 °
γ = 59.83729374457539 °
Плотность: 4.744605346781863 g cm^-3 g/cm³


# Часть 2: Последовательности ДНК/РНК

In [19]:
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction

# Two short example DNA fragments, stored under arbitrary gene labels.
seq1, seq2 = Seq("ATGCCGTAGTGCAACGAGCTA"), Seq("GGAAACCCGCGATGTTAAA")
sequences = dict(gene1=seq1, gene2=seq2)

# For every fragment: derive GC content (as a percentage), the reverse
# complement and the transcribed RNA first, then print them in a fixed order.
# NOTE: the label `name` is iterated but not included in the printed output.
for name, seq in sequences.items():
    gc_content = 100 * gc_fraction(seq)
    rev_comp = seq.reverse_complement()
    rna = seq.transcribe()

    print("ДНК:", seq)
    print(f"GC-состав: {gc_content:.2f}%")
    print("Обратная комплементарная:", rev_comp)
    print("РНК:", rna, "\n")

ДНК: ATGCCGTAGTGCAACGAGCTA
GC-состав: 52.38%
Обратная комплементарная: TAGCTCGTTGCACTACGGCAT
РНК: AUGCCGUAGUGCAACGAGCUA 

ДНК: GGAAACCCGCGATGTTAAA
GC-состав: 47.37%
Обратная комплементарная: TTTAACATCGCGGGTTTCC
РНК: GGAAACCCGCGAUGUUAAA 



# Часть 3: Белки

In [None]:
from Bio import ExPASy, SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis
import pandas as pd
from collections import Counter

uniprot_id = "Q53F26"  # alpha-amylase (human)

# Fetch the raw Swiss-Prot-format entry for this accession and parse it as a
# single record. try/finally guarantees the handle is closed even when
# parsing raises (e.g. an empty or malformed response).
handle = ExPASy.get_sprot_raw(uniprot_id)
try:
    record = SeqIO.read(handle, "swiss")
finally:
    handle.close()

In [2]:
# Keep the protein sequence as a plain string; the dipeptide cell reads `seq`.
seq = str(record.seq)

# Header lines: record description, sequence length, and the full sequence.
for label, value in (
    ("Название:", record.description),
    ("Длина последовательности:", len(seq)),
    ("Аминокислотная последовательность:", seq),
):
    print(label, value)

analysis = ProteinAnalysis(seq)

# One line per residue letter with its share of the sequence, two decimals.
print("\nСостав аминокислот (%):")
for residue, percent in analysis.amino_acids_percent.items():
    print(f"{residue}: {percent:.2f}%")

Название: RecName: Full=Alpha-amylase {ECO:0000256|ARBA:ARBA00012595, ECO:0000256|RuleBase:RU361134}; EC=3.2.1.1 {ECO:0000256|ARBA:ARBA00012595, ECO:0000256|RuleBase:RU361134}; Flags: Fragment;
Длина последовательности: 511
Аминокислотная последовательность: MKFFLLLFTIGFCWAQYSPNTQQGRTSIVHLFEWRWVDIALECERYLAPKGFGGVQVSPPNENVAIYNPFRPWWERYQPVSYKLCTRSGNEDEFRNMVTRCNNVGVRIYVDAVINHMCGNAVSAGTSSTCGSYFNPGSRDFPAVPYSGWDFNDGKCKTGSGDIENYNDATQVRDCRLTGLLDLALEKDYVRSKIAEYMNHLIDIGVAGFRLDASKHMWPGDIKAILDKLHNLNSNWFPAGSKPFIYQEVIDLGGEPIKSSDYFGNGRVTEFKYGAKLGTIIRKWNGEKMSYLKNWGEGWGFVPSDRALVFVDNHDNQRGHGAGGASILTFWDARLYKMAVGFMLAHPYGFTRVMSSYRWPRQFQNGNDVNDWVGPPNNNGVIKEVTINPDTTCGNDWVCEHRWRQIRNMVIFRNVVDGQPFTNWYDNGSNQVAFGRGNRGFIVFNNDDWSFSLTLQTGLPAGTYCDVISGDKINGNCTGIKIYVSDDGKAHFSISNSAEDPFIAIHAESKL

Состав аминокислот (%):
A: 5.48%
C: 2.35%
D: 6.65%
E: 3.72%
F: 5.87%
G: 9.98%
H: 2.15%
I: 5.87%
K: 4.50%
L: 5.48%
M: 1.96%
N: 8.22%
P: 4.50%
Q: 2.74%
R: 5.48%
S: 6.26%
T: 4.31%
V: 6.85%
W: 3.52%
Y: 4.11%


In [3]:
# One-letter codes of the 20 standard amino acids; pairs containing any other
# letter are skipped.
aa20 = "ACDEFGHIKLMNPQRSTVWY"

# Overlapping adjacent residue pairs in which both letters are in `aa20`.
pairs = [
    left + right
    for left, right in zip(seq, seq[1:])
    if left in aa20 and right in aa20
]
counts = Counter(pairs)
total = sum(counts.values()) or 1  # fall back to 1 so the division never hits zero

# Relative frequency for each of the 400 possible dipeptides; pairs that were
# never observed get 0 (Counter returns 0 for missing keys without adding them).
dipeptides = {
    first + second: counts[first + second] / total
    for first in aa20
    for second in aa20
}

# Tabulate and order from most to least frequent, then show the top ten.
df_dipeptides = pd.DataFrame(
    list(dipeptides.items()), columns=["Dipeptide", "Frequency"]
).sort_values(by="Frequency", ascending=False)
print("\nНаиболее частые дипептиды:")
print(df_dipeptides.head(10))


Наиболее частые дипептиды:
    Dipeptide  Frequency
111        GN   0.013725
104        GF   0.013725
222        ND   0.011765
225        NG   0.011765
115        GS   0.009804
347        VI   0.009804
5          AG   0.009804
342        VD   0.007843
355        VS   0.007843
94         FR   0.007843
