# 기본 사용법

## 01. 라이브러리 불러오기

- 라이브러리 불러오기

In [None]:
import Bio

print(Bio.__version__)

- 서열 불러오기: Seq

In [None]:
from Bio.Seq import Seq

In [None]:
my_seq = Seq("AGCTAAATTCAGGGACACACT")

In [None]:
my_seq

In [None]:
print(my_seq)

- 상보적 서열구하기

In [None]:
my_seq.complement()

- 반대가닥의 상보적 서열 얻기

In [None]:
my_seq.reverse_complement()

- FASTA 서열 읽어오기 - SeqIO.read()

In [None]:
from Bio import SeqIO

In [None]:
seq = SeqIO.read("one.fasta", "fasta")

In [None]:
print(type(seq))

In [None]:
print(seq)

- FASTA 서열 읽어오기 - SeqIO.parse()

In [None]:
seq2 = SeqIO.parse("one.fasta", "fasta")

In [None]:
print(type(seq2))

In [None]:
# Show the object returned by SeqIO.parse() (seq2), not the record that was
# read earlier with SeqIO.read().
print(seq2)

In [None]:
# This section demonstrates SeqIO.parse(), so iterate over its result (seq2)
# rather than over the record obtained earlier with SeqIO.read().
for s in seq2:
    print(type(s))
    print(s)

- GenBank 서열 읽어오기

In [None]:
# Walk through every record in the GenBank file, printing its identifier,
# the repr of its sequence, and its length on separate lines.
genbank_records = SeqIO.parse("ls_orchid.gbk", "genbank")
for seq_record in genbank_records:
    print(seq_record.id, repr(seq_record.seq), len(seq_record), sep="\n")

# 서열 객체의 특징 알아보기

## 1. 서열 객체는 문자열 특징을 가지고 있음

In [None]:
from Bio.Seq import Seq

In [None]:
my_seq = Seq("GACATTACGAAA")

In [None]:
# enumerate() yields (index, letter) tuples; show the type and value of each.
for indexed_letter in enumerate(my_seq):
    print(type(indexed_letter))
    print(indexed_letter)

In [None]:
# Print each position alongside the letter found there.
for position, base in enumerate(my_seq):
    print(f"{position} {base}")

In [None]:
print(len(my_seq))

In [None]:
print(my_seq[0:4])

In [None]:
my_seq.count("AC")

In [None]:
# GC 함량 계산하기

In [None]:
# GC content as a percentage: occurrences of G plus C over the total length.
gc_count = my_seq.count("G") + my_seq.count("C")
GC_rate = gc_count / len(my_seq) * 100

In [None]:
print(f"GC 함량은 {GC_rate:.2f}")

In [None]:
# Compute the GC content of my_seq with Biopython's helper.
# NOTE(review): newer Biopython releases deprecate/remove Bio.SeqUtils.GC in
# favour of gc_fraction (which returns a fraction rather than a percentage) —
# confirm against the installed version.
from Bio.SeqUtils import GC
GC(my_seq)

## 2. Slicing a Sequence

In [None]:
my_seq = Seq("GACATTACGAAA")

In [None]:
my_seq[2:6]

In [None]:
my_seq[2:: 2]

## 3. 서열 데이터를 문자열로 바꾸기

In [None]:
my_seq2 = str(my_seq)

In [None]:
print(type(my_seq))
print(type(my_seq2))


In [None]:
print(my_seq)
print(my_seq2)

In [None]:
fasta_format_string = ">Name\n%s\n" % my_seq

In [None]:
print(fasta_format_string)
print(type(fasta_format_string))

In [None]:
fasta_format_string2 = ">Name\n%s\n" % my_seq2

In [None]:
print(fasta_format_string2)

In [None]:
protein_seq = Seq("EVRNAK")

In [None]:
dna_seq=Seq("GACATTACGAAA")

In [None]:
# 두 서열을 합치기
mix_seq = protein_seq + dna_seq

In [None]:
protein_seq

In [None]:
dna_seq

In [None]:
mix_seq

- 서열 정보 리스트 만들기

In [None]:
list_of_seqs = [Seq("ACGT"), Seq("AACC"), Seq("GGTT")]

In [None]:
list_of_seqs

In [None]:
print(list_of_seqs[0])

- for구문을 이용하여 서열 리스트를 하나의 서열로 만들기

In [None]:
# Start from an empty Seq (not a str) so the accumulator has the same type
# throughout and the result is still a Seq when list_of_seqs is empty.
concatenated = Seq("")

for s in list_of_seqs:
    concatenated += s

In [None]:
concatenated

- Join method를 이용한 서열 합치기

In [None]:
spacer = Seq("N"*10)

In [None]:
my_seq3 = spacer.join(list_of_seqs)

In [None]:
my_seq3

- 소문자를 대문자로 상호전환하기

In [None]:
dna_seq2 = Seq("agagagCTCCTCT")

In [None]:
dna_seq2.upper()

In [None]:
dna_seq2.lower()

In [None]:
"AGAG" in dna_seq2

In [None]:
"AGAG" in dna_seq2.upper()

- 상보적 서열 구하기

In [None]:
dna_seq = Seq("agagagCTCCTCTG").upper()

In [None]:
dna_seq

In [None]:
dna_seq[::-1]  # 슬라이싱 아이디어를 이용한 서열 뒤집기

In [None]:
dna_seq.complement()

- 상보적 서열을 구하고 5'-말단부터 나열하기 위하여 서열 뒤집기

In [None]:
dna_seq.reverse_complement()

- 전사 정보 구하기

In [None]:
coding_dna =  Seq("AGCTAAATTCAGGGACACACTGGG")

In [None]:
template_dna = coding_dna.reverse_complement()
template_dna

In [None]:
messenger_rna = template_dna.reverse_complement_rna()
messenger_rna

In [None]:
messenger_rna = coding_dna.transcribe()
messenger_rna

- 메신저 RNA 서열로부터 DNA 서열 구하기

In [None]:
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")

In [None]:
coding_dna = messenger_rna.back_transcribe()
coding_dna

## 번역 과정에 대한 코딩

In [None]:
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")

In [None]:
protein_seq = messenger_rna.translate()
protein_seq

In [None]:
protein_seq2 = coding_dna.translate()
protein_seq2

- 미토콘드리아 번역 서열

In [None]:
protein_seq3 = coding_dna.translate(table="Vertebrate Mitochondrial")
protein_seq3

In [None]:
protein_seq4 = coding_dna.translate(table=2)
protein_seq4

- 첫 번째 스탑 코돈에서 번역 끝내기

In [None]:
protein_seq4 = coding_dna.translate(to_stop=True)
protein_seq4

- 미토콘드리아에서 스탑코돈 적용하기

In [None]:
protein_seq4 = coding_dna.translate(table=2, to_stop=True)
protein_seq4

- 종결 부위를 다른 문자를 이용하여 나타내기

In [None]:
protein_seq4 = coding_dna.translate(table=2, stop_symbol="@")
protein_seq4

In [None]:
# Build the gene sequence from adjacent string literals; Python joins them
# into a single string, so no line-continuation backslashes are needed.
gene = Seq(
    "GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCG"
    "CTCCCATGGCAGCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAA"
    "TTACAGATAGGCGATCGTGATAATCGTGGCTATTACTGGGATGGAGGTCAC"
    "TGGCGCGACCACGGCTGGTGGAAACAACATTATGAATGGCGAGGCAATCGCT"
    "GGCACCTACACGGACCGCCGCCACCGCCGCGCCACCATAAGAAAGCTCCTCA"
    "TGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA"
)


In [None]:
gene.translate(table="Bacterial")


In [None]:
gene.translate(table="Bacterial", to_stop=True)

- 세균의 경우 시작 코돈으로 GTG를 사용하는 것에 맞추어 재번역

In [None]:
gene.translate(table="Bacterial", cds=True)

### 번역틀 작업하기

In [None]:
from Bio.Data import CodonTable

In [None]:
standard_table = CodonTable.unambiguous_dna_by_name["Standard"]

In [None]:
standard_table

In [None]:
print(standard_table)

- 미토콘드리아의 경우

In [None]:
mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]

In [None]:
print(mito_table)

In [None]:
standard_table = CodonTable.unambiguous_dna_by_id[1]
print(standard_table)

In [None]:
standard_table

In [None]:
mito_table = CodonTable.unambiguous_dna_by_id[2]
mito_table

In [None]:
mito_table.stop_codons

In [None]:
mito_table.start_codons

In [None]:
standard_table.start_codons

In [None]:
standard_table.stop_codons

In [None]:
mito_table.forward_table["ACG"]

## 서열 비교하기

In [None]:
seq1 = Seq("ACGT")

In [None]:
"ACGT" == seq1

In [None]:
seq1 == "ACGT"

In [None]:
seq2 = Seq("ACGTGGGGGG")

In [None]:
seq1 == seq2

In [None]:
seq3 =  "ACGT"

In [None]:
seq1 == seq3

In [None]:
seq3 in seq2

- 미확인 서열 다루기

In [None]:
unknown_seq = Seq(None, 10)
unknown_seq

In [None]:
print(unknown_seq)

- 변경 가능한 서열 만들기

Seq 메소드에 의해 생성된 서열은 변경이 불가함.

In [172]:
from Bio.Seq import Seq

In [236]:
original_seq = Seq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA")

> Seq 메소드로 생성된 객체는 수정하려고 하면 에러가 발생함

In [174]:
original_seq[5] = "G"

TypeError: 'Seq' object does not support item assignment

In [229]:
from Bio.Seq import MutableSeq

In [230]:
mutable_seq = MutableSeq(original_seq)
len(mutable_seq)

33

In [215]:
mutable_seq[0] = "A"
mutable_seq

MutableSeq('ACCATTGTAATGGGCCGCTGAAAGGGTGCCCGA')

In [216]:
mutable_seq.remove("A")
mutable_seq
len(mutable_seq)

32

In [227]:
# Remove every "G": remove() deletes only the first match, so repeat until
# none are left. Looping on the membership test avoids mutating the sequence
# while iterating over it.
while "G" in mutable_seq:
    mutable_seq.remove("G")
print(mutable_seq)


CCATTTAATCCCTAAATCCCA


In [237]:
mutable_seq.reverse()
mutable_seq

MutableSeq('AGCCCGTGGGAAAGTCGCCGGGTAATGTTACCG')

> mutable 객체를 Seq로 재생성하면 변경이 불가함

In [238]:
new_seq = Seq(mutable_seq)

In [239]:
new_seq[9]="T"

TypeError: 'Seq' object does not support item assignment

- 미확인 객체 만들기

In [240]:
from Bio.Seq import UnknownSeq

In [241]:
unk = UnknownSeq(20)
unk



UnknownSeq(20, character='?')

In [242]:
unk2 = Seq(None, 20)
unk2

Seq(None, length=20)

In [243]:
print(unk)

????????????????????


In [244]:
print(unk2)

UndefinedSequenceError: Sequence content is undefined

In [245]:
unk = UnknownSeq(20, character="N")
unk



UnknownSeq(20, character='N')

In [246]:
unk

UnknownSeq(20, character='N')

In [247]:
print(unk)

NNNNNNNNNNNNNNNNNNNN


In [248]:
unk.complement()



UnknownSeq(20, character='N')

In [249]:
unk_protein = unk.translate()



In [250]:
print(unk_protein)

XXXXXX


In [251]:
from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate

In [252]:
my_string = "GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG"

In [253]:
translate(my_string)

'AVMGRWKGGRAAG*'