-
Notifications
You must be signed in to change notification settings - Fork 2
/
codon.clj
107 lines (100 loc) · 2.09 KB
/
codon.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
(ns varity.codon
"Handles codon."
(:require [clojure.string :as string]))
;; See https://en.wikipedia.org/wiki/DNA_codon_table
;; Standard DNA codon table: maps each of the 64 three-letter codons (upper
;; case, alphabet T/C/A/G) to its single-letter amino acid code. Stop codons
;; map to "*".
(def ^:private codon-amino-acid-map
  (let [bases ["T" "C" "A" "G"]
        ;; One character per codon, in the order the codons are generated
        ;; below: first base varies slowest, third base varies fastest.
        amino-acids (str "FFLLSSSSYY**CC*W"   ; T__
                         "LLLLPPPPHHQQRRRR"   ; C__
                         "IIIMTTTTNNKKSSRR"   ; A__
                         "VVVVAAAADDEEGGGG")  ; G__
        codons (for [b1 bases, b2 bases, b3 bases]
                 (str b1 b2 b3))]
    (zipmap codons (map str amino-acids))))
;; Inverse of codon-amino-acid-map: maps each single-letter amino acid code
;; (or "*" for stop) to a vector of every codon that encodes it.
(def ^:private amino-acid-codon-map
  (reduce (fn [acc [codon amino-acid]]
            ;; Append this codon to the vector collected so far for its
            ;; amino acid, starting from an empty vector on first sight.
            (assoc acc amino-acid (conj (get acc amino-acid []) codon)))
          {}
          codon-amino-acid-map))
(defn codon->amino-acid
  "Translates a codon, given as the three-character nucleotide string s, into
  its single-letter amino acid code. The lookup is case-insensitive, stop
  codons yield \"*\", and a triplet that is not in the codon table yields nil.
  The precondition rejects any s whose length is not exactly three.

  e.g.

    (codon->amino-acid \"TCA\")  => \"S\"
    (codon->amino-acid \"tca\")  => \"S\"
    (codon->amino-acid \"TAA\")  => \"*\"
    (codon->amino-acid \"TCAG\") => AssertionError"
  [s]
  {:pre [(= (count s) 3)]}
  (-> s
      string/upper-case
      codon-amino-acid-map))
(defn amino-acid->codons
  "Looks up every codon that encodes the single-letter amino acid s. s must be
  a one-character String or a Character; its case is ignored. Returns a vector
  of nucleotide strings, or nil when s is not a known amino acid code. The
  order of codons inside the vector follows the internal table and should not
  be relied upon.

  e.g.

    (amino-acid->codons \"L\") => [\"CTG\" \"CTC\" \"CTT\" \"CTA\" \"TTG\" \"TTA\"]
    (amino-acid->codons \\*)  => [\"TAG\" \"TAA\" \"TGA\"]"
  [s]
  {:pre [(or (and (string? s) (= (count s) 1)) (char? s))]}
  ;; str turns a Character into a String and returns a String unchanged, so
  ;; upper-case always receives a String.
  (let [amino-acid (string/upper-case (str s))]
    (get amino-acid-codon-map amino-acid)))
(defn amino-acid-sequence
  "Translates the nucleotide sequence s into a string of single-letter amino
  acid codes. s is upper-cased and read as consecutive three-character codons
  starting at its first character; one or two leftover characters at the end
  are ignored. Triplets that are not in the codon table contribute nothing to
  the result."
  [s]
  (let [codons (re-seq #".{3}" (string/upper-case s))]
    (string/join (map codon-amino-acid-map codons))))