In [None]:
import collections
import glob
import os

import music21.duration
import music21.note
import music21.volume
import numpy
import pytuning.tuning_tables
import sklearn.preprocessing

In [None]:
# Residue table keyed by one-letter amino-acid code. Each value is a
# 3-tuple:
#   [0] pitch name handed to music21.note.Note when a sequence is rendered,
#   [1] the value collected into `masses` by the feature cell,
#   [2] the value collected into `scales` by the feature cell.
# NOTE(review): fields [1] and [2] look like molecular masses and a
# hydropathy index respectively -- confirm units and source.
# Insertion order matters: the feature cell iterates .values() in this order.
AMINO_ACIDS = collections.OrderedDict(
    (
        ("A", ("C", 89.094, 1.8)),
        ("C", ("C#", 121.154, 2.5)),
        ("D", ("D-", 133.104, -3.5)),
        ("E", ("D", 147.131, -3.5)),
        ("F", ("D#", 165.192, 2.8)),
        ("G", ("E-", 75.067, -0.4)),
        ("H", ("E", 155.156, -3.2)),
        ("I", ("E#", 131.175, 4.5)),
        ("K", ("F-", 146.189, -3.9)),
        ("L", ("F", 131.175, 3.8)),
        ("M", ("F#", 149.208, 1.9)),
        ("N", ("G-", 132.119, -3.5)),
        ("P", ("G", 115.132, -1.6)),
        ("Q", ("G#", 146.146, -3.5)),
        ("R", ("A--", 174.203, -4.5)),
        ("S", ("A-", 105.093, -0.8)),
        ("T", ("A", 119.119, -0.7)),
        ("U", ("A#", 168.064, 0.0)),
        ("V", ("B-", 117.148, 4.2)),
        ("W", ("B", 204.228, -0.9)),
        ("Y", ("B#", 181.191, -1.3)),
    )
)

# Candidate note lengths; each value is passed to music21.duration.Duration.
# NOTE(review): 2.0 is listed before 1.0, so the list is not ascending --
# confirm this order is intentional, since the feature cell indexes into it.
DURATIONS = [0.25, 0.5, 2.0, 1.0, 4.0]

# Meter string given to music21.meter.TimeSignature for every generated stream.
TIME_SIGNATURE = "4/4"

In [None]:
# Build a 22-step equal-division scale and export it as a Scala (.scl)
# tuning file under data/.
scale = pytuning.create_edo_scale(22)

with open("data/22-EDO.scl", "w") as scala_file:
    # Second argument is the description string passed to the Scala writer.
    scala_text = pytuning.tuning_tables.create_scala_tuning(scale, "22-EDO")
    scala_file.write(scala_text)

In [None]:
# Pull the second and third tuple fields of every AMINO_ACIDS entry into
# column vectors of shape (n_residues, 1), in table order.
masses = numpy.array([mass for _, mass, _ in AMINO_ACIDS.values()])[:, numpy.newaxis]
scales = numpy.array([scale for _, _, scale in AMINO_ACIDS.values()])[:, numpy.newaxis]

# Column-wise (axis=0) normalisation of each feature.
masses = sklearn.preprocessing.normalize(masses, axis=0)
scales = sklearn.preprocessing.normalize(scales, axis=0)

# Map each mass onto an index into DURATIONS.
# Fixes relative to the previous version:
#   * the rescale now consumes `masses` (it used to be fed `scales`, so the
#     durations were derived from the wrong feature);
#   * the target range is exactly the valid index range 0..len(DURATIONS)-1
#     (the old 0..5 range with a "- 1" offset produced index -1 for the
#     smallest value, which silently wrapped to the last duration);
#   * indices are converted to integers on the flattened array instead of
#     calling int() on 1-element array rows.
masses = sklearn.preprocessing.minmax_scale(
    masses, feature_range=(0, len(DURATIONS) - 1), axis=0
)
masses = numpy.round(masses).astype(int).ravel()
masses = [DURATIONS[index] for index in masses]

# Magnitude of the third field, rescaled to [0.0, 1.0] and flattened to 1-D.
scales = numpy.absolute(scales)
scales = sklearn.preprocessing.minmax_scale(scales, feature_range=(0.0, 1.0), axis=0)
scales = scales.ravel()

In [None]:
# Render every FASTA file under data/fasta/ as a MIDI file under data/midi/.
pathnames = glob.glob("data/fasta/*.fasta")

# The MIDI writer does not create missing directories.
os.makedirs("data/midi", exist_ok=True)

for pathname in pathnames:
    # Output name: file stem, keeping only the part after the last "_".
    name = os.path.splitext(os.path.split(pathname)[-1])[0].split("_")[-1]

    with open(pathname) as fp:
        contents = fp.read()

    # Use the sequence of the LAST record in the file. Every ">" header
    # starts a new record, and the record's sequence lines are joined so a
    # line-wrapped sequence is kept whole (previously only the final
    # physical line was used). Blank lines are ignored.
    residues = []

    for line in contents.splitlines():
        line = line.strip()

        if line.startswith(">"):
            residues = []
        elif line:
            residues.append(line)

    sequence = "".join(residues)

    if not sequence:
        # Empty file or header without residues: nothing to render.
        print(f"Skipping {pathname}: no sequence found")
        continue

    stream = music21.stream.Stream()
    signature = music21.meter.TimeSignature(TIME_SIGNATURE)
    stream.append(signature)

    for residue in sequence:
        # Raises KeyError for residue codes missing from AMINO_ACIDS.
        note = music21.note.Note(AMINO_ACIDS[residue][0])

        # NOTE(review): durations are drawn at random (unseeded), so output
        # differs between runs; the per-residue `masses` durations computed
        # in the feature cell are not used here -- confirm this is intended.
        note.duration = music21.duration.Duration(numpy.random.choice(DURATIONS))

        stream.append(note)

    translation = music21.midi.translate.streamToMidiFile(stream)
    translation.open(f"data/midi/{name.lower()}.midi", "wb")

    try:
        translation.write()
    finally:
        # Always release the file handle, even if writing fails.
        translation.close()