Adapted from EMOPIA/workspace/transformer/generate.ipynb in the EMOPIA repository: https://github.com/annahung31/EMOPIA

In [None]:
# Install the muspy package with pip (no version pin, so the latest release is used).
!pip install muspy

In [None]:
# Print the version of the CUDA compiler (nvcc) available in this runtime.
# NOTE(review): this reports the CUDA toolkit release, not the GPU model.
!nvcc --version

In [None]:
# Install pinned releases of torch, torchvision and torchaudio.
!pip install torch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0

In [None]:
# Install pytorch-fast-transformers into the user site-packages (--user).
# NOTE(review): presumably required by models.py, which is imported further down — confirm.
!pip install --user pytorch-fast-transformers 

In [None]:
# Clone the EMOPIA repository into ./EMOPIA; a later cell installs from its requirements.txt.
!git clone https://github.com/annahung31/EMOPIA.git

In [None]:
# Install the Python dependencies listed in the cloned repository's requirements file.
# The clone cell must have been run first so that EMOPIA/requirements.txt exists.
!pip install -r EMOPIA/requirements.txt

In [None]:
# Install midiSynth, which provides the MidiSynth class used later to play and render MIDI files.
!pip install midiSynth

In [None]:
# install fluidynth
!sudo apt install libfluidsynth-dev

In [None]:
# Install ipdb with pip.
# NOTE(review): ipdb is not imported anywhere in this notebook; presumably the
# EMOPIA modules imported below need it — confirm.
!pip install ipdb

In [None]:
# Install miditoolkit with pip.
# NOTE(review): miditoolkit is not imported anywhere in this notebook; presumably the
# EMOPIA modules imported below need it — confirm.
!pip install miditoolkit

In [None]:
# After running all of the cells above, restart the runtime before continuing

In [None]:
# Import MidiSynth and create the single synthesizer instance that later cells
# use to play MIDI files (play_midi) and to render them to audio (midi2audio).
from midiSynth.synth import MidiSynth
midi_synth = MidiSynth()

In [None]:
# import os, pickle and torch
import os
import pickle
import torch

In [None]:
# using the file explorer, move utils.py and models.py from EMOPIA/workspace/transformer into the same directory as this notebook
# if importing the transformer raises an error, restart the runtime and try again
from utils import write_midi
from models import TransformerModel, network_paras

# Prepare Dictionary

In [None]:
# Download the co-representation archive by its Google Drive file id, extract it
# into ../../dataset/, then delete the zip so only the extracted files remain.
!gdown --id 17dKUf33ZsDbHC5Z6rkQclge3ppDTVCMP
!unzip co-representation.zip -d ../../dataset/
!rm co-representation.zip

In [None]:
# Check that the dictionary was extracted to the expected location.
# The message makes a failure self-explanatory instead of a bare AssertionError.
path_dictionary = '../../dataset/co-representation/dictionary.pkl'
assert os.path.exists(path_dictionary), (
    f'dictionary not found at {path_dictionary!r}; '
    'run the download/unzip cell above first'
)

In [None]:
# Load the pickled dictionary and unpack it into its two mappings.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load a dictionary file obtained from a trusted source.
with open(path_dictionary, 'rb') as dictionary_file:
    # The context manager closes the file handle once loading finishes.
    dictionary = pickle.load(dictionary_file)
event2word, word2event = dictionary

In [None]:
# config
# n_class: num of classes for each token, in the key order of event2word
n_class = [len(dictionary[0][event_type]) for event_type in event2word]
n_token = len(n_class)

# Prepare Model

In [None]:
# once you run, it probably won't work and will give you the option to manually download
# use that option to download manually and then upload to same directory as the notebook in file explorer
!gdown --id 19Seq18b2JNzOamEQMG1uarKjj27HJkHu --output exp/pretrained_transformer.zip

In [None]:
# Unzip the pretrained transformer archive from the notebook directory into exp/
!unzip pretrained_transformer.zip -d exp/

In [None]:
# Remove the zip file, then list the extracted checkpoint directory as the cell output
!rm pretrained_transformer.zip
os.listdir('exp/pretrained_transformer')

In [None]:
# Check that the checkpoint was extracted to the expected location.
# The message makes a failure self-explanatory instead of a bare AssertionError.
path_saved_ckpt = 'exp/pretrained_transformer/loss_25_params.pt'
assert os.path.exists(path_saved_ckpt), (
    f'checkpoint not found at {path_saved_ckpt!r}; '
    'download and unzip pretrained_transformer.zip into exp/ first'
)

In [None]:
# Initialize the model for inference (is_training=False) from the per-token class
# counts, move it to the GPU and put it in eval mode.
# NOTE(review): net.cuda() needs a CUDA-capable runtime.
net = TransformerModel(n_class, is_training=False)
net.cuda()
net.eval()

# Load the pretrained weights from the checkpoint file into the model.
net.load_state_dict(torch.load(path_saved_ckpt))

# Start Generating

In [None]:
# TEST EXAMPLE - SETUP
emotion_tag = 4  # the target emotion class you want; it should be one of [1, 2, 3, 4].
path_outfile = 'demo' # output file name without extension ('.mid' / '.mp3' are appended when writing)

In [None]:
# TEST EXAMPLE - RUN CONDITIONAL GENERATOR
# Generate a sequence conditioned on emotion_tag and write it out as a MIDI file.
res, _ = net.inference_from_scratch(dictionary, emotion_tag, n_token=8, display=False)
write_midi(res, f'{path_outfile}.mid', word2event)

# Render the MIDI file to mp3; direct playback is left disabled.
#midi_synth.play_midi(f'{path_outfile}.mid')
midi_synth.midi2audio(f'{path_outfile}.mid', f'{path_outfile}.mp3')

# Run on Performance Inputs


In [None]:
# raw valence inputs - CHANGE FILE NAME
import numpy as np

# np.loadtxt accepts a path directly and opens/closes the file itself.
raw_valence = np.loadtxt("cinderella_valence.csv", delimiter=",")

print(raw_valence)

In [None]:
# raw arousal inputs - CHANGE FILE NAME
# np.loadtxt accepts a path directly and opens/closes the file itself.
raw_arousal = np.loadtxt("cinderella_arousal.csv", delimiter=",")

print(raw_arousal)

In [None]:
# function to calculate emotion tag given valence and arousal scores
def quartile(valence, arousal):
    """Map a (valence, arousal) score pair to its emotion quadrant tag.

    A score of exactly 0 counts as "low" on that axis.

    Args:
        valence: numeric valence score.
        arousal: numeric arousal score.

    Returns:
        1 for high valence / high arousal,
        2 for low valence / high arousal,
        3 for low valence / low arousal,
        4 for high valence / low arousal.

    Raises:
        ValueError: if either score is NaN. NaN compares false against 0 in
            both directions, so it would otherwise match no quadrant and the
            function would silently return None.
    """
    # NaN is the only value that is not equal to itself.
    if valence != valence or arousal != arousal:
        raise ValueError(
            'valence and arousal must not be NaN (got valence=%r, arousal=%r)'
            % (valence, arousal)
        )

    if valence > 0:
        # high valence: q1 when arousal is high, q4 otherwise
        return 1 if arousal > 0 else 4
    # low valence: q2 when arousal is high, q3 otherwise
    return 2 if arousal > 0 else 3

In [None]:
#make output directory
!mkdir outputs

In [None]:
# calculate averages and tag
# np.mean replaces the builtin sum()/len() pair: it is vectorized, and it also
# handles a single-value CSV, which np.loadtxt returns as a 0-d array that len() rejects.
avg_valence = np.mean(raw_valence)
avg_arousal = np.mean(raw_arousal)
print(avg_valence, avg_arousal)
emotion_tag = quartile(avg_valence, avg_arousal)
print(emotion_tag)

# run conditional generator with avg
# The quadrant tag is part of the file name so outputs for different tags do not collide.
path_name = 'outputs/avg_output_q' + str(emotion_tag)
res, _ = net.inference_from_scratch(dictionary, emotion_tag, n_token=8, display=False)
write_midi(res, path_name + '.mid', word2event)
midi_synth.play_midi(path_name + '.mid')
midi_synth.midi2audio(path_name + '.mid', path_name + '.mp3')

In [None]:
from statistics import median

# calculate medians and tag
median_valence, median_arousal = median(raw_valence), median(raw_arousal)
print(median_valence, median_arousal)
emotion_tag = quartile(median_valence, median_arousal)
print(emotion_tag)

# run conditional generator with median
# Output files are named after the quadrant tag derived from the medians.
path_name = f'outputs/median_output_q{emotion_tag}'
res, _ = net.inference_from_scratch(dictionary, emotion_tag, n_token=8, display=False)
write_midi(res, f'{path_name}.mid', word2event)
midi_synth.play_midi(f'{path_name}.mid')
midi_synth.midi2audio(f'{path_name}.mid', f'{path_name}.mp3')

In [None]:
# calculate maximums and tag
max_valence = max(raw_valence)
max_arousal = max(raw_arousal)
print(max_valence, max_arousal)
# Use the maxima computed above; the previous names (max_amelie_*) were never
# defined in this notebook and raised NameError.
emotion_tag = quartile(max_valence, max_arousal)
print(emotion_tag)

# run conditional generator with maximum
path_name = 'outputs/maximum_output_q' + str(emotion_tag)
res, _ = net.inference_from_scratch(dictionary, emotion_tag, n_token=8, display=False)
write_midi(res, path_name + '.mid', word2event)
midi_synth.play_midi(path_name + '.mid')
midi_synth.midi2audio(path_name + '.mid', path_name + '.mp3')

In [None]:
# calculate minimums and tag
min_arousal = min(raw_arousal)
min_valence = min(raw_valence)
print(min_valence, min_arousal)
# Use the minima computed above; the previous names (min_amelie_*) were never
# defined in this notebook and raised NameError.
emotion_tag = quartile(min_valence, min_arousal)
print(emotion_tag)

# run conditional generator with minimum
path_name = 'outputs/minimum_output_q' + str(emotion_tag)
res, _ = net.inference_from_scratch(dictionary, emotion_tag, n_token=8, display=False)
write_midi(res, path_name + '.mid', word2event)
midi_synth.play_midi(path_name + '.mid')
midi_synth.midi2audio(path_name + '.mid', path_name + '.mp3')

In [None]:
#run conditional generator for all pairs
emopia_inputs = []  # NOTE(review): never filled or read in the visible cells

# Each valence score is paired with the arousal score at the same index, so the two
# series must have the same length. Fail up front rather than part-way through
# generation (shorter arousal) or by silently ignoring extra arousal values (longer).
if len(raw_valence) != len(raw_arousal):
    raise ValueError(
        'valence and arousal series differ in length: %d vs %d'
        % (len(raw_valence), len(raw_arousal))
    )

for i, (valence, arousal) in enumerate(zip(raw_valence, raw_arousal)):
    emotion_tag = quartile(valence, arousal)
    # The index and the quadrant tag are both part of the output file name.
    path_name = f'outputs/output_{i}_q{emotion_tag}'
    print(f'i={i} q{emotion_tag}')
    res, _ = net.inference_from_scratch(dictionary, emotion_tag, n_token=8, display=False)
    write_midi(res, path_name + '.mid', word2event)
    midi_synth.play_midi(path_name + '.mid')
    midi_synth.midi2audio(path_name + '.mid', path_name + '.mp3')


In [None]:
# Recursively zip the outputs directory into outputs.zip so that all generated
# files can be downloaded from the file explorer as a single archive.
!zip -r outputs.zip outputs