In [12]:
from processing.preprocess import save_spectrogram
from dotenv import load_dotenv
import os
import warnings
import torch
import numpy as np
from pytube import YouTube
from moviepy.editor import *

sys.path.insert(0, './model')
sys.path.insert(0, './processing')
from model import OnsetsBaseline
from pred_to_midi import folder_to_midi
warnings.filterwarnings('ignore')

class Prediction():
    '''
    Runs a model over the spectrograms of a song and hands the thresholded
    predictions to folder_to_midi.

    Everything belonging to one song is kept under
        output_folder/song_name/
    '''

    def __init__(self, model, output_folder, device = 'cuda'):
        '''
        model:          network to run; needs load_state_dict()/eval() and is
                        called as model(data, None)
        output_folder:  root directory holding one sub-folder per song
        device:         device name passed to torch.load as map_location
        '''
        # Load in the model
        self.model = model
        self.device = device

        # Set up directories
        self.output_folder = output_folder

    def prepare(self, input_file, song_name):
        '''
        Generates the spectrograms of the given input .wav file by delegating
        to save_spectrogram. run_model later reads them from:
            output_folder/song_name/raw/
        '''
        output_dir = os.path.join(self.output_folder, song_name)
        # extra characters are a workaround for a poorly implemented save_spectrogram function
        save_spectrogram(input_file, output_dir, "raw____")

    def run_model(self, model_path, model_type, song_name):
        '''
        Runs the model on every numpy file in the folder

            output_folder/song_name/raw/

        and saves one binary (0/1) prediction array per input file, under the
        same file name, in

            output_folder/song_name/model_type

        model_path is the checkpoint whose weights are loaded into the model.

        model_type can be:
            "onsets"
            "frames"
            "offsets"
            "velocities"
        '''
        # Create the folder if not already created
        output_dir = os.path.join(self.output_folder, song_name, model_type)
        os.makedirs(output_dir, exist_ok=True)

        # Load in the weights; strict=False tolerates missing/unexpected keys
        state = torch.load(model_path, map_location=torch.device(self.device))
        self.model.load_state_dict(state, strict=False)
        # Inference only: switch dropout / batch-norm layers to evaluation mode
        self.model.eval()

        # Collect the spectrograms, ordered by the number that follows the
        # first underscore in each file name
        raw_path = os.path.join(self.output_folder, song_name, "raw")
        spectrogram_list = sorted(
            os.listdir(raw_path),
            key=lambda x: float(x.split('_')[1].split('.')[0]),
        )

        # No gradients are needed for prediction
        with torch.no_grad():
            for spec in spectrogram_list:

                # Load data, add a batch dimension and swap the last two axes
                data = torch.from_numpy(np.load(os.path.join(raw_path, spec)))
                data = data.unsqueeze(0).transpose(1, 2)

                # Prediction with the model given to this instance
                # (not whatever global happens to be called `model`)
                out = self.model(data, None)

                # Drop the batch dimension and swap the axes back
                out = out.squeeze(0).transpose(0, 1)
                out = out.detach().cpu().numpy()

                # Turn logits into 1s and 0s (positive logit -> 1)
                out = (out > 0).astype(out.dtype)

                # Save the output
                np.save(os.path.join(output_dir, spec), out)

    def wav_to_wav(self, model_path, model_type, input_file, song_name):
        '''
        Full pipeline for one .wav file: generate the spectrograms, run the
        model, then pass the prediction folders to folder_to_midi with target

            output_folder/song_name/song_name_synthesized.midi

        The "frames" folder must exist for that last step; the "velocities"
        folder is used when present.
        '''
        self.prepare(input_file, song_name)

        self.run_model(model_path, model_type, song_name)

        song_dir = os.path.join(self.output_folder, song_name)
        frames_dir = os.path.join(song_dir, "frames")
        velocities_dir = os.path.join(song_dir, "velocities")

        if not os.path.isdir(frames_dir):
            print("Need Frames to Generate MIDI")
            return

        midi_path = os.path.join(song_dir, song_name+"_synthesized.midi")
        if os.path.isdir(velocities_dir):
            print("Synthesizing with velocity...")
            folder_to_midi(frames_dir, velocities_dir, midi_path)
        else:
            print("Synthesizing without velocity...")
            folder_to_midi(frames_dir, None, midi_path)

        print("Complete")

    def pred_youtube_to_wav(self, youtube_url, song_name, model_path, model_type):
        '''
        Downloads the audio of a youtube video, stores it as
            output_folder/song_name/song_name.wav
        and then makes a prediction on that file.

        Raises ValueError when the video offers no audio-only stream.

        Credit for Downloading Code: https://stackoverflow.com/questions/27473526/download-only-audio-from-youtube-video-using-youtube-dl-in-python-script
        '''

        # Download the first audio-only stream into the working directory
        yt = YouTube(youtube_url)
        video = yt.streams.filter(only_audio=True).first()
        if video is None:
            raise ValueError("No audio-only stream available for " + str(youtube_url))
        out_file = video.download(output_path=".")

        # Make directory if doesn't exist
        output_dir = os.path.join(self.output_folder, song_name)
        os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): the download is only renamed to .wav, not transcoded;
        # confirm save_spectrogram can decode the container pytube delivers
        new_file = os.path.join(output_dir, song_name+".wav")
        os.rename(out_file, new_file)

        # Make predictions
        self.wav_to_wav(model_path, model_type, new_file, song_name)

    def pred_mp4_to_wav(self, input_mp4, song_name, model_path, model_type, start="00:00:00", end=-1):
        '''
        Extracts the audio of a mp4 file into
            output_folder/song_name/song_name.wav
        and then makes a prediction on that file.

        Defaults to starting at t='00:00:00' and ending at t=MAX
        where time is in 'hh:mm:ss' format; end=-1 means "until the end of
        the clip".
        '''

        # The song folder may not exist yet; the audio file is written into it
        song_dir = os.path.join(self.output_folder, song_name)
        os.makedirs(song_dir, exist_ok=True)
        wav_file = os.path.join(song_dir, song_name+".wav")

        videoclip = VideoFileClip(input_mp4)
        audioclip = None
        try:
            if end == -1:
                end = videoclip.duration

            # Trim the clip, then write out its audio track
            videoclip = videoclip.subclip(start, end)
            audioclip = videoclip.audio
            audioclip.write_audiofile(wav_file)
        finally:
            # Release the clips even when trimming or writing fails
            if audioclip is not None:
                audioclip.close()
            videoclip.close()

        print(wav_file)
        self.wav_to_wav(model_path, model_type, wav_file, song_name)

In [11]:
# Directories are taken from the environment; each is None when its variable is unset
test_path = os.getenv('testname')
maestro_path = os.getenv('pathname')

In [5]:
# Build the network and the predictor that wraps it (running on the CPU)

model = OnsetsBaseline(229, 88)
output_folder = test_path

chopin = Prediction(model=model, output_folder=output_folder, device='cpu')

In [None]:
# Run the frames checkpoint on a local wav file

model_path, model_type = "frames_baseline.pt", "frames"
input_file, song_name = "test.wav", "test"

chopin.wav_to_wav(
    model_path=model_path,
    model_type=model_type,
    input_file=input_file,
    song_name=song_name,
)

In [None]:
# Run the frames checkpoint on the audio of a youtube video

model_path, model_type = "frames_baseline.pt", "frames"

chopin.pred_youtube_to_wav(
    youtube_url="https://www.youtube.com/watch?v=RmK_V1FHH-o",
    song_name="rr",
    model_path=model_path,
    model_type=model_type,
)

In [None]:
# Run the frames checkpoint on a mp4 file, starting at 00:00:21

model_path, model_type = "frames_baseline.pt", "frames"

chopin.pred_mp4_to_wav(
    input_mp4="test.mp4",
    song_name="simple",
    model_path=model_path,
    model_type=model_type,
    start='00:00:21',
)