# GAN + Music

In [None]:
# Mount Google Drive at /content/drive; later cells copy stems and the final video there.
from google.colab import drive
drive.mount('/content/drive')

## Sound Spleeter

https://github.com/deezer/spleeter

Разделение звуковой дорожки на составляющие

**Установка**

In [3]:
!apt install ffmpeg
!pip install spleeter
from IPython.display import Audio

**Загружаем аудио**

In [None]:
# Upload the source track; the cells below expect it to be named rapchik.mp3.
from google.colab import files
uploaded = files.upload()

In [None]:
# Inline player for the uploaded track.
Audio('rapchik.mp3')

**Использование**

Опции:
- по умолчанию (`-p spleeter:2stems`) делит трек на две дорожки: вокал / остальное
- с флагом `-p spleeter:4stems` - вокал / бас / ударные / остальное
- с флагом `-p spleeter:5stems` - вокал / бас / ударные / клавиши / остальное

Подробную справку по параметрам можно вывести следующей командой:

In [None]:
# Print the command-line help for `spleeter separate`.
!spleeter separate -h 

In [None]:
!spleeter separate -i rapchik.mp3 -o output/ -p spleeter:5stems

In [None]:
!for i in output/rapchik/*.wav; do ffmpeg -i "$i" -acodec libmp3lame "${i%%.*}.mp3"; done

In [None]:
# Inline player for the vocals stem.
Audio('output/rapchik/vocals.mp3')

In [None]:
# Inline player for the drums stem.
Audio('output/rapchik/drums.mp3')

In [None]:
# Inline player for the bass stem.
Audio('output/rapchik/bass.mp3')

In [None]:
# Inline player for the piano stem.
Audio('output/rapchik/piano.mp3')

In [None]:
# Inline player for the "other" stem (everything not covered by the named stems).
Audio('output/rapchik/other.mp3')

**Сохраняем результат на Диск**

In [None]:
# Copy the separated stems plus the original track into stylegan/sound/rapchik/
# on Google Drive; the StyleGAN section later copies them back from there.
!mkdir -p "/content/drive/My Drive/stylegan/sound/"
!cp -r /content/output/rapchik /content/drive/My\ Drive/stylegan/sound/
!cp /content/rapchik.mp3 /content/drive/My\ Drive/stylegan/sound/rapchik/

## GAN + Music

In [None]:
%tensorflow_version 1.x
import tensorflow as tf

# Download the code
!git clone https://github.com/NVlabs/stylegan2.git
%cd stylegan2
!nvcc test_nvcc.cu -o test_nvcc -run

print('Tensorflow version: {}'.format(tf.__version__) )
!nvidia-smi -L
print('GPU Identified at: {}'.format(tf.test.gpu_device_name()))
!mkdir data
%cd data
!wget https://rolux.org/media/stylegan2/vectors/mouth_ratio.npy
!wget https://rolux.org/media/stylegan2/vectors/mouth_open.npy
%cd ..

In [6]:
# Copy the stems and the original track from Drive into the StyleGAN data folder.
!cp /content/drive/My\ Drive/stylegan/sound/rapchik/* /content/stylegan2/data/.

In [None]:
#@title Параметры генерации / Generation settings
import os
import numpy as np
from scipy.interpolate import interp1d
from scipy.io import wavfile
import matplotlib.pyplot as plt
import PIL.Image
import moviepy.editor

import dnnlib
import dnnlib.tflib as tflib
import pretrained_networks

# Per-track loudness envelopes: track name -> one value per video frame,
# scaled so that the loudest frame of each track is 1.
audio = {}
#@markdown FPS конечного видео/ Final FPS
fps = 60 #@param {type: "number"}

# https://www.google.com/search?q=death+grips+black+google+download
file_type = ".mp3" #@param [".wav", ".mp3"] {allow-input: true}
for mp3_filename in [f for f in os.listdir('data') if f.endswith(file_type)]:
    mp3_filename = f'data/{mp3_filename}'
    # splitext (rather than [:-4]) keeps this correct for extensions of any
    # length typed into the form field.
    wav_filename = os.path.splitext(mp3_filename)[0] + '.wav'
    if not os.path.exists(wav_filename):
        # Decode to 16-bit PCM WAV so scipy can read it.
        audio_clip = moviepy.editor.AudioFileClip(mp3_filename)
        audio_clip.write_audiofile(wav_filename, fps=44100, nbytes=2, codec='pcm_s16le')
    print(wav_filename)
    track_name = os.path.splitext(os.path.basename(wav_filename))[0]
    print(track_name)
    rate, signal = wavfile.read(wav_filename)
    if signal.ndim == 1:
        # Mono file: add a channel axis so the channel average below also works.
        signal = signal[:, np.newaxis]
    signal = np.mean(signal, axis=1) # to mono
    signal = np.abs(signal)
    seed = signal.shape[0]
    duration = signal.shape[0] / rate
    frames = int(np.ceil(duration * fps))
    samples_per_frame = signal.shape[0] / frames
    audio[track_name] = np.zeros(frames, dtype=signal.dtype)
    # Average the rectified signal over the samples that fall into each video frame.
    for frame in range(frames):
        start = int(round(frame * samples_per_frame))
        stop = int(round((frame + 1) * samples_per_frame))
        audio[track_name][frame] = np.mean(signal[start:stop], axis=0)
    # Normalise to a peak of 1; a completely silent track stays all-zero
    # instead of turning into NaNs through a division by zero.
    peak = audio[track_name].max()
    if peak > 0:
        audio[track_name] /= peak

# Plot each envelope and store the figure next to the audio files.
for track in sorted(audio.keys()):
    plt.figure(figsize=(8, 3))
    plt.title(track)
    plt.plot(audio[track])
    plt.savefig(f'data/{track}.png')

#@markdown Выбор модели / Change model
network_pkl = 'gdrive:networks/stylegan2-ffhq-config-f.pkl' #@param ["gdrive:networks/stylegan2-horse-config-f.pkl", "gdrive:networks/stylegan2-ffhq-config-f.pkl", "gdrive:networks/stylegan2-church-config-f.pkl","gdrive:networks/stylegan2-cat-config-f.pkl","gdrive:networks/stylegan2-car-config-f.pkl"] {allow-input: true}
# load_networks yields three networks; only Gs is used by the code below.
_G, _D, Gs = pretrained_networks.load_networks(network_pkl)

# Keyword arguments for running the full generator: uint8 NHWC output, fixed noise.
Gs_kwargs = dnnlib.EasyDict(
    output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
    randomize_noise=False,
)
# Same settings for calling the synthesis network directly, plus a minibatch size.
Gs_syn_kwargs = dnnlib.EasyDict(
    output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
    randomize_noise=False,
    minibatch_size=4,
)
# Synthesis-network variables whose name starts with 'noise'.
noise_vars = [
    variable
    for var_name, variable in Gs.components.synthesis.vars.items()
    if var_name.startswith('noise')
]
# Value of the generator's 'dlatent_avg' variable; used as the centre for truncation.
w_avg = Gs.get_var('dlatent_avg')

def get_ws(n, frames, seed):
    """Return `frames` smoothly interpolated 512-d latent vectors, cached on disk.

    `n` random key vectors are drawn from ``np.random.RandomState(seed)``, placed
    evenly over `frames` steps and joined by quadratic interpolation. The key
    sequence is tiled three times and only the middle copy is sampled, so the
    curve at both ends of the result is shaped by the neighbouring copies
    rather than by the boundary of the interpolation.

    The result is saved to, and on later calls loaded from,
    ``data/ws_{n}_{frames}_{seed}.npy``.

    Args:
        n: number of random key vectors (at least 1).
        frames: number of vectors to return.
        seed: seed passed to ``np.random.RandomState``.

    Returns:
        Array of shape ``(frames, 512)``.
    """
    filename = f'data/ws_{n}_{frames}_{seed}.npy'
    if os.path.exists(filename):
        return np.load(filename)

    src_ws = np.random.RandomState(seed).randn(n, 512)
    # Knot positions and values for three back-to-back copies of the keys.
    x = np.linspace(0, 3*frames, 3*len(src_ws), endpoint=False)
    y = np.tile(src_ws, (3, 1))
    # Integer positions covered by the middle copy; nothing else is needed.
    x_ = np.arange(frames, 2*frames, dtype=float)
    # One spline fit along axis 0 handles all 512 dimensions at once.
    ws = interp1d(x, y, kind='quadratic', axis=0, fill_value='extrapolate')(x_)

    os.makedirs(os.path.dirname(filename), exist_ok=True)
    np.save(filename, ws)
    return ws

def mix_styles(wa, wb, ivs):
    """Blend selected rows of `wa` towards `wb` and return the result as a new array.

    `ivs` is an iterable of ``(index, weight)`` pairs. For each pair, the rows
    selected by ``index`` become ``wa * (1 - weight) + wb * weight``; rows not
    named by any pair are copied from `wa` unchanged. `wa` itself is not modified.
    """
    mixed = np.array(wa, copy=True)
    for layers, weight in ivs:
        keep = 1 - weight
        mixed[layers] = wa[layers] * keep + wb[layers] * weight
    return mixed

def normalize_vector(v):
    """Rescale `v` to the standard deviation of `w_avg` and offset it by the mean difference.

    Evaluates ``v * std(w_avg) / std(v) + mean(w_avg) - mean(v)`` using the
    module-level `w_avg`. The offset is applied after the scaling, so the mean
    of the result is ``mean(w_avg) + mean(v) * (std(w_avg) / std(v) - 1)``.
    """
    target_std = np.std(w_avg)
    target_mean = np.mean(w_avg)
    rescaled = v * target_std / np.std(v)
    return rescaled + target_mean - np.mean(v)

def render_frame(t):
    """Render the video frame for time `t` (in seconds) and return it as an array.

    Reads the module-level audio envelopes, latent sequences and network, and
    advances the global `base_index` on every call, so the output depends on
    how many times and in which order the function has been called before.

    Steps:
      1. Map `t` to a frame index, clamped to ``[0, frames - 1]``.
      2. Advance `base_index` by ``base_speed`` times the squared envelope of
         the base-index track and pick the matching vector from `base_ws`.
      3. Pull that vector towards `w_avg` with a strength `psi` in [0.5, 1]
         driven by the psi track.
      4. Blend three layer ranges (0-3, 4-7, 8-17) towards the current
         `mix_ws` vector, each weighted by its own track's envelope.
      5. Add the `mouth_open` direction scaled by the chosen track's envelope.
      6. Run the synthesis network and resize the image to `size` x `size`.
    """
    global base_index
    # Builtin int instead of np.int: the alias was deprecated in NumPy 1.20
    # and removed in 1.24, where np.int raises AttributeError.
    frame = np.clip(int(np.round(t * fps)), 0, frames - 1)
    Base_index_track = "other" #@param ["other", "drums", "bass","vocals","piano"] {allow-input: true}
    base_index += base_speed * audio[Base_index_track][frame]**2 
    # Wrap around so the walk through base_ws never runs off the end.
    base_w = base_ws[int(round(base_index)) % len(base_ws)]
    # Repeat the vector for each of the 18 layers.
    base_w = np.tile(base_w, (18, 1))
    psi_audio = "bass" #@param ["other", "drums", "bass","vocals","piano"] {allow-input: true}
    psi = 0.5 + audio[psi_audio][frame] / 2
    base_w = w_avg + (base_w - w_avg) * psi
    mix_w = np.tile(mix_ws[frame], (18, 1))
    mix_w = w_avg + (mix_w - w_avg) * 0.75
    ranges = [range(0, 4), range(4, 8), range(8, 18)]
    values1 = "other" #@param ["other", "drums", "bass","vocals","piano"] {allow-input: true}
    values2 = "bass" #@param ["other", "drums", "bass","vocals","piano"] {allow-input: true}
    values3 = "drums" #@param ["other", "drums", "bass","vocals","piano"] {allow-input: true}
    values = [audio[track][frame] for track in [values1, values2,values3]]
    w = mix_styles(base_w, mix_w, zip(ranges, values))
    mouth_open_input = 'vocals' #@param ["other", "drums", "bass","vocals","piano"] {allow-input: true}
    w += mouth_open * audio[mouth_open_input][frame] * 1.5
    image = Gs.components.synthesis.run(np.stack([w]), **Gs_syn_kwargs)[0]
    image = PIL.Image.fromarray(image).resize((size, size), PIL.Image.LANCZOS)
    return np.array(image)


# Read one stem again to derive the clip timing: sample rate, duration and
# number of video frames at the chosen fps.
rate, signal = wavfile.read('/content/stylegan2/data/vocals.wav')
if signal.ndim == 1:
    # Mono file: add a channel axis so the channel average below also works.
    signal = signal[:, np.newaxis]
signal = np.mean(signal, axis=1) # to mono
signal = np.abs(signal)
# The sample count doubles as the random seed for the latent sequences.
seed = signal.shape[0]
duration = signal.shape[0] / rate
frames = int(np.ceil(duration * fps))
samples_per_frame = signal.shape[0] / frames


size = 1024 #@param {type: "number"}
seconds = int(np.ceil(duration))
resolution = 10 #@param {type: "slider", min: 1, max: 20}
# base_ws holds `resolution` times more steps than there are video frames.
base_frames = resolution * frames
base_ws = get_ws(seconds, base_frames, seed)
# Short summary of the loaded envelopes (track name -> number of frames).
print({track: envelope.shape for track, envelope in audio.items()})
# Scale the per-frame steps so that a full pass over the 'other' envelope
# advances base_index by base_frames in total.
# NOTE(review): render_frame steps with its Base_index_track form field, which
# defaults to "other"; if that field is changed this should probably use the
# same track - confirm.
base_speed = base_frames / sum(audio['other']**2)
base_index = 0
mix_ws = get_ws(seconds, frames, seed + 1)
# https://rolux.org/media/stylegan2/vectors/mouth_ratio.npy
mouth_open = normalize_vector(-np.load('data/mouth_ratio.npy'))


video_clip = moviepy.editor.VideoClip(render_frame, duration=duration)

audio_clip_input = "/content/stylegan2/data/rapchik.mp3" #@param {type: "string"}

mp4_filename = '/content/rapchik-video.mp4' #@param {type: "string"}

from google.colab import files


# Attach the original track as the soundtrack and encode the video.
audio_clip = moviepy.editor.AudioFileClip(audio_clip_input)
video_clip = video_clip.set_audio(audio_clip)
video_clip.write_videofile(mp4_filename, fps=fps, codec='libx264', audio_codec='aac', bitrate='8M')
# files.download(mp4_filename)

In [8]:
cp /content/rapchik-video.mp4 /content/drive/My\ Drive/stylegan/results/.