# Mr.Kov
Based on https://swiki.hfbk-hamburg.de/MusicTechnology/773

In [1]:
import numpy as np
import librosa
import soundfile
from typing import List
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [20]:
# basic parameters
#path = 'datasets/audio/yodel.aif'
path = 'datasets/audio/afguide.wav'

n_fft = 1024

In [21]:
# data analysis

data, sr = librosa.load(path, sr=48000, mono=True)
spec = librosa.feature.melspectrogram(data, n_fft=n_fft, sr=sr, fmax=8000)
data_spec_db = librosa.amplitude_to_db(spec)

pca_1 = PCA(n_components=1)
tsne_1 = TSNE(n_components=1, random_state=42)
data_spec_1 = pca_1.fit_transform(data_spec_db.T)
data_tsne_1 = tsne_1.fit_transform(data_spec_db.T)

order_pca = np.argsort(data_spec_1[:,0])
order_tsne = np.argsort(data_tsne_1[:,0])


fft = librosa.stft(data, n_fft=n_fft)

In [22]:
def random_jump(max_distance: float) -> int:
    random_distance = np.random.uniform((-1)*max_distance, max_distance)
    return int(np.round(random_distance))
        
def find_similar_frame_index(order, frame_index: int, max_distance: float) -> int:
    last_frame_index = len(order)-1
    frame_index = frame_index % last_frame_index
    selection = np.where(order == frame_index)
    i = selection[0][0] + random_jump(max_distance)
    i = np.clip(i, 0, last_frame_index)
    return order[i]

def generate_frame_indices(num_frames: int, start_frame: int, max_distance: float, order) -> List[int]:
    frames = []
    cur_pos = start_frame
    for _ in range(num_frames):
        frames.append(cur_pos)
        cur_pos = find_similar_frame_index(order, cur_pos + 1, max_distance)
    return frames
    
def mrkov(num_frames: int, start_frame: int, max_distance: float, order) -> np.ndarray:
    reordered_indices = generate_frame_indices(num_frames, start_frame, max_distance, order)
    fft_reordered = fft[:, reordered_indices]
    return librosa.istft(fft_reordered)


In [25]:
data = mrkov(10000, 0, 1, order_pca)
soundfile.write(file="datasets/audio/mrkov_af_test.wav", data=data, samplerate=sr)

data = mrkov(10000, 0, 1, order_tsne)
soundfile.write(file="datasets/audio/mrkov_af_test_tsne.wav", data=data, samplerate=sr)

data = mrkov(10000, 0, 0.6, order_tsne)
soundfile.write(file="datasets/audio/mrkov_af_test_tsne2.wav", data=data, samplerate=sr)

# Exploring the functions

In [515]:
random_jump(4.2)

3

In [516]:
find_similar_frame(56, 0)

56

In [517]:
find_similar_frame(56, 1.4)

56

In [615]:
zz = np.array([1, 2, 3, 4])
x = np.where(zz == 9)
x

(array([], dtype=int64),)

In [612]:
mkov_next = mrkov_next_frame_function(0, 1.3)

In [519]:
mkov_next()

1

In [520]:
mkov_next()

307

In [521]:
mkov_next = mrkov_next_frame_function(0, 0)

In [522]:
mkov_next()

1

In [523]:
mkov_next()

2

In [None]:
plot.xlim

In [524]:
mrkov_generate_frame_indices(10, 0, 0)

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [525]:
mrkov_generate_frame_indices(10, 0, 0.7)

[336, 10, 236, 237, 238, 239, 1138, 1523, 1524]

In [526]:
mrkov(10, 0, 1.3)

array([0.11133909, 0.12150038, 0.10952761, ..., 0.04896518, 0.04674239,
       0.04102705], dtype=float32)

In [527]:
mrkov_generate(1000, 0, 0.55)

array([-0.10413684, -0.09765087, -0.08832423, ...,  0.01890055,
        0.01941993,  0.03032707], dtype=float32)

In [530]:
data = mrkov_generate(1000, 0, 0.55)

In [531]:
data = mrkov_generate(1000, 0, 0.55)
soundfile.write(file="datasets/audio/mrkov_test.wav", data=data, samplerate=sr)