# Mr.Kov - Python version

In [6]:
from typing import Optional

import sklearn
import numpy as np
import librosa
import soundfile
from sklearn.decomposition import PCA


class MrKov:
    """
    Re-synthesises an audio file by replacing each STFT frame ("grain")
    with a randomly chosen, similar-sounding frame of the same file.

    Similarity is measured by reducing the mel spectrogram of every frame to
    a single number and comparing the resulting ranks.
    """

    def __init__(
        self,
        max_jump: int = 1,
        sample_rate: int = 48000,
        dim_reduction = None,
        hop_length: Optional[int] = None,
        win_length: Optional[int] = None,
        n_fft: Optional[int] = None
    ):
        """
        Randomly replaces a grain with a similar grain based on PCA analysis.
        Based on `Mr. Kov <https://swiki.hfbk-hamburg.de/MusicTechnology/773>`_
        paper in SuperCollider.

        :param max_jump: range of considered grains when looking for similar grain.
            A value of 0 will reproduce the original sequence
        :param sample_rate: Sample rate to use for analysis - defaults to 48 kHz
        :param dim_reduction: dimensionality reduction algorithm of frequencies to 1 dimension
            to determine the grains that sound similar. Must implement a sklearn interface
            (``fit_transform``). If ``None``, ``PCA(n_components=1)`` is used.
            It is worth checking out sklearn.manifold.TSNE(n_components=1)
        :param hop_length: Hop length of FFT - if not defined (``None`` or 0) half of
            the given sample rate is used
        :param win_length: Window length of FFT - if not defined (``None`` or 0) the
            sample rate is used
        :param n_fft: number of fft bins to use - if not defined (``None`` or 0) the
            window length is used
        :raises ValueError: if ``max_jump`` is negative
        """
        # Fail early: a negative value would otherwise only surface as a
        # ValueError from np.random.randint after the whole file was analysed.
        if max_jump < 0:
            raise ValueError(f"max_jump must be >= 0, got {max_jump}")
        self.max_jump: int = max_jump
        # Compare against None rather than relying on truthiness: objects that
        # define __len__ can evaluate as falsy (or raise) in a boolean context
        # and would then be silently replaced by PCA.
        self.dim_reduction: sklearn.base.TransformerMixin = (
            PCA(n_components=1) if dim_reduction is None else dim_reduction
        )
        self.sample_rate = sample_rate
        self.hop_length = hop_length or int(sample_rate / 2)
        self.win_length = win_length or sample_rate
        self.n_fft = n_fft or self.win_length

    def run(self, source_file_path: str, target_file_path: str) -> None:
        """
        Runs the Mr.Kov algorithm.

        Loads the source as a mono signal at ``self.sample_rate``, derives a
        new frame order from its spectrogram and writes the re-synthesised
        signal to the target path.

        :param source_file_path: Path of source file
        :param target_file_path: Path of target file
        """
        data, _ = librosa.load(source_file_path, sr=self.sample_rate, mono=True)
        spec = self._calc_spec(data)
        reordered_spec = self._reorder_spec(spec)
        markov_spec_order = self._run_markov(reordered_spec)
        self._write_reordered_fft(data, markov_spec_order, target_file_path)

    def _write_reordered_fft(self, data: np.ndarray, fft_order: np.ndarray, file_path: str) -> None:
        """
        Writes a re-arranged FFT to a file.

        :param data: signal to transform
        :param fft_order: frame indices; output frame ``i`` is taken from
            STFT frame ``fft_order[i]`` of ``data``
        :param file_path: path of the audio file to write
        """
        fft = librosa.stft(data, n_fft=self.n_fft, hop_length=self.hop_length, win_length=self.win_length)
        # Columns of the STFT matrix are frames, so indexing the second axis
        # re-orders the grains in time before transforming back.
        sig = librosa.istft(fft[:, fft_order], hop_length=self.hop_length, win_length=self.win_length)
        soundfile.write(file=file_path, data=sig, samplerate=self.sample_rate)

    def _run_markov(self, reordered_spec: np.ndarray) -> np.ndarray:
        """
        Picks, for every frame, a frame whose similarity rank is at most
        ``max_jump`` positions away from its own.

        :param reordered_spec: frame indices sorted by similarity, as
            returned by :meth:`_reorder_spec`
        :return: array of frame indices with the same shape as the input;
            with ``max_jump == 0`` it is the identity order
        """
        # argsort of a permutation is its inverse: rank[i] is the position of
        # frame i within the similarity ordering.
        rank = np.argsort(reordered_spec)
        jitter = np.random.randint(
            low=-self.max_jump,
            high=self.max_jump + 1,  # upper bound is exclusive
            size=reordered_spec.shape,
        )
        # use clip to not exceed the array when
        # adding jitter on the indices offset
        neighbour_rank = np.clip(rank + jitter, 0, len(reordered_spec) - 1)
        return reordered_spec[neighbour_rank]

    def _reorder_spec(self, spec: np.ndarray) -> np.ndarray:
        """
        Tries to re-order the grains by similarity using a dim reduction
        algorithm which reduces the frequencies to a single dimension.

        :param spec: spectrogram with one column per frame
        :return: frame indices sorted by the first reduced component
        """
        # The reducer sees one row per frame, hence the transpose.
        spec_1dim = self.dim_reduction.fit_transform(spec.T)
        return np.argsort(spec_1dim[:, 0])

    def _calc_spec(self, X: np.ndarray, convert_to_db: bool = True) -> np.ndarray:
        """
        Calculates the mel spectrogram of a given signal.

        :param X: signal to analyse
        :param convert_to_db: if true the spectrogram is converted to dB
        :return: the (optionally dB-scaled) mel spectrogram
        """
        spec = librosa.feature.melspectrogram(
            # The signal must be passed by keyword: recent librosa releases
            # reject it as a positional argument.
            y=X,
            sr=self.sample_rate,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
        )
        if convert_to_db:
            # NOTE(review): melspectrogram yields a power spectrogram by
            # default, for which librosa.power_to_db would be the matching
            # conversion. Kept as-is to preserve existing output - confirm.
            spec = librosa.amplitude_to_db(spec)
        return spec

In [2]:
# Example run with the default reducer, 44.1 kHz analysis and max_jump=2.
MrKov(
    sample_rate=44100,
    max_jump=2,
).run(
    source_file_path='../datasets/specto_cluster/expo.flac',
    target_file_path='frac.flac',
)

In [9]:
from sklearn.manifold import TSNE

# Reduce the frames to one dimension with t-SNE and pass it as the reducer.
tsne = TSNE(n_components=1)

MrKov(
    sample_rate=44100,
    hop_length=4000,
    dim_reduction=tsne,
    max_jump=15,
).run(
    source_file_path='../datasets/specto_cluster/beethoven.flac',
    target_file_path='beethoven_frac.flac',
)