# Overview
This is my attempt to solve the first assignment of the Signals and Systems course offered by Innopolis University

# Part 1
## Solution's idea
Given a basic function (signal) $f$, we would like to find_alpha the a given signal $s(t)$ using fuction $f$. Here is the approach I considered: 
1. First use the discrete Fourier Transform to extract the frequency components in the signal $s(t)$
2. Consider the $K$ frequencies corresponding to the top $K$ peaks in the spectrum: $fr_1, fr_2, .... fr_{500}$
3. consider the a number of timestamps $T$
4. Denoting the $f(fr, t_i)$ as the value of the signal $f$ given the frequency $fr$ at the timestamp $t_i$, we consider the matrix $F$: 

$\begin{align} 
\begin{bmatrix}
f(fr_1, t_1) & f(fr_2, t_1) & f(fr_3, t_1) ... & f(fr_k, t_1) \\
f(fr_1, t_2) & f(fr_2, t_2) & f(fr_3, t_2) ... & f(fr_k, t_2) \\
f(fr_1, t_3) & f(fr_2, t_3) & f(fr_3, t_3) ... & f(fr_k, t_3) \\
f(fr_1, t_4) & f(fr_2, t_4) & f(fr_3, t_4) ... & f(fr_k, t_4) \\
..  & .. &  .. & .. \\
f(fr_1, t_T) & f(fr_2, t_T) & f(fr_3, t_T) ... & f(fr_k, t_T)
\end{bmatrix}
\end{align}$
The main part of the approximation is to find the coefficients $\alpha_1, \alpha_2 ... \alpha_k$ minimizing the loss:
$
\begin{align}
\sum_ {j = 1}^{T} (\sum _ {i=1}^{k} \alpha_i \cdot f(fr_i, t_j) - s(t_j)) ^ 2
\end{align}
$ 
This combination is the least squares solution for the equation expressed in matrix form:
$\begin{align} 
F \cdot \alpha = T
\end{align}
$
where 
$\begin{aligned}
\alpha = \begin{bmatrix}
\alpha_1 \\ 
\alpha_2 \\ 
.. \\
.. \\
\alpha_k
\end{bmatrix}
\end{aligned}
$
and 
$\begin{aligned}
T = \begin{bmatrix}
s(t_1) \\
s(t_2) \\
.. \\
.. \\
s(t_k)
\end{bmatrix}
\end{aligned}$

In [32]:
from thinkdsp import Wave 
from thinkdsp import Signal
import numpy as np
from thinkdsp import read_wave
from copy import copy
import random


# let's set the random module seed for reproducibility
np.random.seed(69)
random.seed(69)

In [16]:
def filter_wave(wave: Wave):
    # the main point here is to remove any sequences (of length more than 5% of the total audio) 
    # for which each value does not exceed %5 of the maximum value
    
    # first define the threshold
    min_value = max(abs(wave.ys)) * 0.05
    min_removable_length = int(len(wave.ys) * 0.05)
    
    new_ys = copy(wave.ys)

    start, end = 0, 0
    for i, y in enumerate(wave.ys):
        if np.abs(y) > min_value: # which means this value should not be removed
            # first check if the length of the removable sequence is long enough
            if end - start >= min_removable_length:
                # set all the values in this range to 0
                new_ys[start: end] = 0
            # now time to update the start value
            start = i + 1
        else: # which means the value is too low and can be removed
            end = i + 1
    # check if there is a removable sequence at the end
    if end - start >= min_removable_length:
        new_ys[start:end] = 0
    
    new_ys = np.asarray([y for y in new_ys if y != 0])

    return Wave(ys=new_ys, framerate=wave.framerate)

In [17]:
a2 = read_wave('ayhem_2.wav')
letter_a = a2.segment(start=0, duration=1.2)
display(letter_a.make_audio())
letter_y = a2.segment(start=1.2, duration=0.9)
display(letter_y.make_audio())
letter_h = a2.segment(start=2.1, duration=0.7)
display(letter_h.make_audio())
letter_e = a2.segment(start=2.8, duration=0.7)
display(letter_e.make_audio())
letter_m = a2.segment(start=3.5)
display(letter_m.make_audio())

In [18]:
from random import sample
# implementation:
def find_alpha(wave: Wave, signal: Signal, k: 500, sample_portion=0.75, low_pass:int=None, high_pass:int=None):
    assert low_pass is None or high_pass is None
    
    # make the spectrum of the wave
    spec = wave.make_spectrum()
    
    # apply the filter if the corresponding paramter is passed
    if low_pass:
        spec.low_pass(low_pass)

    if high_pass:
        spec.high_pass(high_pass)

    # extract the top k frequencies, with their amplitudes
    _, freqs = list(map(list, zip(*spec.peaks()[:k])))

    # before proceeding with sampling timestamps
    # let's map each t to the corresponding value
    time_value_map = dict(zip(wave.ts, wave.ys))

    # take a random sample out of the timestamps
    sample_size = int(sample_portion * len(wave.ts)) 
    # extract the timestamps and sort them
    ts = sorted(sample( list(wave.ts.reshape(-1,) ), sample_size))
 
    # calculate T
    T = np.asarray([time_value_map[t] for t in ts]).reshape(-1, 1)

    # calculate F
    # create the empty array with the shape
    F = np.empty([k, sample_size]) 

    for index, f in enumerate(freqs):
        # set the frequency
        signal.freq = f
        # add the evaluation with the specific frequency
        F [index] = signal.evaluate(ts)
    # transpose F
    F = F.T

    print(F.shape)
    print(T.shape)

    # find alpha with linear regression    
    alpha = np.linalg.lstsq(F, T, rcond=None)[0]

    return freqs, alpha

In [19]:
a_f = filter_wave(letter_a)

In [20]:
# let's find_alpha the letter a with the sinus function
from thinkdsp import SinSignal

# initialize a sinus signal 
sin = SinSignal()

alpha, freqs = find_alpha(wave=a_f, signal=sin, k=1000, sample_portion=0.8, low_pass=3500)

(23310, 1000)
(23310, 1)


In [21]:
# let's construct the approximation using the sinus signal
from thinkdsp import SumSignal

approx_signal = SumSignal(*[SinSignal(freq=f, amp=a, offset=0) for f, a in zip(alpha, freqs)])
approx_wave = approx_signal.make_wave(duration=a_f.duration, start=a_f.start, framerate=a_f.framerate)

print(a_f.corr(approx_wave))

0.6852406328365499


In [22]:
display(a_f.make_audio())
display(approx_wave.make_audio())

In [29]:
def approximate_one_signal(wave: Wave, signal, n_trials:int=5, k:int=1500, sample_portion:float=0.75, low_pass:int=None, high_pass:int=None):
    # this function builds upon the find_coefficients function finding 'n_trials' sets of coefficients
    # and setting the final coefficients as the average of the intermediate ones
    
    results = [find_alpha(wave, signal,k=k, sample_portion=sample_portion, low_pass=low_pass, high_pass=high_pass) for _ in range(n_trials)]
    # extract the frequencies
    freqs = results[0][0]
    # extract the alpha values from the results
    alphas = np.asarray([r[1] for r in results if r[1] is not None]) # filter the None values if any
    # final coefficients as the mean
    alpha = np.mean(alphas, axis=0).reshape(-1,) 

    # time to create the resulting signal    
    approx_signal = SumSignal(*[SinSignal(freq=f, amp=a, offset=0) for f, a in zip(freqs, alpha)])
    approx_wave = approx_signal.make_wave(duration=wave.duration, start=wave.start, framerate=wave.framerate)

    # return the wave and correlation score with the original wave
    return approx_wave, wave.corr(approx_wave)

In [30]:
approx_wave, corr_score = approximate_one_signal(a_f, SinSignal(), low_pass=4000, n_trials=5)

(21853, 1500)
(21853, 1)
(21853, 1500)
(21853, 1)
(21853, 1500)
(21853, 1)
(21853, 1500)
(21853, 1)
(21853, 1500)
(21853, 1)


In [31]:
print(corr_score)
display(a_f.make_audio())
display(approx_wave.make_audio())

0.6881849919980103


In [37]:
# let's try to extend the functionality by retrieving different frequencies ranges
def find_alpha_2(wave: Wave, signal: Signal, k=1000,sample_portion=0.75, low_pass:int=None, high_pass:int=None):
    assert low_pass is None or high_pass is None
    
    # make the spectrum of the wave
    spec = wave.make_spectrum()
    
    # apply the filter if the corresponding paramter is passed
    if low_pass:
        spec.low_pass(low_pass)

    if high_pass:
        spec.high_pass(high_pass)

    # extract the k frequencies with the highest amplitudes
    _, freqs = list(map(list, zip(*spec.peaks()[:k])))

    # extract k / 2 frequencies in the middle range 
    mid_point = int(len(freqs) / 2)
    _, more_freqs = list(map(list, zip(*spec.peaks()[mid_point - int(k / 2): mid_point + int(k / 2)])))
    freqs.extend(more_freqs)

    # extract the k / 4 frequencies with the lowest amplitudes
    _, more_freqs = list(map(list, zip(*spec.peaks()[-int(k / 4):])))
    freqs.extend(more_freqs)
    # now 'freqs' variable contain all the different frequencies 
    
    # before proceeding with sampling timestamps
    # let's map each t to the corresponding value
    time_value_map = dict(zip(wave.ts, wave.ys))

    # take a random sample out of the timestamps
    sample_size = int(sample_portion * len(wave.ts)) 
    # extract and sort the timestamps
    ts = sorted(sample( list(wave.ts.reshape(-1,) ), sample_size))
 
    # calculate T
    T = np.asarray([time_value_map[t] for t in ts]).reshape(-1, 1)

    # calculate F
    # create the empty array with the shape
    F = np.empty([len(freqs), sample_size]) 

    for index, f in enumerate(freqs):
        # set the frequency
        signal.freq = f
        # add the evaluation with the specific frequency
        F [index] = signal.evaluate(ts)
    # transpose F
    F = F.T

    print(F.shape)
    print(T.shape)

    # find alpha with linear regression    
    alpha = np.linalg.lstsq(F, T, rcond=None)[0]

    return freqs, alpha

In [38]:
def approximate_one_signal_2(wave: Wave, signal, n_trials:int=5, k:int=1500, sample_portion:float=0.75, low_pass:int=None, high_pass:int=None):
    # this function builds upon the find_coefficients function finding 'n_trials' sets of coefficients
    # and setting the final coefficients as the average of the intermediate ones
    
    results = [find_alpha_2(wave, signal,k=k, sample_portion=sample_portion, low_pass=low_pass, high_pass=high_pass) for _ in range(n_trials)]
    # extract the frequencies
    freqs = results[0][0]
    # extract the alpha values from the results
    alphas = np.asarray([r[1] for r in results if r[1] is not None]) # filter the None values if any
    # final coefficients as the mean
    alpha = np.mean(alphas, axis=0).reshape(-1,) 

    # time to create the resulting signal    
    approx_signal = SumSignal(*[SinSignal(freq=f, amp=a, offset=0) for f, a in zip(freqs, alpha)])
    approx_wave = approx_signal.make_wave(duration=wave.duration, start=wave.start, framerate=wave.framerate)

    # return the wave and correlation score with the original wave
    return approx_wave, wave.corr(approx_wave)

In [39]:
approx_wave, corr_score = approximate_one_signal_2(a_f, SinSignal(), low_pass=4000, n_trials=5)

(21853, 3375)
(21853, 1)
(21853, 3375)
(21853, 1)
(21853, 3375)
(21853, 1)
(21853, 3375)
(21853, 1)
(21853, 3375)
(21853, 1)


In [40]:
print(corr_score)
display(a_f.make_audio())
display(approx_wave.make_audio())

0.6877622376017383


let's try to extend the solution to more than a single signal. 

In [None]:
import itertools

# let's try to extend the functionality by retrieving different frequencies ranges
def find_alpha_multiple(wave: Wave, signals: list[Signal], k=1000,sample_portion=0.75, low_pass:int=None, high_pass:int=None, peek_only=True):
    assert low_pass is None or high_pass is None
    
    # make the spectrum of the wave
    spec = wave.make_spectrum()
    
    # apply the filter if the corresponding paramter is passed
    if low_pass:
        spec.low_pass(low_pass)

    if high_pass:
        spec.high_pass(high_pass)

    # extract the k frequencies with the highest amplitudes
    _, freqs = list(map(list, zip(*spec.peaks()[:k])))

    # if the argument peek_only is set to True, then only the k frequencies with the highest amplitudes are considered
    # otherwise, the extended sampling is applied
    if not peek_only:
        # extract k / 2 frequencies in the middle range 
        mid_point = int(len(freqs) / 2)
        _, more_freqs = list(map(list, zip(*spec.peaks()[mid_point - int(k / 2): mid_point + int(k / 2)])))
        freqs.extend(more_freqs)

        # extract the k / 4 frequencies with the lowest amplitudes
        _, more_freqs = list(map(list, zip(*spec.peaks()[-int(k / 4):])))
        freqs.extend(more_freqs)
        # now 'freqs' variable contain all the different frequencies 

    
    # before proceeding with sampling timestamps
    # let's map each t to the corresponding value
    time_value_map = dict(zip(wave.ts, wave.ys))

    # take a random sample out of the timestamps
    sample_size = int(sample_portion * len(wave.ts)) 
    # extract and sort the timestamps
    ts = sorted(sample( list(wave.ts.reshape(-1,) ), sample_size))
 
    # calculate T
    T = np.asarray([time_value_map[t] for t in ts]).reshape(-1, 1)

    # calculate F
    # create the empty array with the shape
    F = np.empty([len(freqs) * len(signals), sample_size]) 

    for index, f in enumerate(freqs):
        # set the current frequency for each of the signals
        for s_index in range(len(signals)):
            signals[s_index].freq = f 
        # set the index-th row             
        F[index] = np.asarray(list(itertools.chain(*[list(s.evaluate(ts)) for s in signals])))

    # transpose F
    F = F.T

    print(F.shape)
    print(T.shape)

    # find alpha with linear regression    
    alpha = np.linalg.lstsq(F, T, rcond=None)[0]

    return freqs, alpha

In [None]:
def approximate_multiple_signals(wave: Wave, signals, n_trials:int=3, k:int=1500, sample_portion:float=0.75, low_pass:int=None, high_pass:int=None):
    # this function builds upon the find_coefficients function finding 'n_trials' sets of coefficients
    # and setting the final coefficients as the average of the intermediate ones
    
    results = [find_alpha_2(wave, signals,k=k, sample_portion=sample_portion, low_pass=low_pass, high_pass=high_pass) for _ in range(n_trials)]
    # extract the frequencies
    freqs = results[0][0]
    # extract the alpha values from the results
    alphas = np.asarray([r[1] for r in results if r[1] is not None]) # filter the None values if any
    # final coefficients as the mean
    alpha = np.mean(alphas, axis=0).reshape(-1,) 

    # time to create the resulting signal    
    approx_signal = SumSignal(*[SinSignal(freq=f, amp=a, offset=0) for f, a in zip(freqs, alpha)])
    approx_wave = approx_signal.make_wave(duration=wave.duration, start=wave.start, framerate=wave.framerate)

    # return the wave and correlation score with the original wave
    return approx_wave, wave.corr(approx_wave)

In [None]:
# let's see how combining signals could improve the synthesized sound
from thinkdsp import CosSignal, SquareSignal

approx_wave, wave_corr = approximate_multiple_signals(a_f, [SinSignal(), CosSignal(), SquareSignal()], n_trials=2, low_pass=4000, )