-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c0b0c27
commit 6a19f2f
Showing
5 changed files
with
230 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,3 +102,6 @@ venv.bak/ | |
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
# JetBrains | ||
.idea |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from pydub import AudioSegment | ||
import argparse | ||
|
||
|
||
def fft(x, fs): | ||
nfft = len(x) | ||
df = fs / nfft | ||
f = np.arange(0, fs - df, fs / nfft) | ||
X = np.fft.fft(x) | ||
X_mag = 20 * np.log10(np.abs(X)) | ||
return f[0:int(np.ceil(nfft/2))], X_mag[0:int(np.ceil(nfft/2))] | ||
|
||
|
||
def normalize(x): | ||
dtype = x.dtype | ||
x = x.astype('float32') | ||
if dtype == 'int32': | ||
x /= 2.0 ** 31 | ||
if dtype == 'int16': | ||
x /= 2.0 ** 15 | ||
elif dtype == 'uint8': | ||
x /= 2.0 ** 8 | ||
x *= 2.0 | ||
x -= 1.0 | ||
return x | ||
|
||
|
||
def splice(x, fs, test_length, silence_length, n_speakers): | ||
# TODO: Use pydub | ||
# Splice data into multiple record + silence parts | ||
# There is silence in the beginning | ||
dtype = x.dtype | ||
sweeps = [] | ||
for i in range(n_speakers): | ||
start = np.round(fs * (silence_length * (i + 1) + test_length * i)).astype('int') | ||
end = np.round(fs * (silence_length * (i + 2) + test_length * (i + 1))).astype('int') | ||
sweeps.append(x[start:end, 0]) # Left mic | ||
sweeps.append(x[start:end, 1]) # Right mic | ||
|
||
# Zero pad to longest sweep | ||
n = max([len(x) for x in sweeps]) | ||
sweeps = np.vstack([np.concatenate((sweep, np.zeros(n - len(sweep)))) for sweep in sweeps]) | ||
|
||
return sweeps.astype(dtype) | ||
|
||
|
||
def xt(x, fs): | ||
return np.arange(0, len(x) / fs, 1 / fs) | ||
|
||
|
||
def plot_sweep(x, fs, name=None): | ||
plt.plot(xt(x, fs), x) | ||
plt.xlim([0.0, len(x) / fs]) | ||
plt.ylim([-1.0, 1.0]) | ||
plt.ylabel('Amplitude') | ||
if name is not None: | ||
plt.legend([name]) | ||
|
||
|
||
def split(recording, test_signal, n_speakers=1, silence_length=2.0): | ||
"""Splits sine sweep recording into individual speaker-ear pairs | ||
Signal looks something like this: | ||
--/\/\/\----/\/\/\-- | ||
---/\/\/\--/\/\/\--- | ||
There are two tracks, one for each ear. Dashes represent silence and sawtooths recorded signal. First saw tooths | ||
of both tracks are signal played on left front speaker and the second ones are signal played of right front speaker. | ||
There can be any (even) number of tracks. Output will be two tracks per speaker, left ear first and then right. | ||
Speakers are in the same order as in the original file, read from left to right and top to bottom. In the example | ||
above the output track order would be: | ||
1. Front left speaker - left ear | ||
2. Front left speaker - right ear | ||
3. Front right speaker - left ear | ||
4. Front right speaker - right ear | ||
Args: | ||
recording: AudioSegment for the sine sweep recording | ||
test_signal: AudioSegement for the test signal | ||
n_speakers: Number of speakers per track | ||
silence_length: Length of silence in the beginning, end and between test signals | ||
Returns: | ||
""" | ||
# TODO: Detect n_speakers from the recording | ||
|
||
columns = [] | ||
ms = silence_length*1000 + len(test_signal) | ||
remainder = recording[silence_length*1000:] | ||
for _ in range(n_speakers): | ||
columns.append(remainder[:ms].split_to_mono()) # Add list of mono channel AudioSegments | ||
remainder = remainder[ms:] # Cut to the beginning of next signal | ||
|
||
tracks = [] | ||
i = 0 | ||
while i < recording.channels: | ||
for column in columns: | ||
tracks.append(column[i]) # Left ear of current speaker | ||
tracks.append(column[i+1]) # Right ear of current speaker | ||
i += 2 | ||
|
||
return tracks | ||
|
||
|
||
# TODO: filtfilt | ||
# TODO: Convolution | ||
# TODO: Method for cropping impulse responses after convolution | ||
|
||
|
||
def main(): | ||
arg_parser = argparse.ArgumentParser() | ||
arg_parser.add_argument('--recording', type=str, required=True, help='File path to sine sweep recording.') | ||
arg_parser.add_argument('--test', type=str, required=True, help='File path to sine sweep test signal.') | ||
arg_parser.add_argument('--n_speakers', type=int, required=True, help='Number of speakers in each track.') | ||
arg_parser.add_argument('--silence_length', type=float, required=True, | ||
help='Length of silence in the beginning, end and between recordings.') | ||
cli_args = arg_parser.parse_args() | ||
|
||
# Read files | ||
rec = AudioSegment.from_wav(cli_args.recording) | ||
test = AudioSegment.from_wav(cli_args.test) | ||
|
||
print(len(rec)) | ||
|
||
# Make sure sampling rates match | ||
if rec.frame_rate != test.frame_rate: | ||
raise ValueError('Sampling frequencies of test tone and recording must match!') | ||
|
||
# Split recording WAV file into individual mono tracks | ||
tracks = split(rec, test, n_speakers=cli_args.n_speakers, silence_length=cli_args.silence_length) | ||
|
||
for tr in tracks: | ||
print(len(tr)) | ||
|
||
data = np.vstack([tr.get_array_of_samples() for tr in tracks]) | ||
|
||
for i in range(4): | ||
plt.subplot(4, 1, i + 1) | ||
plot_sweep( | ||
normalize(data[i, :]), | ||
rec.frame_rate, | ||
name='Speaker {s:d} - {mic}'.format(s=int(i/2+1), mic='right' if i % 2 else 'left') | ||
) | ||
plt.xlabel('Time (s)') | ||
plt.show() | ||
|
||
if not os.path.isdir('out'): | ||
os.makedirs('out', exist_ok=True) | ||
|
||
AudioSegment.from_mono_audiosegments(*tracks).export('out/sweeps.wav', format='wav') | ||
AudioSegment.from_mono_audiosegments(*tracks).export('out/tests.wav', format='wav') | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
matplotlib | ||
scipy | ||
pydub | ||
simpleaudio |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import numpy as np | ||
from scipy.io import wavfile | ||
import matplotlib.pyplot as plt | ||
import matplotlib.ticker as ticker | ||
|
||
from impulcifer import fft | ||
|
||
|
||
def main(): | ||
Fs, data = wavfile.read('data/hp_sweep_stereo.wav') | ||
data = np.array(data) | ||
print(Fs) | ||
print(data.shape) | ||
print(data.dtype) | ||
|
||
t = np.arange(0, len(data) / Fs, 1 / Fs) | ||
dtype = data.dtype | ||
data = data.astype('float32') | ||
if dtype == 'int32': | ||
data /= 2.0**31 | ||
if dtype == 'int16': | ||
data /= 2.0**15 | ||
elif dtype == 'uint8': | ||
data /= 2.0**8 | ||
data *= 2.0 | ||
data -= 1.0 | ||
|
||
ax = plt.subplot(2, 2, 1) | ||
plt.plot(t, data[:, 0]) | ||
plt.xlabel('Time (s)') | ||
plt.ylabel('Amplitude') | ||
|
||
ax = plt.subplot(2, 2, 3) | ||
plt.plot(t, data[:, 1]) | ||
plt.xlabel('Time (s)') | ||
plt.ylabel('Amplitude') | ||
|
||
ax = plt.subplot(2, 2, (2, 4)) | ||
f, X_mag_l = fft(data[:, 0], Fs) | ||
f, X_mag_r = fft(data[:, 1], Fs) | ||
X_mag_l = X_mag_l[f <= 20000] | ||
X_mag_r = X_mag_r[f <= 20000] | ||
f = f[f <= 20000] | ||
X_mag_l -= X_mag_l[np.argmin(np.abs(f - 1000.0))] | ||
X_mag_r -= X_mag_r[np.argmin(np.abs(f - 1000.0))] | ||
plt.plot(f, X_mag_l, f, X_mag_r) | ||
plt.semilogx() | ||
plt.xlim([20, 20000]) | ||
plt.grid(True, which='major') | ||
plt.grid(True, which='minor') | ||
plt.xlabel('Frequency (Hz)') | ||
plt.ylabel('Amplitude (dBr)') | ||
plt.legend(['Left', 'Right']) | ||
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}')) | ||
|
||
plt.show() | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |