In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
from tqdm import tqdm_notebook as tqdm
import os
import IPython.display as ipd

In [2]:
%run ../../Utilities.ipynb

Using TensorFlow backend.


## Individual notes

In [3]:
# Load the note lookup table: a tab-separated file whose first row is the header.
# The cells below look notes up through its "Note" column.
note_table = pd.read_csv("../../../Data/Piano/Note_table.tsv", header=0, sep="\t")

In [4]:
def _first_row_of(note_name):
    """Return the index label of the first note_table row whose "Note" equals note_name."""
    matches = note_table.loc[note_table["Note"] == note_name]
    return matches.index[0]


# One starting row per string, numbered 8 down to 1
# (presumably each string's open-string pitch -- confirm against the tuning used).
start_8 = _first_row_of("E1")
start_7 = _first_row_of("B1")
start_6 = _first_row_of("E2")
start_5 = _first_row_of("A2")
start_4 = _first_row_of("D3")
start_3 = _first_row_of("G3")
start_2 = _first_row_of("B3")
start_1 = _first_row_of("E4")

In [5]:
# Starting row of each string, keyed by the string number as text ("8" ... "1").
_string_starts = {
    "8": start_8,
    "7": start_7,
    "6": start_6,
    "5": start_5,
    "4": start_4,
    "3": start_3,
    "2": start_2,
    "1": start_1,
}

# For every string: the 25 consecutive note names beginning at its starting row.
strings = {
    name: note_table["Note"].iloc[first:first + 25].values
    for name, first in _string_starts.items()
}

In [6]:
def file_to_data(file, rs, samples, note_table):
    """Cut one single-string recording into labelled, onset-trimmed clips.

    The file is loaded with librosa, mixed down to mono and resampled to
    ``rs`` Hz. The signal is then split into consecutive 4000-sample chunks;
    chunk number ``k`` is labelled with the value ``k`` at the position of
    the string the file was recorded on. Each chunk is trimmed so that it
    starts at its first sample whose value is >= 0.001.

    Parameters
    ----------
    file : str
        Path to the recording, using "/" separators. The string number is the
        last character of the third "_"-separated field of the base name
        (e.g. ``Guitar_noamp_string6_10.mp3`` -> ``"6"``).
    rs : int
        Target sampling rate passed to ``librosa.resample``.
    samples : int
        Not used by this function; kept so existing callers keep working.
    note_table : pandas.DataFrame
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        Columns ``Notes`` (length-6 int array, 25 everywhere except the
        chunk number at index ``string - 1``), ``String`` (the string number
        as text) and ``x`` (the trimmed chunk). The frame is empty when the
        string number is greater than 6.

    Raises
    ------
    KeyError
        If the string number parsed from the file name is not a key of the
        module-level ``strings`` mapping.
    """
    chunk_len = 4000        # samples per chunk
    onset_threshold = 0.001  # first sample at or above this starts the clip
    columns = ["Notes", "String", "x"]

    y, sr = librosa.load(file, mono=False)
    y = librosa.core.to_mono(y)
    # Keyword arguments: librosa >= 0.10 no longer accepts the rates positionally.
    y = librosa.resample(y, orig_sr=sr, target_sr=rs)

    string = file.split("/")[-1].split("_")[2][-1]
    # The original looked the string up (and discarded the result); keep that
    # check so an unexpected file name fails loudly instead of being mislabelled.
    if string not in strings:
        raise KeyError(string)
    string_no = int(string)

    records = []
    if string_no <= 6:
        for start in range(0, len(y), chunk_len):
            chunk = y[start:start + chunk_len]
            onsets = np.flatnonzero(chunk >= onset_threshold)
            if onsets.size == 0:
                # No sample reaches the threshold: nothing to trim to, so skip
                # the chunk rather than fail with an opaque IndexError.
                continue
            notes = np.full(6, 25, dtype=int)
            notes[string_no - 1] = start // chunk_len
            records.append({"Notes": notes, "String": string, "x": chunk[onsets[0]:]})

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=columns)

In [7]:
# Try file_to_data on a single recording, resampled to 8000 Hz.
# The third argument (500) is passed as `samples`, which file_to_data does not read.
test = file_to_data("../../../Samples/Guitar/Ind_notes/Guitar_noamp_string6_10.mp3", 8000, 500, note_table)

In [8]:
test.head()

Unnamed: 0,Notes,String,x
0,"[25, 25, 25, 25, 25, 0]",6,"[0.0057153893, 0.01848321, 0.020498423, 0.0222..."
1,"[25, 25, 25, 25, 25, 1]",6,"[0.06243897, 0.053786274, 0.04054899, 0.025041..."
2,"[25, 25, 25, 25, 25, 2]",6,"[0.0073347655, 0.013448851, 0.015583859, 0.015..."
3,"[25, 25, 25, 25, 25, 3]",6,"[0.0059004743, 0.014234349, 0.0087156575, 0.00..."
4,"[25, 25, 25, 25, 25, 4]",6,"[0.008460975, 0.057885136, 0.09217568, 0.10965..."


In [9]:
def dir_to_data(directory, rs, samples, note_table):
    """Run ``file_to_data`` on every entry of a directory and stack the results.

    Parameters
    ----------
    directory : str
        Directory whose entries are all passed to ``file_to_data``.
    rs, samples, note_table
        Forwarded unchanged to ``file_to_data``.

    Returns
    -------
    pandas.DataFrame
        All per-file frames stacked with a fresh 0..n-1 index. Entries are
        processed in sorted name order so the row order is reproducible.
        An empty frame is returned when no file produced any rows.
    """
    frames = []
    # os.listdir order is arbitrary; sort so repeated runs give the same rows in the same order.
    pbar = tqdm(sorted(os.listdir(directory)))

    for file in pbar:
        pbar.set_description("Processing %s" % file)
        # os.path.join works whether or not `directory` ends with a separator.
        frames.append(file_to_data(os.path.join(directory, file), rs, samples, note_table))

    # Frames without rows add nothing; leaving them out also keeps pd.concat
    # free of empty-entry dtype warnings.
    usable = [frame for frame in frames if len(frame)]
    if not usable:
        return frames[0].reset_index(drop=True) if frames else pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(usable, ignore_index=True)

In [10]:
# Build the single-note dataset from every file in the directory, resampled to 8000 Hz.
# The third argument (500) is forwarded as `samples`, which file_to_data does not read.
data = dir_to_data("../../../Samples/Guitar/Ind_notes/", 8000, 500, note_table)

HBox(children=(IntProgress(value=0, max=287), HTML(value='')))




In [11]:
def to_spectral(x, samples):
    """Magnitude spectrum of each non-overlapping frame of a 1-D signal.

    ``x`` is cut into ``len(x) // samples`` consecutive frames of ``samples``
    values (a trailing partial frame is ignored). Each frame is zero-padded to
    ``2 * samples`` points, transformed with an FFT, and the magnitudes of the
    non-negative-frequency bins are kept.

    Parameters
    ----------
    x : numpy.ndarray
        1-D signal.
    samples : int
        Frame length; also the number of spectral bins kept per frame.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(x) // samples, samples)``; row ``k`` holds
        the spectrum of ``x[k*samples:(k+1)*samples]``.
    """
    n_frames = x.shape[0] // samples
    n_fft = 2 * samples
    xf = np.zeros((n_frames, samples))

    # The FFT length is always even, so exactly `samples` bins have a
    # non-negative frequency. The mask does not depend on the frame: build it once.
    keep = np.fft.fftfreq(n_fft) >= 0

    # Iterate over every full frame. The previous bound,
    # range(0, len(x) - samples, samples), stopped one frame early whenever
    # len(x) was a multiple of `samples`, leaving the last row all zeros.
    for k in range(n_frames):
        frame = x[k * samples:(k + 1) * samples]
        xf[k, :] = np.abs(np.fft.fft(frame, n=n_fft))[keep]
    return xf

In [20]:
# Right-pad every clip with zeros so that all of them are exactly 4000 samples long.
data["x"] = data["x"].map(
    lambda clip: np.pad(clip, (0, 4000 - len(clip)), mode="constant")
)

In [21]:
# Spectral features: one row per 100-sample frame of each clip.
data["xf"] = data["x"].apply(to_spectral, samples=100)

In [24]:
data.sample(5)

Unnamed: 0,Notes,String,x,xf
1863,"[25, 25, 13, 25, 25, 25]",3,"[0.028514756, -0.025760684, -0.036091104, -0.0...","[[0.01133306697010994, 1.0188106298446655, 0.1..."
4778,"[25, 25, 25, 25, 25, 3]",6,"[0.0050685033, 0.013935001, 0.020956254, 0.024...","[[0.5353103280067444, 0.36878064274787903, 1.3..."
2402,"[25, 25, 2, 25, 25, 25]",3,"[0.0013028657, 0.0027251185, 0.00420907, 0.004...","[[0.0029150284826755524, 0.03945070132613182, ..."
3393,"[25, 25, 25, 18, 25, 25]",4,"[0.029294392, 0.004189732, -0.021477576, -0.04...","[[0.12324074655771255, 0.42871934175491333, 0...."
1090,"[25, 15, 25, 25, 25, 25]",2,"[0.0025315117, 0.0020110633, 0.00016255114, -0...","[[0.022191518917679787, 0.02076551876962185, 0..."


In [25]:
data.shape

(5375, 4)

In [26]:
# Persist the single-note dataset; the "2-note chords" section reads this same file back.
data.to_pickle("../../../Data/Guitar/guitar-data-indnotes-500.pkl")

## 2-note chords

In [3]:
# Reload the single-note dataset written by the single-note section of this notebook.
# NOTE: unpickling can execute arbitrary code -- only load pickle files you produced yourself.
indnotes = pd.read_pickle("../../../Data/Guitar/guitar-data-indnotes-500.pkl")

In [8]:
# Build the chord dataset: every sampled single note, each followed by all
# 2-note mixes it forms with the other sampled notes.

# Fixed seed so the 500-clip subset (and every chord built from it) is reproducible.
indnotes = indnotes.sample(500, random_state=0)

# Pull the per-clip values out once instead of recomputing them for every pair.
note_arrays = list(indnotes["Notes"])
string_slots = [int(s) - 1 for s in indnotes["String"]]  # position of each clip's string in Notes
frets = [notes[slot] for notes, slot in zip(note_arrays, string_slots)]
clips = [np.pad(x, (0, 4000 - x.shape[0]), mode="constant") for x in indnotes["x"]]

records = []
for a in tqdm(range(len(clips)), total=len(clips)):
    # The single note itself is kept as a sample.
    records.append({"Notes": note_arrays[a], "x": clips[a]})
    for b in range(len(clips)):
        # Only pair notes on different strings whose frets are at most 7 apart.
        if string_slots[a] != string_slots[b] and np.abs(frets[a] - frets[b]) <= 7:
            chord = note_arrays[a].copy()
            chord[string_slots[b]] = frets[b]
            # The chord signal is the sample-wise sum of the two padded clips.
            records.append({"Notes": chord, "x": clips[a] + clips[b]})

# Build the frame once; concatenating inside the loops is quadratic in the row count.
data = pd.DataFrame(records, columns=["Notes", "x"])

HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

In [9]:
data.shape

(106898, 2)

In [10]:
data.sample(5)

Unnamed: 0,Notes,x
79671,"[12, 6, 25, 25, 25, 25]","[0.17717709, 0.18088004, 0.1559871, 0.03366031..."
18739,"[21, 25, 25, 25, 25, 16]","[0.06774381, 0.117443554, 0.06604557, 0.006420..."
97316,"[25, 25, 10, 8, 25, 25]","[0.01890809, 0.0180666, 0.009000065, -0.010174..."
30638,"[25, 25, 14, 25, 21, 25]","[0.19842254, 0.20835544, 0.11902555, 0.0063857..."
48682,"[25, 25, 8, 25, 12, 25]","[0.020632287, 0.08305131, 0.1634053, 0.2121388..."


In [14]:
ipd.Audio(data.sample()["x"].values[0], rate=8000)

In [15]:
(data["x"].apply(lambda x: len(x))).mean()

4000.0

In [16]:
def to_spectral(x, samples):
    """Magnitude spectrum of each non-overlapping frame of a 1-D signal.

    ``x`` is cut into ``len(x) // samples`` consecutive frames of ``samples``
    values (a trailing partial frame is ignored). Each frame is zero-padded to
    ``2 * samples`` points, transformed with an FFT, and the magnitudes of the
    non-negative-frequency bins are kept.

    Parameters
    ----------
    x : numpy.ndarray
        1-D signal.
    samples : int
        Frame length; also the number of spectral bins kept per frame.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(x) // samples, samples)``; row ``k`` holds
        the spectrum of ``x[k*samples:(k+1)*samples]``.
    """
    n_frames = x.shape[0] // samples
    n_fft = 2 * samples
    xf = np.zeros((n_frames, samples))

    # The FFT length is always even, so exactly `samples` bins have a
    # non-negative frequency. The mask does not depend on the frame: build it once.
    keep = np.fft.fftfreq(n_fft) >= 0

    # Iterate over every full frame. The previous bound,
    # range(0, len(x) - samples, samples), stopped one frame early whenever
    # len(x) was a multiple of `samples`, leaving the last row all zeros.
    for k in range(n_frames):
        frame = x[k * samples:(k + 1) * samples]
        xf[k, :] = np.abs(np.fft.fft(frame, n=n_fft))[keep]
    return xf

In [17]:
from tqdm import tqdm as tqdm_regular
tqdm_regular.pandas()

In [18]:
# Spectral features for every chord (100-sample frames), computed with a progress bar.
data["xf"] = data["x"].progress_apply(lambda x: to_spectral(x, 100))

100%|████████████████████████████████| 106898/106898 [01:05<00:00, 1625.01it/s]


In [19]:
# Keep a random subset of at most 25000 chords. The seed makes the subset
# reproducible; the min() keeps the cell from raising when fewer chords were generated.
data = data.sample(min(25000, len(data)), random_state=0)

In [20]:
# The raw waveform column is not saved; drop it and keep the remaining columns.
data = data.drop(columns=["x"])

In [21]:
# Write the chord dataset to disk as a pickle file.
data.to_pickle("../../../Data/Guitar/chords_test.pkl")