In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Conv1d, BatchNorm1d, ReLU, Linear
import numpy as np
import pandas as pd
import ChordClassifier as cc
import importlib as lib

lib.reload(cc)
settings_dict = {'in_channels': 2, 
		'num_layers': 5,
		'filter_width': 11,
		'channels': [16, 32, 64, 128, 256],
		'dropout':0.2,
		'rate':44100,
		'duration':0.5,
		'flat_dim':128}

net = cc.SiameseArchitecture(**settings_dict)


In [7]:
test_in = torch.randn(4, 2, 22100)

In [8]:
net(test_in, test_in).shape



torch.Size([4, 1])

In [4]:
pytorch_total_params = sum(p.numel() for p in net.parameters())


In [5]:
pytorch_total_params

34515937

In [1]:
# %load train_chords.py
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import importlib as lib
import ChordClassifier as cc
import torch.optim

from torch.nn import Conv1d, BatchNorm1d, ReLU, Linear
from utils import *
from parameters import *

lib.reload(cc)

BATCH_SIZE = 6

def make_batch(df, chord, b_s=32):
    match_dfs = df[df.chord == chord]
    match_dfs = match_dfs.sample(b_s)
    diff_dfs = df[df.chord != chord]
    diff_dfs = diff_dfs.sample(b_s)

    match_x1 = []
    match_x2 = []

    diff_x1 = []
    diff_x2 = []

    for i in range(0, len(match_dfs), 2):
        _, t = read(match_dfs.iloc[i].fname)
        match_x1.append(t)

    for j in range(1, len(match_dfs), 2):
        _, t = read(match_dfs.iloc[i].fname)
        match_x2.append(t)

    for i in range(0, len(diff_dfs), 2):
        _, t = read(diff_dfs.iloc[i].fname)
        diff_x1.append(t)

    for i in range(1, len(diff_dfs), 2):
        _, t = read(diff_dfs.iloc[i].fname)
        diff_x2.append(t)

    x1 = torch.cat(match_x1 + diff_x1, 0)
    x2 = torch.cat(match_x2 + diff_x2, 0)

    y = torch.cat((torch.ones(len(match_x1), 1), torch.zeros(len(diff_x2), 1)), 0)
    y.requires_grad = True

    # indices = np.arange(len(match_x1) + len(diff_x2))
    # np.random.shuffle(indices)
    x1_out = x1[:,:,:]
    x2_out = x2[:,:,:]
    y_out = y[:,:]

    return x1_out, x2_out, y_out

net = cc.SiameseArchitecture(**SETTINGS)    

# refset_df = pd.read_csv(REF_CSV, header=None, sep=',', names=['fname', 'chord', 'key'])    
# refset_df.loc[:, 'fname'] = REF_DIR + refset_df[['fname']]
# refset_df['chord'] = refset_df['chord'] + '_' + refset_df['key']
# refset_df = refset_df.drop(columns=['key'])

df = pd.read_csv(ALT_CSV, header=None, sep=',', names=['fname', 'chord', 'key'])
df.loc[:, 'fname'] = ALT_DIR + df[['fname']]
df['chord'] = df['chord'] + '_' + df['key']
df = df.drop(columns=['key'])    

# df = refset_df.append(df)

optimizer = torch.optim.Adam(net.parameters(), lr=0.000001,  amsgrad=True)
optimizer.zero_grad()
    
for _ in range(0, 10):
    x1, x2, y = make_batch(df, 'd_sharp_major', b_s=BATCH_SIZE)
    probs = net(x1, x2)
    loss = F.binary_cross_entropy(probs, y.detach())
    thresh = probs > 0.5
    acc = torch.sum(thresh == y).item() / torch.numel(y)

    print('loss: {} acc: {}'.format(loss, acc))

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()



loss: 0.7975291609764099 acc: 0.3333333333333333
loss: 0.6964635848999023 acc: 0.8333333333333334
loss: 0.7176907658576965 acc: 0.3333333333333333
loss: 0.7445562481880188 acc: 0.0
loss: 0.6352415084838867 acc: 0.8333333333333334
loss: 0.788937509059906 acc: 0.5
loss: 0.6249960064888 acc: 0.6666666666666666
loss: 0.6754755973815918 acc: 0.5
loss: 0.6820318102836609 acc: 0.6666666666666666
loss: 0.602455198764801 acc: 0.6666666666666666


In [3]:
for _ in range(0, 10):
    x1, x2, y = make_batch(df, 'd_sharp_major', b_s=BATCH_SIZE)
    probs = net(x1, x2)
    loss = F.binary_cross_entropy(probs, y.detach())
    thresh = probs > 0.5
    acc = torch.sum(thresh == y).item() / torch.numel(y)

    print('loss: {} acc: {}'.format(loss, acc))

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()


loss: 0.5470489859580994 acc: 0.8333333333333334
loss: 0.6778363585472107 acc: 0.6666666666666666
loss: 0.5715370774269104 acc: 1.0
loss: 0.5494368076324463 acc: 0.8333333333333334
loss: 0.5638139843940735 acc: 1.0
loss: 0.5791290402412415 acc: 1.0
loss: 0.5864560008049011 acc: 0.8333333333333334
loss: 0.552257239818573 acc: 0.8333333333333334
loss: 0.5076869130134583 acc: 0.8333333333333334
loss: 0.5381329655647278 acc: 1.0


In [11]:
base = '/Users/collin/Dropbox/code/VoiceSampler/chord_classification/SomeLikeItHot/'
base2 = '/Users/collin/Downloads/songs/collin_cuts/'
not_d = base2 + 'cw_cut1.mp3'
is_d = base + 'slih12.mp3'

_, yes_d = read(is_d)
_, no_d = read(not_d)
net(yes_d, no_d)

tensor([[0.5478]], grad_fn=<SigmoidBackward>)

In [72]:
net(x1, x2)

tensor([[1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.9607e-19],
        [1.2706e-18],
        [7.2639e-19],
        [5.7291e-14],
        [1.3429e-12]], grad_fn=<SigmoidBackward>)

In [73]:
test_example = x1[0].unsqueeze(axis=0)

In [78]:
_, g_sh_M = read('/Users/collin/Dropbox/code/VoiceSampler/chord_classification/SomeLikeItHot/slih1.mp3')

22050


In [79]:
net(g_sh_M, test_example)

tensor([[1.]], grad_fn=<SigmoidBackward>)