# PyTorch Exploration

In [43]:
import komod
import ptmod
# from pymongo import MongoClient
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
import torchvision
# from torchvision import transforms, utils
import torch.utils.data as data_utils
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
from collections import OrderedDict, defaultdict
import pickle
import os

from sklearn import metrics

%matplotlib inline

## Creating Dataset Sub-Class

Made class in `pytorchmod` that uses MongoDB to access particular test round. Creates spectrograms on the fly through the `__get__()` method.

In [3]:
# this scaling is pretty tiny, but it'll do the trick for a dry run
foo_train = ptmod.SpectroDataset('test_run', 'train', scaling=0.125)
foo_test = ptmod.SpectroDataset('test_run', 'test', scaling=0.125)

In [4]:
print("Train set length:", len(foo_train))
print("Test set length:", len(foo_test))

Train set length: 81
Test set length: 19


In [5]:
for i in range(10):
    item = foo_train[i]
    print("\nChunk:", item[2])
    print("Label:", item[1])
    print("---")
    ptmod.tensor_stats(item[0])


Chunk: 000175
Label: 0
---
Min: -0.7899980545043945
Max: 1.0
Mean: 0.29952806312907243
Std: 0.3481502752916145
Shape: torch.Size([1, 64, 54])

Chunk: 000232
Label: 1
---
Min: -0.6265758275985718
Max: 1.0
Mean: 0.26637560173741837
Std: 0.31313222857154127
Shape: torch.Size([1, 64, 54])

Chunk: 000248
Label: 0
---
Min: -1.0
Max: 1.0
Mean: 0.1910784627393022
Std: 0.3638404788205222
Shape: torch.Size([1, 64, 54])

Chunk: 000426
Label: 1
---
Min: -0.5803954601287842
Max: 1.0
Mean: 0.23331058807737767
Std: 0.27653016076763703
Shape: torch.Size([1, 64, 54])

Chunk: 000460
Label: 0
---
Min: -1.0
Max: 1.0
Mean: 0.10331488752981638
Std: 0.39873085560956295
Shape: torch.Size([1, 64, 54])

Chunk: 000512
Label: 1
---
Min: -0.9103786945343018
Max: 1.0
Mean: -0.10920006837233343
Std: 0.3085012483265392
Shape: torch.Size([1, 64, 54])

Chunk: 000619
Label: 1
---
Min: -0.6913891434669495
Max: 1.0
Mean: 0.15983487910293828
Std: 0.2697588379506309
Shape: torch.Size([1, 64, 54])

Chunk: 001085
Label: 0
--

In [6]:
type(foo_train[2][0])

torch.FloatTensor

### What does the loader do?

In [7]:
train_loader = data_utils.DataLoader(foo_train, batch_size=4, shuffle=True, drop_last=True)

In [8]:
# makes an iterable from loader
train_iter = iter(train_loader)

In [9]:
type(train_iter)

torch.utils.data.dataloader.DataLoaderIter

In [10]:
# save a batch of data as a variable
loader_unit = next(train_iter)
type(loader_unit)

list

In [11]:
# list includes training array, corresponding label, and chunk ID
len(loader_unit)

3

In [12]:
# each should include four records
for sub in loader_unit:
    print(len(sub))

4
4
4


In [13]:
# and the types?
for sub in loader_unit:
    print(type(sub))

<class 'torch.FloatTensor'>
<class 'torch.LongTensor'>
<class 'tuple'>


In [14]:
# what about some shapes and contents
print(loader_unit[0][0].shape)
print(loader_unit[1][0]) # breaks with shape because an int has no such property
print(loader_unit[2][0]) # same, because str

torch.Size([1, 64, 54])
1
008532


In [15]:
loader_unit[1]


 1
 1
 1
 1
[torch.LongTensor of size 4]

## On to the CNN...

### N-pixels going into the first FC layer?

The `reduce_axis` function in the `ptmod` module computes the number of pixels along a single axis, given original length, filter length, and stride:

In [16]:
ptmod.reduce_axis(28,5,1)

24

In [17]:
ptmod.reduce_axis(7,5,2)

2

Then, `cnn_pixels_out` gives us the total number of values that would be fed to the first FC layer, given input dimensions and kernel/stride/filters of each convolutional or max-pooling layer. Consider the 28x28 images in MNIST Fashion and the four layers of the CNN:

In [18]:
mnist_cnn_layers = (
    (5,1,10),
    (2,2,0),
    (5,1,20),
    (2,2,0)
)

ptmod.cnn_pixels_out((1,28,28), mnist_cnn_layers)

1 x 28 x 28
10 x 24 x 24
10 x 12 x 12
20 x 8 x 8
20 x 4 x 4


320

And the net from the PyTorch tutorials, optimized for a 32x32 image:

In [19]:
tutorial_layers = (
    (5,1,6),
    (2,2,0),
    (5,1,16),
    (2,2,0)
)

ptmod.cnn_pixels_out((1,32,32), tutorial_layers, drop_last=True)

1 x 32 x 32
6 x 28 x 28
6 x 14 x 14
16 x 10 x 10
16 x 5 x 5


400

With even the tiny 1/8-scale spectros, the output grows significantly when using the same cnn layers as the MNIST example:

In [20]:
# PyTorch drops pixels that would require a partial stride to calculate
ptmod.cnn_pixels_out((1,64,54), mnist_cnn_layers, drop_last=True)

1 x 64 x 54
10 x 60 x 50
10 x 30 x 25
20 x 26 x 21
20 x 13 x 10


2600

Playing with some different values:

In [41]:
cnn_layers_test = (
    (5,1,10),
    (2,2,0),
    (5,1,20),
    (2,2,0)
)

ptmod.cnn_pixels_out((1,64,54), cnn_layers_test, drop_last=True)

1 x 64 x 54
10 x 60 x 50
10 x 30 x 25
20 x 26 x 21
20 x 13 x 10


2600

Perhaps reasonable... let's try it.

### CNN Fit

In [22]:
### This assumes a (1,64,54) tensor
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # (in channels, out channels, kernel, stride=s)
        self.conv1 = nn.Conv2d(1, 10, 5, stride=1)
        # (2x2 kernel, stride=2 -- stride defaults to kernel)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5, stride=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(2600, 50)
        self.fc2 = nn.Linear(50, 2)
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # need to reshape for fully connected layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x


In [23]:
# instantiate model, set loss criterion and optimizer
cnn_1 = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(cnn_1.parameters(), lr=0.01) # set momentum if desired

In [24]:
print(cnn_1)

CNN (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (2600 -> 50)
  (fc2): Linear (50 -> 2)
)


In [25]:
# re-instantiate the train loader
train_loader = data_utils.DataLoader(foo_train, 
                                     batch_size=4, 
                                     shuffle=True,
                                     num_workers=2,
                                     drop_last=True)

In [26]:
nb_epochs = 10
# minibatches = 5000

for epoch in range(nb_epochs):
    print("Epoch", epoch+1)
    running_loss = 0.0
    then = time.perf_counter()
    for i, data in enumerate(train_loader, 1):
        sub_then = time.perf_counter()
        # separate input data and labels, dump chunk IDs
        inputs, labels, _ = data
        # wrap in Variable for GD
        inputs, labels = Variable(inputs), Variable(labels)
        # zero parameter gradients, else accumulate
        optimizer.zero_grad()
        # forward prop
        outputs = cnn_1(inputs)
        # calculate loss
        loss = criterion(outputs, labels)
        # backprop
        loss.backward()
        # update weights
        optimizer.step()         
        #verbosity
        sub_now = time.perf_counter()
        print("\r * {} loss: {:.3f}\tTime: {:.3f} ms"
              .format(i, loss.data[0], (sub_now-sub_then)*1000), end='')
        running_loss += loss.data[0]
    now = time.perf_counter()
    print("\r * Avg loss: {:.3f}\tTime: {:.3f} ms"
          .format(running_loss/i, (now-then)*1000))

#         # print running loss
#         running_loss += loss.data[0]
#         if i%minibatches == minibatches:
#             # print every 5,000 minibatches or whatever you set 'minibatches' equal to
#             print('[%d, %5d] loss: %.3f' % (epoch+1, i, running_loss/minibatches))
#             running_loss = 0.0
            
print('\nTraining Complete!')

Epoch 1
 * Avg loss: 0.693	Time: 595.553 ms
Epoch 2
 * Avg loss: 0.690	Time: 648.583 ms
Epoch 3
 * Avg loss: 0.690	Time: 601.633 ms
Epoch 4
 * Avg loss: 0.689	Time: 613.672 ms
Epoch 5
 * Avg loss: 0.689	Time: 558.390 ms
Epoch 6
 * Avg loss: 0.687	Time: 552.772 ms
Epoch 7
 * Avg loss: 0.687	Time: 528.488 ms
Epoch 8
 * Avg loss: 0.683	Time: 530.346 ms
Epoch 9
 * Avg loss: 0.684	Time: 528.968 ms
Epoch 10
 * Avg loss: 0.684	Time: 537.246 ms

Training Complete!


In [21]:
# show learnable parameters for model
params = list(cnn_1.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

8
torch.Size([10, 1, 5, 5])


In [23]:
# view output, should be the energy for each category
train_batch = next(iter(train_loader))

input_var = Variable(train_batch[0])
out = cnn_1(input_var)
print(out)

Variable containing:
 0.0000  0.1541
 0.0000  0.1153
 0.0000  0.1214
 0.0000  0.1115
[torch.FloatTensor of size 4x2]



### Prediction

In [24]:
classes = ["no sax", "sax"]

In [25]:
test_loader = data_utils.DataLoader(foo_test, 
                                    batch_size=4, 
                                    shuffle=False, # set for False for test set
                                    num_workers=2,
                                    drop_last=False)

In [26]:
test_iter = iter(test_loader)
spectros, labels, chunk_ids = test_iter.next()

# show ground truth
print("Groundtruth")
print(labels.size(0))
for j in range(labels.size(0)):
    print("{}: {}".format(chunk_ids[j], classes[labels[j]]))

# from PyTorch tutorial
# print('GroundTruth: ', ' '.join('%8s' % classes[labels[j]] for j in range(4)))

Groundtruth
4
000727: sax
001193: no sax
001503: no sax
005661: no sax


In [27]:
outputs = cnn_1(Variable(spectros))
print(outputs.data)


 0.0000  0.1052
 0.0000  0.0557
 0.0000  0.0874
 0.0000  0.0735
[torch.FloatTensor of size 4x2]



In [28]:
_, predicted = torch.max(outputs.data, 1)

print("Predictions")
for j in range(4):
    print("{}: {}".format(chunk_ids[j], classes[predicted[j]]))

Predictions
000727: sax
001193: sax
001503: sax
005661: sax


In [29]:
correct = 0
total = 0
results = {}
for data in test_loader:
    spectros, labels, chunk_ids = data
    outputs = cnn_1(Variable(spectros))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    for c_id, gt, pred, out in zip(chunk_ids, labels, predicted, outputs.data):
        results[c_id] = (gt, pred, out)

print('Accuracy of the network on the 18 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 18 test images: 42 %


In [31]:
print(results)

{'000727': (1, 1, 
 0.0000
 0.1052
[torch.FloatTensor of size 2]
), '001193': (0, 1, 
1.00000e-02 *
  0.0000
  5.5690
[torch.FloatTensor of size 2]
), '001503': (0, 1, 
1.00000e-02 *
  0.0000
  8.7391
[torch.FloatTensor of size 2]
), '005661': (0, 1, 
1.00000e-02 *
  0.0000
  7.3467
[torch.FloatTensor of size 2]
), '008523': (1, 1, 
 0.0000
 0.1142
[torch.FloatTensor of size 2]
), '008599': (1, 1, 
 0.0000
 0.1473
[torch.FloatTensor of size 2]
), '008722': (0, 1, 
 0.0000
 0.1221
[torch.FloatTensor of size 2]
), '009152': (1, 1, 
 0.0000
 0.1220
[torch.FloatTensor of size 2]
), '010576': (1, 1, 
 0.0000
 0.1297
[torch.FloatTensor of size 2]
), '011182': (0, 1, 
 0.0000
 0.1666
[torch.FloatTensor of size 2]
), '011659': (0, 1, 
1.00000e-02 *
  0.0000
  9.6978
[torch.FloatTensor of size 2]
), '013746': (0, 1, 
1.00000e-02 *
  0.0000
  8.8686
[torch.FloatTensor of size 2]
), '014629': (0, 1, 
 0.0000
 0.1302
[torch.FloatTensor of size 2]
), '014810': (0, 1, 
 0.0000
 0.1038
[torch.FloatTe

In [32]:
y = []
y_hat = []
for val in results.values():
    y.append(val[0])
    y_hat.append(val[1])

print(y)
print(y_hat)

[1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


### Developing Fit and Predict Functions

In [27]:
def fit(cnn, 
        dataset, 
        optimizer, 
        criterion, 
        num_epochs, 
        batch_size=4, 
        minibatches=1):
    """
    Runs feed-forward and back-prop to train CNN model.
    ---
    IN
    cnn: CNN instance 
    dataset: built SpectroDataset object
    optimizer: PyTorch optimizer for back-prop
    criterion: PyTorch loss object for loss metric
    num_epochs: number of times to cycle through data (int)
    batch_size: number of records per batch (int)
    minibatches: print loss and time every n minibatches (int)
    NO OUT
    """
    
    train_loader = data_utils.DataLoader(
        dataset, 
        batch_size=4, 
        shuffle=True,
        num_workers=2,
        drop_last=False # not sure the merits of doing this or not?
    )

    for epoch in range(num_epochs):
        print("Epoch", epoch+1)
        running_loss = 0.0
        then = time.perf_counter()
        for i, data in enumerate(train_loader, 1):
            sub_then = time.perf_counter()
            # separate input data and labels, dump chunk IDs
            spectros, labels, _ = data
            # wrap in Variable for GD
            spectros, labels = Variable(spectros), Variable(labels)
            # zero parameter gradients, else accumulate
            optimizer.zero_grad()
            # forward prop
            outputs = cnn(spectros)
            # calculate loss
            loss = criterion(outputs, labels)
            # backprop
            loss.backward()
            # update weights
            optimizer.step()         
            #verbosity
            sub_now = time.perf_counter()
            print("\r * {} loss: {:.3f}\tTime: {:.3f} ms"
                  .format(i, loss.data[0], (sub_now-sub_then)*1000), end='')
            running_loss += loss.data[0]
    #         running_loss += loss.data[0]
    #         if i%minibatches == minibatches:
    #             # print every 5,000 minibatches or whatever you set 'minibatches' equal to
    #             print('[%d, %5d] loss: %.3f' % (epoch+1, i, running_loss/minibatches))
    #             running_loss = 0.0
        now = time.perf_counter()
        print("\r * Avg loss: {:.3f}\tTime: {:.3f} ms"
              .format(running_loss/i, (now-then)*1000))
    print('\nTraining Complete!')

In [28]:
def predict(cnn, dataset, batch_size=4, res_format='df'):
    """
    Predicts values on trained CNN.
    ---
    IN
    cnn: trained CNN instance
    dataset: built SpectroDataset object
    batch_size: number of records per batch
    res_format: results format, either 'df' for pandas dataframe or 'dict'
        for dictionary (str)
    OUT
    results: if 'dict', dictionary with chunk ID as key, and a tuple of (actual,
        predicted, output_array) as value (dict); if 'df', pandas dataframe
    """
    
    test_loader = data_utils.DataLoader(
        dataset, 
        batch_size=4, 
        shuffle=False, # set for False for test set
        num_workers=2
    )
    
    results = {}
    
    for data in test_loader:
        spectros, labels, chunk_ids = data
        outputs = cnn_1(Variable(spectros))
        _, pred = torch.max(outputs.data, 1)
        for c_id, y, y_hat, out in zip(chunk_ids, labels, pred, outputs.data):
            results[c_id] = (y, y_hat, out)
            
    if res_format == 'df':
        results = results_to_df(results)
    
    return results

In [29]:
def results_to_df(results):
    """
    Converts predict results to Pandas dataframe.
    ---
    IN
    results: dictionary generated by results function (dict)
    OUT
    df: pandas dataframe of results 
    """

    cols = ['chunk_id', 'actual', 'pred', 'e0', 'e1']
    results_trans = OrderedDict.fromkeys(cols)
    for k in results_trans.keys():
        results_trans[k] = []

    for k, v in results.items():
        for col, val in zip(cols, [k, v[0], v[1], v[2][0], v[2][1]]):
            results_trans[col].append(val)
    
    df = pd.DataFrame(results_trans)
    
    return df

In [30]:
fit(cnn_1, foo_train, optim.SGD(cnn_1.parameters(), lr=0.01), nn.CrossEntropyLoss(), 10)

Epoch 1
 * Avg loss: 0.681	Time: 543.595 ms
Epoch 2
 * Avg loss: 0.681	Time: 620.448 ms
Epoch 3
 * Avg loss: 0.678	Time: 587.303 ms
Epoch 4
 * Avg loss: 0.684	Time: 574.613 ms
Epoch 5
 * Avg loss: 0.673	Time: 568.319 ms
Epoch 6
 * Avg loss: 0.679	Time: 558.119 ms
Epoch 7
 * Avg loss: 0.676	Time: 565.400 ms
Epoch 8
 * Avg loss: 0.671	Time: 575.641 ms
Epoch 9
 * Avg loss: 0.667	Time: 568.968 ms
Epoch 10
 * Avg loss: 0.674	Time: 568.993 ms

Training Complete!


In [31]:
train_results = predict(cnn_1, foo_train)
test_results = predict(cnn_1, foo_test)

### Custom Config of Class?

In [32]:
### Trying for custom input and fit/predict as methods
class CNN_cpcpff(nn.Module):
    """
    Pass input params as a dictionary where each item is a layer
    and each value is a list, following this convention:
    
    Convolutional: c1: [kernel, stride, channels_out]
    Max Pooling: p1: [kernel, stride]
    Fully Connected: f1: [channels_in, channels_out]
    
    For example:
    
        params = {
            'c1': [5,1,10],
            'p1': [2,2],
            'c2': [5,1,20],
            'p2': [2,2],
            'f1': [2600,50],
            'f2': [50,2]
        }
    
    All values must be integers.
    """
    
    def __init__(self, params):
        super(CNN_cpcpff, self).__init__()
        # (in channels, out channels, kernel, stride=s)
        self.p = params
        self.conv1 = nn.Conv2d(1, 
                               self.p['c1'][2], 
                               self.p['c1'][0], 
                               stride=self.p['c1'][1])
        # (2x2 kernel, stride=2 -- stride defaults to kernel)
        self.pool1 = nn.MaxPool2d(self.p['p1'][0], self.p['p1'][1])
        self.conv2 = nn.Conv2d(self.p['c1'][2], 
                               self.p['c2'][2], 
                               self.p['c2'][0], 
                               stride=self.p['c2'][1])
        self.pool2 = nn.MaxPool2d(self.p['p2'][0], self.p['p2'][1])
        self.fc1 = nn.Linear(self.p['f1'][0], self.p['f1'][1])
        self.fc2 = nn.Linear(self.p['f2'][0], self.p['f2'][1])
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # need to reshape for fully connected layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

    def save_myself(self, fname, dir_out='../data'):
        """
        Saves current object as a .pkl file.
        ---
        fname: filename of choice (str)
        dir_out: path to save directory (str)
        """
        
        fpath = os.path.join(dir_out, fname + '.p')
        with open(fpath, 'wb') as pf:
            pickle.dump(self, pf)

In [33]:
params_init = {
    'c1': [5,1,10],
    'p1': [2,2],
    'c2': [5,1,20],
    'p2': [2,2],
    'f1': [2600,50],
    'f2': [50,2]
}

In [34]:
cnn_test = CNN_cpcpff(params_init)

In [35]:
print(cnn_test)

CNN_cpcpff (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (2600 -> 50)
  (fc2): Linear (50 -> 2)
)


In [36]:
fit(cnn_test, foo_train, optim.SGD(cnn_test.parameters(), lr=0.01, momentum=0.5), nn.CrossEntropyLoss(), 10)

Epoch 1
 * Avg loss: 0.693	Time: 732.572 ms
Epoch 2
 * Avg loss: 0.693	Time: 802.186 ms
Epoch 3
 * Avg loss: 0.693	Time: 683.722 ms
Epoch 4
 * Avg loss: 0.693	Time: 580.796 ms
Epoch 5
 * Avg loss: 0.693	Time: 582.000 ms
Epoch 6
 * Avg loss: 0.693	Time: 593.529 ms
Epoch 7
 * Avg loss: 0.693	Time: 558.362 ms
Epoch 8
 * Avg loss: 0.693	Time: 601.578 ms
Epoch 9
 * Avg loss: 0.693	Time: 622.007 ms
Epoch 10
 * Avg loss: 0.693	Time: 710.040 ms

Training Complete!


In [37]:
res_train_df = predict(cnn_test, foo_train)
res_test_df = predict(cnn_test, foo_test)

In [40]:
cnn_test.save_myself('test_model')

It works!

### Metrics

In [38]:
def get_scores(train_df, test_df, verbose=True):
    """
    Calculates accuracy, recall, and specificity for train and test
    predictions.
    ### add precision?
    ---
    IN
    train_df: predict results df of train set
    test_df: predict results df of test set
    OUT
    scores: scores bundle
    """
    
    scores = defaultdict(list)
    score_types = ['acc', 'rec', 'spec']
    
    for df in [train_df, test_df]:
        df_scores = []
        df_scores.append(
            metrics.accuracy_score(df.actual, df.pred))
        df_scores.append(
            metrics.recall_score(df.actual, df.pred))
        df_scores.append(
            metrics.recall_score(df.actual, df.pred, pos_label=0))
#         df_scores.append(df[df.actual == df.pred].shape[0] / df.shape[0])
#         df_scores.append(df[(df.actual == 1) & (df.pred == 1)].shape[0] /
#                          df[df.actual == 1].shape[0])
#         df_scores.append(df[(df.actual == 0) & (df.pred == 0)].shape[0] /
#                          df[df.actual == 0].shape[0])
        for n, s in zip(score_types, df_scores):
            scores[n].append(s)
        
    if verbose:
        print("MODEL SCORES")
        print("Score\tTrain\tTest")
        print("-" * 24)
        for score in score_types:
            print("{}\t{:.3f}\t{:.3f}".format(
                score.capitalize(), 
                scores[score][0],
                scores[score][1])
            )
        
    return scores    

In [39]:
scores = get_scores(res_train_df, res_test_df, verbose=True)

MODEL SCORES
Score	Train	Test
------------------------
Acc	0.630	0.526
Rec	0.976	1.000
Spec	0.256	0.182


Into the module with it all and onto the real stuff...