# Spectrogram classification Algorithm

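Note: this code uses the fastai library from the *version 1* repository (installation: https://github.com/fastai/fastai1/blob/master/README.md#installation). The modules imported below (`fastai.conv_learner`, `fastai.transforms`, `fastai.dataset`, ...) follow the legacy pre-1.0 module layout, so make sure the installed package provides them.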

In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
%matplotlib inline
# this is the main library used (sits on top of PyTorch)
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [2]:
def set_patient(i):
    """Return the dataset root, first trimming class folders that hold 8k+1 images.

    The dataset root contains ``train``, ``valid`` and ``test`` folders.
    ``train`` and ``valid`` each have ``Yes`` and ``No`` subfolders holding the
    positive and negative image cases; ``test`` holds uncategorized images.

    For each of the four class folders, if the number of ``.jpg`` files leaves
    a remainder of 1 when divided by the batch size (8), one ``.jpg`` file is
    deleted so that the folder cannot contribute a final minibatch of size 1
    (which the original author notes causes normalization issues later on).

    WARNING: the deletion is permanent and happens on disk.

    NOTE(review): batches are presumably drawn from Yes and No combined, so a
    per-folder check may not fully rule out a trailing 1-image batch — confirm
    against the data loader before relying on this.

    Parameters
    ----------
    i : any
        Unused; kept so existing ``set_patient(0)`` calls keep working.

    Returns
    -------
    str
        The dataset root path.

    Raises
    ------
    FileNotFoundError
        If one of the four class folders does not exist.
    """
    NEWPATH = "data/updated_JNM_training_data"
    batch_size = 8

    # Same visiting order as before: train/Yes, train/No, valid/Yes, valid/No.
    for split in ("train", "valid"):
        for label in ("Yes", "No"):
            folder = NEWPATH + "/" + split + "/" + label
            # Only .jpg files are counted, so only a .jpg file may be removed.
            # os.listdir returns names in arbitrary order and may include
            # non-image entries; sorting makes the choice deterministic.
            jpg_names = sorted(name for name in os.listdir(folder) if ".jpg" in name)
            if len(jpg_names) % batch_size == 1:
                os.remove(folder + "/" + jpg_names[0])
    return NEWPATH

In [3]:
# make a transform to flip images along vertical axis, creating artificial expansion of training dataset
class RandomFlipUD(CoordTransform):
    """Augmentation that, with probability ``p``, reverses an image top-to-bottom.

    ``set_state`` draws the random decision and ``do_transform`` applies it.
    The decision is kept on ``self.store`` (presumably provided by the base
    transform class — confirm against the fastai source).
    """

    def __init__(self, tfm_y=TfmType.NO, p=0.5):
        super().__init__(tfm_y=tfm_y)
        # probability that a given draw results in a flip
        self.p = p

    def set_state(self):
        # Draw once and remember the outcome for do_transform.
        flip_now = random.random() < self.p
        self.store.do_flip = flip_now

    def do_transform(self, x, is_y):
        # is_y is accepted for interface compatibility but not used here.
        if not self.store.do_flip:
            return x
        # np.flipud returns a view; copy() yields an independent array.
        return np.flipud(x).copy()

In [4]:
def get_the_data(path=None, model_arch=None, sz=44, bs=8):
    """Build the image-classifier data object with augmentation transforms.

    All parameters are optional so that the existing ``get_the_data()`` call
    keeps working; passing them explicitly removes the dependency on notebook
    globals.

    Parameters
    ----------
    path : str, optional
        Dataset root containing the train/valid/test folders. Defaults to the
        notebook-level ``NEWPATH``.
    model_arch : optional
        Architecture handed to ``tfms_from_model``. Defaults to the
        notebook-level ``arch``.
    sz : int, optional
        Target image size passed to the transforms (default 44).
    bs : int, optional
        Batch size (default 8, the batch size assumed by ``set_patient``).

    Returns
    -------
    The object returned by ``ImageClassifierData.from_paths``.

    Raises
    ------
    NameError
        If a parameter is omitted and the corresponding notebook global
        (``NEWPATH`` / ``arch``) has not been defined.
    """
    # Fall back to the notebook globals only when the caller gave nothing.
    if path is None:
        path = NEWPATH
    if model_arch is None:
        model_arch = arch

    # Augmentations that artificially expand the training data:
    # a random up/down flip, plus RandomScale(sz, 1.2) and RandomRotate(1)
    # (presumably zoom up to 1.2x and rotation up to 1 degree — confirm
    # against the fastai transforms source).
    transforms_up_down = [RandomFlipUD(), RandomScale(sz, 1.2), RandomRotate(1)]
    # CropType.NO: make the image square without cropping (per the original
    # author's note, the image is skewed down instead).
    tfms = tfms_from_model(model_arch, sz, crop_type=CropType.NO, aug_tfms=transforms_up_down)
    # Load from the folder layout; unlabeled test images live in the 'test' folder.
    data = ImageClassifierData.from_paths(path, tfms=tfms, bs=bs, test_name='test')
    return data

In [5]:
class EarlyStopping(Callback):
    """Stop training when the validation loss stops improving; restore the best model.

    After every epoch the validation loss (``metrics[0]``) is compared with the
    best value seen so far in the current training run. An improvement saves
    the learner (and optionally its encoder). Training is stopped once more
    than ``patience`` consecutive epochs pass without improvement, i.e. after
    ``patience + 1`` such epochs. When training ends, the best saved model is
    loaded back into the learner.

    Parameters
    ----------
    learner :
        Object exposing ``save``, ``load`` and (if ``enc_path`` is given)
        ``save_encoder``.
    save_path : str
        Name under which the best model is saved and later reloaded.
    enc_path : str, optional
        If given, the encoder is also saved under this name on improvement.
    patience : int, optional
        Number of non-improving epochs tolerated before stopping (default 5).
    """

    def __init__(self, learner, save_path, enc_path=None, patience=5):
        super().__init__()
        self.learner=learner
        self.save_path=save_path
        self.enc_path=enc_path
        self.patience=patience

    def on_train_begin(self):
        # Start from +inf so the first finite validation loss always counts as
        # an improvement (a fixed sentinel such as 100 would silently reject
        # larger losses and never save a checkpoint).
        self.best_val_loss=float('inf')
        self.num_epochs_no_improvement=0
        # Tracks whether THIS run wrote a checkpoint, so on_train_end never
        # loads a missing file or a stale one left by an earlier run.
        self.has_checkpoint=False

    def on_epoch_end(self, metrics):
        """Record the epoch's validation loss; return True to request a stop."""
        val_loss = metrics[0]
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            self.num_epochs_no_improvement = 0
            self.learner.save(self.save_path)
            if self.enc_path is not None:
                self.learner.save_encoder(self.enc_path)
            self.has_checkpoint = True
        else:
            self.num_epochs_no_improvement += 1
        if self.num_epochs_no_improvement > self.patience:
            print(f'Stopping - no improvement after {self.patience+1} epochs')
            return True
        return False

    def on_train_end(self):
        # getattr guards the case where on_train_begin was never invoked.
        if not getattr(self, 'has_checkpoint', False):
            print(f'No checkpoint was saved to {self.save_path} during this run; keeping current weights')
            return
        print(f'Loading best model from {self.save_path}')
        self.learner.load(self.save_path)

In [9]:
def train_the_model(arch,data):
    """Train a pretrained conv learner in three stages and predict on the test set.

    Parameters
    ----------
    arch :
        Architecture passed to ``ConvLearner.pretrained``.
    data :
        Data object passed to ``ConvLearner.pretrained``; its
        ``test_ds.fnames`` supplies the test image names.

    Returns
    -------
    pandas.DataFrame
        One row per test image with columns ``image_number`` (the test file
        name), ``prediction`` (argmax over the model outputs) and
        ``probability`` (exp of the output in column 1).
    """
    # Stage 1: one epoch with precompute=True
    # (presumably trains only the new head on cached activations — confirm).
    learn = ConvLearner.pretrained(arch, data, precompute=True)
    learn.fit(1e-2, 1)

    # Stage 2: precompute off, three cycles of one epoch at a lower rate.
    learn.precompute = False
    learn.fit(1e-3, 3, cycle_len=1)

    # Stage 3: unfreeze and train with three learning rates
    # (presumably one per layer group — confirm), with early stopping
    # that saves the best model under 'best_mod' and restores it at the end.
    learn.unfreeze()
    layer_lrs = np.array([1e-4, 1e-3, 1e-2])
    cb = [EarlyStopping(learn, save_path='best_mod', patience=6)]
    learn.fit(layer_lrs, 6, cycle_len=1, cycle_mult=2, callbacks=cb)

    # Test-set outputs; the exp() below assumes predict returns
    # log-probabilities, as the variable name suggests.
    log_preds_test = learn.predict(is_test=True)
    preds_test = np.argmax(log_preds_test, axis=1)
    # Column 1 is taken as the positive class
    # (presumably 'Yes', if classes are ordered alphabetically — confirm).
    probs_test = np.exp(log_preds_test[:, 1])

    # Build the result table directly from the file names; no element-by-element
    # copy into a pre-allocated array is needed.
    test = pd.DataFrame(data=list(data.test_ds.fnames), columns=['image_number'])
    test['prediction'] = preds_test
    test['probability'] = probs_test

    return test

In [10]:
def perf_measure(y_actual, y_hat):
    """Count confusion-matrix cells for binary labels (1 = positive, 0 = negative).

    Walks the indices of ``y_hat`` and compares each prediction with the
    entry of ``y_actual`` at the same index. Predictions that are neither
    1 nor 0 are not counted in any cell.

    Returns the tuple ``(TP, FP, TN, FN)``.
    """
    true_pos = false_pos = true_neg = false_neg = 0

    for idx in range(len(y_hat)):
        actual, predicted = y_actual[idx], y_hat[idx]
        agree = actual == predicted
        if predicted == 1:
            # predicted positive: correct -> TP, otherwise FP
            if agree:
                true_pos += 1
            else:
                false_pos += 1
        elif predicted == 0:
            # predicted negative: correct -> TN, otherwise FN
            if agree:
                true_neg += 1
            else:
                false_neg += 1

    return (true_pos, false_pos, true_neg, false_neg)

In [11]:
# NOTE(review): `metrics` is not used in any cell visible here — confirm it is needed.
from sklearn import metrics

# Train the model, run it on the test images and collect the results in `dat`.
# NOTE(review): `arch` is not assigned in any cell visible above (execution counts
# jump from 5 to 9) — it must be defined before this cell runs on a fresh kernel.
# set_patient() may permanently delete image files under the data directory
# (it calls os.remove), so this cell is not side-effect free.
NEWPATH = set_patient(0)
data = get_the_data()
dat = train_the_model(arch,data)

epoch      trn_loss   val_loss   accuracy                     
    0      0.729164   0.440497   0.831014  



epoch      trn_loss   val_loss   accuracy                    
    0      0.487585   0.304923   0.867992  
    1      0.43838    0.287109   0.879523                    
    2      0.422965   0.280096   0.879125                    



epoch      trn_loss   val_loss   accuracy                    
    0      0.379149   0.189283   0.93002   
    1      0.31651    0.120518   0.956262                    
    2      0.205598   0.11113    0.962624                    
    3      0.352908   0.112775   0.957455                    
    4      0.171938   0.076861   0.975348                    
    5      0.11548    0.099423   0.961034                     
    6      0.157989   0.074438   0.978529                     
    7      0.192357   0.128191   0.959841                    
    8      0.127441   0.069557   0.978926                    
    9      0.096506   0.064126   0.979324                     
    10     0.077063   0.074291   0.972962                     
    11     0.052119   0.064469   0.978131                     
    12     0.048707   0.064367   0.979722                     
    13     0.074135   0.074665   0.976143                     
    14     0.061342   0.057074   0.98171                      
    15     0.10613

In [None]:
# Preview the first rows of the table returned by train_the_model.
dat.head()