# <center> Efficient detection of longitudinal bacteria fission using transfer learning in Deep Neural Networks
# <center> Supplemental Material

Most rod-shaped bacteria divide perpendicularly (along the narrow axis). Some, however, divide longitudinally (along the long axis). Few species are known to do so. Here we create a model of longitudinal division that can help experimental researchers classify such images.

## Training code for longitudinal division classification

#### Requirements:
 * GPU
 * Nvidia driver
 * cuda version > 10
 * cython
 * opencv-python
 * tqdm
 * torchsummary
 * matplotlib
 * pandas
 * scipy
 * joblib
 * scikit-learn
 * jupyterlab
 * torch
 * torchvision
 * torchaudio
 * numpy

#### Folders
 * data: contains all image samples in the folders train, val and test.
 * model: where the trained model will be saved
 
#### train_functions_sgd.py code contains all functions that are called in the main function. Do not remove or delete this file.

Main code for training a pretrained resnet18 network to classify microscopy images of longitudinal division bacteria.

The code is based on transfer learning for computer vision. The original code can be found [here](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)

* runtraining() main function to perform the training of a binary classifier (0,1)
* data: Contains the folders train, val and test
* train, val, test: folders with the image samples used to train, validate and test the model.
* model: the folder where the trained model will be saved

========== How to run in terminal ==========

Copy and paste the code into a file called train_model.py, then run it in a terminal:

`python train_model.py`

============================================

In [None]:
from __future__ import print_function, division
import numpy as np
import torch
import torchvision
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
import os
import torch.nn as nn
import train_functions_sgd as trnfn
import time
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import sys

### Function for making the training

In [None]:
def runtraining():
    """Fine-tune a pretrained ResNet-18 as a two-class image classifier.

    Reads images from ``data/train``, ``data/val`` and ``data/test``
    (``torchvision.datasets.ImageFolder`` layout: one sub-folder per class),
    trains the network with SGD for 25 epochs through
    ``train_functions_sgd.train_model``, saves the resulting weights to
    ``model/trained_net_sgd.pth`` and finally prints the accuracy score,
    confusion matrix and classification report computed on the test split.

    The ``model/`` folder is created if it does not exist yet.

    Returns:
        None. Results are printed and the weights are written to disk.
    """
    print('processing data')

    splits = ['train', 'val', 'test']
    data_dir = 'data/'
    model_path = "model/trained_net_sgd.pth"

    # Create the output folder before the (long) training run so that a
    # missing directory cannot make torch.save fail after all epochs are done.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    # Per-channel mean / std used to normalize the images; these are the
    # values torchvision documents for its ImageNet-pretrained models.
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    def build_transform(augment):
        """Return the preprocessing pipeline; random flips only if ``augment``."""
        steps = [transforms.Resize((128, 128))]
        if augment:
            steps.append(transforms.RandomHorizontalFlip())
            steps.append(transforms.RandomVerticalFlip())
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize(norm_mean, norm_std))
        return transforms.Compose(steps)

    # data transformations to prepare the images for training:
    # only the training split is augmented, 'val' and 'test' are just
    # resized, converted to tensors and normalized.
    data_transforms = {x: build_transform(augment=(x == 'train')) for x in splits}

    # image loader on batch of 8 images
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                      for x in splits}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=8,
                                                  shuffle=True, num_workers=8)
                   for x in splits}
    dataset_sizes = {x: len(image_datasets[x]) for x in splits}

    # setting up the device to train in CPU or GPU
    # if torch doesn't find a GPU available it will run on the CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f'Device: {device} / Dataset_size: {dataset_sizes}')

    # prepare for training
    # setting a pretrained resnet18.
    # nn.Linear(in_features, out_features):
    # applies a linear transformation to the incoming data (features) to out features.
    # The final fully connected layer of the pretrained resnet18 is replaced
    # by a new one with 2 outputs (one per class).
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.to(device)

    # Setting up the type of loss function to optimize the model during training.
    criterion = nn.CrossEntropyLoss()

    # Setting up the optimizer function.
    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    # training function
    # train_model:
    #             model_ft: defined model (Resnet18)
    #             criterion: cross entropy loss function
    #             optimizer_ft: optimization algorithm Stochastic Gradient Descent (SGD)
    #             exp_lr_scheduler: decay LR by a factor
    #             dataloaders: batch of images, 8 images per batch
    #             dataset_sizes: number of images in each split
    #             device: GPU or CPU
    #             num_epochs: number of epochs to train in the data
    model_ft = trnfn.train_model(model_ft, criterion, optimizer_ft,
                                 exp_lr_scheduler, dataloaders,
                                 dataset_sizes, device, num_epochs=25)

    # Save model (weights only, as a state_dict)
    torch.save(model_ft.state_dict(), model_path)

    # Evaluate the trained model; the two returned sequences are passed
    # to the scikit-learn metrics below as true and predicted labels.
    y_true, y_pred = trnfn.test_model(model_ft, criterion, device, dataloaders, dataset_sizes)

    print("accuracy score: ","%.6f" % accuracy_score(y_true, y_pred),"\n")
    print("confusion matrix:")
    print(confusion_matrix(y_true, y_pred),"\n")
    print("classification report:")
    print(classification_report(y_true, y_pred, digits=6))


### Running the training 

In [None]:
# Setting random seeds for reproducibility.
# NumPy and PyTorch keep separate random number generators, so both are
# seeded. However, PyTorch states:
#
# "Completely reproducible results are not guaranteed across PyTorch releases,
# individual commits, or different platforms. Furthermore, results may not be
# reproducible between CPU and GPU executions, even when using identical seeds."
#
# The __main__ guard keeps the training from being started again when this
# file is imported, e.g. by DataLoader worker processes on platforms that
# start workers by spawning a fresh interpreter.
if __name__ == "__main__":
    np.random.seed(1234)
    torch.manual_seed(1234)

    runtraining()