# DDSM dataset preview

In [None]:
import random
import cv2
import glob
import numpy as np
import torch
import matplotlib.pyplot as plt

from argparse import ArgumentParser
from mpl_toolkits.axes_grid1 import ImageGrid
from pytorch_lightning import Trainer
from data import DInterface

import os
import tqdm
import glob
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from torchvision import transforms
from argparse import ArgumentParser
from pytorch_lightning import Trainer
import pytorch_lightning.callbacks as plc
from torch.utils.data import DataLoader
from pytorch_lightning.loggers import TensorBoardLogger

from model import MInterface
from data import DInterface
from utils import load_model_path_by_args


from preprocess.patchset import PatchSet
from preprocess.utils import segment_breast,crop_img, read_resize_img, draw_rect, get_max_connected_area,convert_to_8bit

In [None]:
parser = ArgumentParser()

# Every command-line option is declared as (flag, add_argument keyword
# options). Options are registered in list order, so the order below is the
# order in which the parser knows them.
_OPTION_TABLE = [
    # Basic Training Control
    ('--batch_size', {'default': 32, 'type': int}),
    ('--num_workers', {'default': 6, 'type': int}),
    ('--seed', {'default': 1234, 'type': int}),
    ('--lr', {'default': 1e-3, 'type': float}),

    # LR Scheduler
    ('--lr_scheduler', {'choices': ['step', 'cosine'], 'type': str}),
    ('--lr_decay_steps', {'default': 20, 'type': int}),
    ('--lr_decay_rate', {'default': 0.5, 'type': float}),
    ('--lr_decay_min_lr', {'default': 1e-5, 'type': float}),

    # Restart Control
    ('--load_best', {'action': 'store_true'}),
    ('--load_dir', {'default': None, 'type': str}),
    ('--load_ver', {'default': None, 'type': str}),
    ('--load_v_num', {'default': None, 'type': int}),

    # Training Info
    ('--dataset', {'default': 'patch_data', 'type': str}),
    ('--data_dir', {
        'default': '/home/xumingjie/BreastCancer/NaturePaperReproduce/preprocess/',
        'type': str,
    }),
    ('--csv_file', {'default': 'csv/patch_trainv3.csv', 'type': str}),
    ('--lmdb_file', {'default': 'mdb/patch_imagesv3', 'type': str}),
    ('--model_name', {'default': 'standard_net', 'type': str}),
    ('--loss', {'default': 'bce', 'type': str}),
    ('--weight_decay', {'default': 1e-5, 'type': float}),
    ('--no_augment', {'action': 'store_true'}),
    ('--log_dir', {'default': 'logs', 'type': str}),

    # Model Hyperparameters
    ('--hid', {'default': 64, 'type': int}),
    ('--block_num', {'default': 8, 'type': int}),
    ('--in_channel', {'default': 1, 'type': int}),
    ('--out_channel', {'default': 3, 'type': int}),
    ('--layer_num', {'default': 5, 'type': int}),

    # Other
    ('--aug_prob', {'default': 0.5, 'type': float}),
]
for _flag, _options in _OPTION_TABLE:
    parser.add_argument(_flag, **_options)

# Let pytorch lightning's Trainer add its own arguments; the parser it
# returns replaces ours.
# (The older variant passed parser.add_argument_group(title="pl.Trainer args")
# instead of the parser itself; it is deprecated and no longer used here.)
parser = Trainer.add_argparse_args(parser)

# Override the default of the Trainer's max_epochs argument.
parser.set_defaults(max_epochs=100)

# An explicit argument list is parsed instead of the process command line.
args = parser.parse_args(args=['--lr_scheduler', 'step'])

In [None]:
## Summary
def data_summary(data):
    """Count the rows of ``data`` belonging to each lesion class.

    ``data`` is indexed by its ``'type'`` column and filtered with a boolean
    mask, one class at a time.

    Returns:
        A three-element list ``[background, mass, calcification]`` holding the
        number of rows whose ``'type'`` equals ``'bkg'``, ``'mass'`` and
        ``'calcification'`` respectively.
    """
    class_names = ('bkg', 'mass', 'calcification')
    return [len(data[data['type'] == name]) for name in class_names]

In [None]:
# Re-parse the options with the lesion_net model and the training CSV, then
# build the data interface from them and prepare it for the 'fit' stage.
train_cli_args = [
    '--model_name', 'lesion_net',
    '--csv_file', 'csv/patch_trainv3.csv',
    '--lr_scheduler', 'step',
]
args = parser.parse_args(args=train_cli_args)

# Every parsed option is forwarded to DInterface as a keyword argument.
train_data_module = DInterface(**vars(args))
train_data_module.setup(stage='fit')

In [None]:

# Grouped bar chart of per-class counts for the train and validation splits.
labels = ['background', 'mass', 'calcification']

# The data module prepared for the 'fit' stage is `train_data_module`.
# data_summary returns counts as [background, mass, calcification], which
# lines up with `labels`.
num_train = data_summary(train_data_module.trainset.data_list)
num_val = data_summary(train_data_module.valset.data_list)
x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
# Shift each series by half a bar width so the two bars sit side by side
# around every tick.
rects1 = ax.bar(x - width / 2, num_train, width, label='train')
rects2 = ax.bar(x + width / 2, num_val, width, label='validation')

ax.set_ylabel('Counts')
ax.set_title('Train set summary')
ax.set_xticks(x, labels)
ax.legend()

# Print the count above each bar.
ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)

fig.tight_layout()

# The figure is written to disk rather than shown interactively.
plt.savefig('train set summary.png')

In [None]:
# Re-parse the options with the lesion_net model and the test CSV, then
# build the data interface from them and prepare it for the 'test' stage.
test_cli_args = [
    '--model_name', 'lesion_net',
    '--csv_file', 'csv/patch_testv3.csv',
    '--lr_scheduler', 'step',
]
args = parser.parse_args(args=test_cli_args)

# Every parsed option is forwarded to DInterface as a keyword argument.
test_data_module = DInterface(**vars(args))
test_data_module.setup(stage='test')

In [None]:
# Bar chart of per-class counts for the test split.
labels = ['background', 'mass', 'calcification']

# The data module prepared for the 'test' stage is `test_data_module`.
# NOTE(review): assumes DInterface.setup(stage='test') exposes the split as
# `testset` with a `data_list` table, mirroring `trainset`/`valset` — confirm
# against the DInterface implementation.
num_test = data_summary(test_data_module.testset.data_list)
x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
# A single series, so the bars are centred on the ticks.
rects = ax.bar(x, num_test, width, label='test')

ax.set_ylabel('Counts')
ax.set_title('Test set summary')
ax.set_xticks(x, labels)
ax.legend()

# Print the count above each bar.
ax.bar_label(rects, padding=3)

fig.tight_layout()

# The figure is written to disk rather than shown interactively.
plt.savefig('test set summary.png')