In [6]:
import os

import cv2
import pandas as pd


from tqdm import tqdm

# for reading and displaying images
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt
# %matplotlib inline

# for creating validation set
from sklearn.model_selection import train_test_split

# for evaluating the model
from sklearn.metrics import accuracy_score

# PyTorch libraries and modules
import torch
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

# torchvision for pre-trained models
from torchvision import models
import argparse
from collections import Counter
import time

import numpy as np

In [7]:
# Mount Google Drive at /content/gdrive; the dataset paths used below live
# under /content/gdrive/MyDrive. Only works inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [8]:
    # Convert every mini-MIAS .pgm scan to PNG and file it under the folder that
    # matches its label (column 1 of data_description.csv, indexed by image name).
    df = pd.read_csv("/content/gdrive/MyDrive/mini-MIAS/data_description.csv", header=None, index_col=0)

    # Label code -> (destination folder, class name shown in the progress message).
    # Keys carry no padding: the label read from the CSV is stripped at lookup
    # time. (The earlier bare `df[1].str.strip()` discarded its result, so the
    # comparisons had to rely on the exact padded literals 'G ', 'D ', 'F '.)
    # `df` itself is left untouched so anything else reading it sees the raw values.
    label_targets = {
        "G": ("fatty_glandular", "Fatty-Glandular"),
        "D": ("dense_glandular", "Dense-Glandular"),
        "F": ("fatty", "Fatty"),
    }

    source_dir = "/content/gdrive/MyDrive/mini-MIAS/images_original"
    processed_dir = "/content/gdrive/MyDrive/mini-MIAS/images_processed"

    # cv2.imwrite reports failure (returns False) instead of creating missing
    # folders, so make sure every destination exists up front.
    for folder, display_name in label_targets.values():
        os.makedirs(os.path.join(processed_dir, folder), exist_ok=True)

    for img_pgm in os.listdir(source_dir):
        if not img_pgm.endswith(".pgm"):
            continue

        img = cv2.imread("{}/{}".format(source_dir, img_pgm))
        if img is None:  # cv2.imread signals an unreadable file with None
            print("Skipping {}: could not be read as an image.".format(img_pgm))
            continue

        img_name = img_pgm.split(".")[0]
        if img_name not in df.index:
            print("Skipping {}: no entry in data_description.csv.".format(img_pgm))
            continue

        # Works whether the image name appears on one CSV row or on several;
        # the first row's label is used.
        label = df.loc[[img_name], 1].iloc[0]

        target = label_targets.get(str(label).strip())
        if target is None:
            # Previously an unmatched label fell through with `new_path` either
            # undefined or still pointing at the previous image's file.
            print("Skipping {}: unrecognised label {!r}.".format(img_pgm, label))
            continue

        folder, display_name = target
        new_path = "{}/{}/{}.png".format(processed_dir, folder, img_name)
        if not cv2.imwrite(new_path, img):
            print("Failed to write {}.".format(new_path))
            continue

        print("Image {} moved to {}({} case).".format(img_pgm, display_name, label))

    print("Finished converting and sorting dataset.")

Image mdb008.pgm moved to Fatty-Glandular(G  case).
Image mdb009.pgm moved to Fatty(F  case).
Image mdb006.pgm moved to Fatty(F  case).
Image mdb001.pgm moved to Fatty-Glandular(G  case).
Image mdb010.pgm moved to Fatty(F  case).
Image mdb004.pgm moved to Dense-Glandular(D  case).
Image mdb012.pgm moved to Fatty(F  case).
Image mdb011.pgm moved to Fatty(F  case).
Image mdb015.pgm moved to Fatty-Glandular(G  case).
Image mdb005.pgm moved to Fatty(F  case).
Image mdb017.pgm moved to Fatty-Glandular(G  case).
Image mdb013.pgm moved to Fatty-Glandular(G  case).
Image mdb002.pgm moved to Fatty-Glandular(G  case).
Image mdb007.pgm moved to Fatty-Glandular(G  case).
Image mdb018.pgm moved to Fatty-Glandular(G  case).
Image mdb014.pgm moved to Fatty-Glandular(G  case).
Image mdb003.pgm moved to Dense-Glandular(D  case).
Image mdb019.pgm moved to Fatty-Glandular(G  case).
Image mdb016.pgm moved to Fatty-Glandular(G  case).
Image mdb025.pgm moved to Fatty(F  case).
Image mdb023.pgm moved to Fatt

In [9]:
# Show which class folders exist under the processed-image directory.
processed_root = '/content/gdrive/MyDrive/mini-MIAS/images_processed'
print(os.listdir(processed_root))


['fatty', 'fatty_glandular', 'dense_glandular']


In [10]:
from torchvision import transforms, datasets, models

# Per-channel statistics used for normalization (ImageNet standards).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# 'train' pipeline: light augmentation (small random rotation, random
# horizontal flip), then resize to 299x299, convert to a tensor and normalize.
train_steps = [
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
]

# Only a 'train' entry is defined.
image_transforms = {'train': transforms.Compose(train_steps)}

In [11]:

def imshow_tensor(image, ax=None, title=None):
    """Display a normalized image tensor and return the de-normalized pixels.

    Args:
        image: torch tensor laid out channels-first (C, H, W) that was
            normalized with the mean/std constants below. It may live on any
            device and may track gradients.
        ax: Axes-like object to draw on. A new figure and axes are created
            when omitted.
        title: Optional title to put on ``ax``; nothing is set when ``None``.

    Returns:
        Tuple ``(ax, image)`` where ``image`` is the (H, W, C) NumPy array
        after undoing the normalization and clipping to [0, 1].
    """
    if ax is None:
        _, ax = plt.subplots()

    # detach()/cpu() make the conversion work for GPU tensors and tensors that
    # require grad; transpose moves the color channel to the last dimension.
    image = image.detach().cpu().numpy().transpose((1, 2, 0))

    # Reverse the preprocessing normalization: x * std + mean.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean

    # Keep pixel values inside the displayable [0, 1] range.
    image = np.clip(image, 0, 1)

    ax.imshow(image)
    # Turn the axis off on the axes we drew on; plt.axis() would act on
    # whichever axes happens to be current, which may not be `ax`.
    ax.axis('off')
    if title is not None:
        ax.set_title(title)

    return ax, image

In [13]:
from torch.utils.data import DataLoader, sampler, random_split

# One image per batch.
batch_size = 1

# Every class folder under images_processed is loaded as a single dataset and
# passed through the 'train' transform pipeline.
all_data = datasets.ImageFolder(
    root='/content/gdrive/MyDrive/mini-MIAS/images_processed',
    transform=image_transforms['train'],
)

# --- Disabled train/val/test split, kept for reference ----------------------
# NOTE: it indexes image_transforms['val'] and ['test']; those keys must exist
# before this is re-enabled.
#train_data_len = int(len(all_data)*0.8)
#valid_data_len = int((len(all_data) - train_data_len)/2)
#test_data_len = int(len(all_data) - train_data_len - valid_data_len)
#train_data, val_data, test_data = random_split(all_data, [train_data_len, valid_data_len, test_data_len])
#train_data.dataset.transform = image_transforms['train']
#val_data.dataset.transform = image_transforms['val']
#test_data.dataset.transform = image_transforms['test']
#print(len(train_data), len(val_data), len(test_data))

# Single shuffled loader over the whole dataset.
data_loader = DataLoader(dataset=all_data, shuffle=True, batch_size=batch_size)
#val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
#test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [14]:

# Pull one batch from the loader and report the feature / label tensor shapes.
trainiter = iter(data_loader)
first_batch = next(trainiter)
features_train, labels_train = first_batch
print(features_train.shape, labels_train.shape)

torch.Size([1, 3, 299, 299]) torch.Size([1])


In [16]:
import torch

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# VGG16 with batch normalization and pretrained weights (downloaded on first
# use), moved to the selected device.
# NOTE(review): the model is left in its default mode here; if it is used for
# inference later, confirm model.eval() is called before the forward passes.
model = models.vgg16_bn(pretrained=True).to(device)

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


HBox(children=(FloatProgress(value=0.0, max=553507836.0), HTML(value='')))




In [17]:
# Display the layers held in the model's `features` sub-module.
model.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU(inplace=True)
  (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU(inplace=True)
  (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 

In [18]:
# Display the model's `avgpool` layer.
model.avgpool

AdaptiveAvgPool2d(output_size=(7, 7))