In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.patches import Circle
import numpy as np
import os
import sys

In [None]:
# Add ../src/ to the import search path; presumably this is what lets the
# `modules` package imported in the next cell be found.
sys.path.append('../src/')

In [None]:
from modules import helpers
from modules import mammoscan

In [None]:
# Draw every figure in this notebook with matplotlib's dark theme.
plt.style.use('dark_background') # comment out or delete this line for a light background

In [None]:
# Load the scan metadata. The delimiter is a regular expression (one whitespace
# character), which is why the python parsing engine is requested. It is written
# as a raw string because '\s' is not a valid escape sequence in a normal string
# literal and triggers a warning on recent Python versions.
mammo = pd.read_table('../scan_file_data.txt', delimiter=r'\s', engine='python')

In [None]:
# Name the seven columns explicitly. The class column is called `ab_class`
# because `class` is a reserved word in Python, so attribute-style access
# (`mammo.class`) would be a syntax error.
mammo.columns = ['refnum', 'bg', 'ab_class', 'severity', 'x', 'y', 'radius']

In [None]:
# Rows per reference number; a count above 1 means that refnum appears on
# several rows of the table.
mammo.refnum.value_counts()

In [None]:
# Share of each severity value (normalize=True returns proportions instead of
# counts; missing values are left out by default).
mammo.severity.value_counts(normalize=True)

In [None]:
# Column dtypes and non-null counts of the raw table.
mammo.info()

In [None]:
from pathlib import Path
import re

INITIALLY REMOVE ONLY THE SCANS WITH AN INVALID X ENTRY (*NOTE)

In [None]:
# Work on a copy so the in-place edits in the following cells leave `mammo` untouched.
test_df = mammo.copy()

In [None]:
# Keep only the first row for each refnum; later rows with the same refnum are
# discarded. This modifies test_df in place.
test_df.drop_duplicates(subset='refnum', keep='first', inplace=True)

In [None]:
# Use refnum as the index (and drop it as a column) so a scan's row can be
# looked up by name with .loc. This modifies test_df in place.
test_df.set_index(keys='refnum', drop=True, inplace=True)

In [None]:
# The trailing semicolon suppresses the cell output, so this line shows nothing
# as written; remove the semicolon to inspect the index.
test_df.index;

In [None]:
def delete_image(filename: str, directory='../all-mias/'):
    '''Delete the first `<filename>.pgm` file found under `directory`.

    The directory is searched recursively and candidates are visited in sorted
    path order; the search stops after the first successful removal. Nothing
    happens when no file matches.

    Parameters
    ----------
    filename : str
        File name without the `.pgm` extension.
    directory : str
        Root folder to search.
    '''
    target = filename + '.pgm'
    for f_path in sorted(Path(directory).glob('**/*.pgm')):
        if f_path.name != target:
            continue
        try:
            os.remove(f_path)
        except FileNotFoundError as fnf:
            # The file disappeared between listing and removal: report the
            # actual error (the message used to be the literal text '{fnf}')
            # and keep looking at the remaining candidates.
            print(fnf)
            continue
        break
        

In [None]:
def clean_ds_files(df: pd.DataFrame) -> pd.DataFrame:
    '''Return a copy of `df` without the rows whose x entry is '*NOTE'.

    For every removed row the image named after the row label is also deleted
    from disk through `delete_image`, so calling this is destructive. The x and
    y columns of the result are converted to float. `df` itself is not changed.
    '''
    cleaned = df.copy()
    # labels of the rows carrying the invalid x placeholder
    invalid_labels = cleaned.loc[cleaned.x == '*NOTE'].index

    for label in invalid_labels:
        # remove the row first, then the matching file
        cleaned.drop(label, inplace=True)
        delete_image(label)

    # x and y become float values
    return cleaned.astype({'x': float, 'y': float})

In [None]:
# Drop the '*NOTE' rows and cast x / y to float.
# NOTE: clean_ds_files also deletes the matching .pgm files from disk (via
# delete_image), so this cell is destructive.
clean_df = clean_ds_files(test_df)

In [None]:
# Check the result of the cleaning step: x and y should now be float columns.
clean_df.info()

In [None]:
# save the cleaned table to file (clean_df; test_df still holds the '*NOTE' rows)
clean_df.to_csv('test_df.csv')

In [None]:
# Check what a single-label .loc lookup returns for one scan (a Series is
# expected as long as the refnum is unique in the index).
type(clean_df.loc['mdb005'])

In [None]:
def create_scan_filenames_dic(path: str) -> dict:
    '''Map each .pgm file name (without extension) to its POSIX-style path.

    `path` is searched recursively. Files are inserted in sorted path order, so
    when two files share a name the later path wins.
    '''
    pgm_files = sorted(Path(path).glob('**/*.pgm'))
    return {pgm.stem: pgm.as_posix() for pgm in pgm_files}

In [None]:
# Take an explicit copy of the row: MammoScan keeps a reference to the Series it
# is given and may write to it (missing x / y / radius are filled in), and those
# writes must not reach clean_df.
scan_info = clean_df.loc['mdb005'].copy()
scan_info

In [None]:
from PIL import Image

In [None]:
# Open the scan that belongs to the metadata row selected above; leaving the
# image as the last expression displays it inline.
test_img = Image.open('../all-mias/mdb005.pgm')
test_img

In [None]:
class MammoScan:
    '''Pairs a scan image with its metadata row and derives subsamples from it.

    Parameters
    ----------
    scan
        The scan image. `transformations` calls `crop`, `resize`, `rotate` and
        `transpose` on it (a PIL image in this notebook); `plot` only hands it
        to `imshow`.
    sc_info
        Series-like record exposing `x`, `y`, `radius`, `ab_class`, `bg` and
        `severity`. It is stored by reference and written to when crop
        coordinates are missing, so pass a copy if the source must stay intact.
    '''

    # Height of a scan in pixels. Used to convert the metadata y coordinate
    # (measured from the bottom edge, see plot) into an image row.
    # NOTE(review): assumes every scan is 1024 pixels high - confirm for other datasets.
    _SCAN_HEIGHT = 1024
    # Radius used when the metadata has none.
    _DEFAULT_RADIUS = 48.0
    # Size every crop is resized to.
    _SUBSAMPLE_SIZE = (48, 48)
    # Rotations, in degrees, applied to the resized crop.
    _ROTATION_ANGLES = (0, 90, 180, 270)

    def __init__(self, scan, sc_info):
        self._scan = scan
        self._sc_info = sc_info

    @property
    def scan(self):
        '''The scan image passed to the constructor.'''
        return self._scan

    @property
    def scan_info(self):
        '''The metadata record itself (not a copy).'''
        return self._sc_info

    @property
    def x(self):
        return self._sc_info.x

    @property
    def y(self):
        return self._sc_info.y

    @property
    def radius(self):
        return self._sc_info.radius

    @property
    def ab_class(self):
        return self._sc_info.ab_class

    @property
    def bg(self):
        return self._sc_info.bg

    @property
    def severity(self):
        return self._sc_info.severity

    @property
    def transformations(self):
        '''Dict of rotated and mirrored subsamples, keyed by rotation angle.

        Recomputed on every access. The first access may also fill missing
        x / y / radius values in the metadata record.
        '''
        return self.__transform()

    # instance method
    def plot(self):
        '''Show the scan with a circle of `radius` around the recorded centre.

        The y axis is set to run from 0 at the bottom to the scan height at the
        top and the circle is drawn at (x, height - y). Prints x, y and radius.
        '''
        img = self.scan

        # Create a figure. Equal aspect so circles look circular
        fig, ax = plt.subplots(1)

        fig.set_size_inches(12, 10)
        ax.set_aspect('equal')

        # Show the image
        ax.imshow(img)
        ax.set_ylim(bottom=0, top=self._SCAN_HEIGHT)

        # create a circle to patch on the image
        x = pd.to_numeric(self.x)
        y = pd.to_numeric(self.y)
        r = pd.to_numeric(self.radius)
        circ = Circle((x, self._SCAN_HEIGHT - y), r, fill=False)
        ax.add_patch(circ)
        print(x, y, r)

    # private method
    def __set_x(self, xValue):
        self._sc_info.x = xValue

    # private method
    def __set_y(self, yValue):
        self._sc_info.y = yValue

    # private method
    def __set_radius(self, rValue):
        self._sc_info.radius = rValue

    # private method
    def __get_crop_coords(self):
        '''Returns a tuple with x, y and r.

        Missing values are replaced and written back to the metadata record:
        the radius becomes the default radius, x and y each become a random
        whole number in [500, 513).
        '''
        if pd.isnull(self.radius):
            self.__set_radius(self._DEFAULT_RADIUS)
        if pd.isnull(self.x):
            self.__set_x(float(np.random.randint(500, 513)))
        if pd.isnull(self.y):
            self.__set_y(float(np.random.randint(500, 513)))

        return (self.x, self.y, self.radius)

    # private method
    def __crop_box(self):
        '''Returns the (left, upper, right, lower) box of the square to crop.

        The metadata y is converted to an image row with `height - y`, the
        same conversion plot() applies when it draws the circle. Cropping at
        the raw y took the square from the vertically mirrored position.
        '''
        x, y, r = self.__get_crop_coords()
        row = self._SCAN_HEIGHT - y
        return (x - r, row - r, x + r, row + r)

    # private method
    def __transform(self):
        '''Creates a dict
                  with rotated and mirrored versions of self.scan

        Keys are the rotation angles; every value is a dict holding the
        'rotated', 'mirr_lr' (left-right flip) and 'mirr_tp' (top-bottom flip)
        image for that angle.
        '''
        transformations = dict()
        # crop the region of interest and bring it to the common size
        cropped_scan = self.scan.crop(self.__crop_box())
        resized_scan = cropped_scan.resize(self._SUBSAMPLE_SIZE)
        # create rotated images and their two mirrored versions
        for angle in self._ROTATION_ANGLES:
            rotated = resized_scan.rotate(angle)
            transformations[angle] = {
                'rotated': rotated,
                'mirr_lr': rotated.transpose(Image.FLIP_LEFT_RIGHT),
                'mirr_tp': rotated.transpose(Image.FLIP_TOP_BOTTOM),
            }

        return transformations

In [None]:
# Pair the opened scan with its metadata row.
test = MammoScan(test_img, scan_info)

In [None]:
# Reading the property crops the scan and builds every rotated / mirrored
# variant; it may also fill missing x / y / radius values in scan_info.
transformed = test.transformations

In [None]:
# Show every variant with its rotation angle printed above it (the angle is
# printed once per variant of that rotation).
for angle, imgs in transformed.items():
    for img in imgs.values():
        print(angle)
        display(img)
        
        
# Observation from the output: some variants look identical.
# 90 degrees left-right == 270 degrees top-bottom
# 0 degrees left-right == 180 degrees top-bottom

In [None]:
# Map every scan name to the path of its .pgm file under ../all-mias/ (searched recursively).
scans_filenames_dic = create_scan_filenames_dic('../all-mias/')

In [None]:
def save_subsamples(scans_dic: dict, df: pd.DataFrame, folder: str = '../subsamples') -> pd.DataFrame:
    '''Create, save and index the subsamples of every scan in `scans_dic`.

    For each scan the transformed crops are produced through `MammoScan` and
    written to `folder` as PNG files named by `create_subsample_filename`.

    Parameters
    ----------
    scans_dic : dict
        Maps a scan name to the path of its image file.
    df : pd.DataFrame
        Metadata indexed by scan name; one row is read per scan.
    folder : str
        Output directory, created when missing.

    Returns
    -------
    pd.DataFrame
        One row per image that was written, indexed by the subsample file
        name and carrying the metadata of the scan it was cut from (including
        any x / y / radius values filled in by `MammoScan`).

    Raises
    ------
    KeyError
        If a scan name from `scans_dic` is not in the index of `df`.
    '''
    try:
        os.makedirs(folder, exist_ok=True)
    except OSError as ose:
        # best effort, as before: report and carry on (every save will then fail and be reported)
        print('An error occurred when creating the folder')
        print(ose)

    rows = []
    for scan_name, scan_path in scans_dic.items():
        try:
            scan = Image.open(scan_path)
        except FileNotFoundError as fnf:
            # skip this scan; continuing would reuse the previous scan's image
            print(fnf)
            continue

        # the context manager closes the underlying file once the scan is processed
        with scan:
            # copy the row: MammoScan may write missing crop values into it
            scan_info = df.loc[scan_name].copy()
            m_scan = MammoScan(scan, scan_info)
            transf_scans = m_scan.transformations
            filenames = create_subsample_filename(scan_name, transf_scans)
            imgs = get_transformed_scans(transf_scans)

            for sub_name, image in zip(filenames, imgs):
                print(scan_name)
                path = os.path.join(folder, sub_name)
                print(path)
                try:
                    image.save(path, compress_level=0)
                except ValueError:
                    print('Output format could not be determined from the file name.')
                    continue
                except OSError as ose:
                    print('File could not be written.')
                    print(ose)
                    continue

                # record the subsample only once its file exists
                row = scan_info.copy()
                row.name = sub_name
                rows.append(row)

    # build the frame once instead of appending row by row
    return pd.DataFrame(rows)

In [None]:
def create_subsample_filename(scan_name: str, transf_dic: dict) -> list:
    '''Builds the PNG file names for the transformed scans.

    One name of the form `<scan_name>_<angle>_<transformation>.png` is produced
    per entry of `transf_dic`, walking the angles and then the transformations
    of each angle in dictionary order.
    '''
    return [
        f'{scan_name}_{angle}_{tf}.png'
        for angle, transfs in transf_dic.items()
        for tf in transfs.keys()
    ]

In [None]:
def get_transformed_scans(transf_dic: dict):
    '''Flattens the per-angle dictionaries into a single list of scans.

    The scans come out angle by angle, in dictionary order, which matches the
    order of the names built by `create_subsample_filename`.
    '''
    return [scan for transfs in transf_dic.values() for scan in transfs.values()]

In [None]:
# Build and save the subsamples of every scan found on disk; the returned frame
# holds the metadata rows of the subsamples.
test_sub = save_subsamples(scans_filenames_dic, clean_df)

In [None]:
# Display the subsample metadata.
# NOTE(review): this renders the whole frame; test_sub.head() would keep the output short.
test_sub

In [None]:
def plot_scan(scan: MammoScan):
    '''Show a scan with a circle of its radius around the recorded centre.

    Draws the same figure as `MammoScan.plot`: the y axis runs from 0 at the
    bottom to 1024 at the top and the circle sits at (x, 1024 - y). Prints
    x, y and radius.
    '''
    # equal aspect keeps the circle round
    figure, axes = plt.subplots(1)
    figure.set_size_inches(12, 10)
    axes.set_aspect('equal')

    # image first, then the flipped y range
    axes.imshow(scan.scan)
    axes.set_ylim(bottom=0, top=1024)

    # circle marking the recorded centre and radius
    cx, cy, rad = (pd.to_numeric(value) for value in (scan.x, scan.y, scan.radius))
    axes.add_patch(Circle((cx, 1024 - cy), rad, fill=False))
    print(cx, cy, rad)

In [None]:
# Display the de-duplicated, refnum-indexed table (it still contains the '*NOTE' rows).
# NOTE(review): this renders the whole frame; test_df.head() would keep the output short.
test_df

In [None]:
# Copy before renaming: `test.scan_info` returns the very Series held by `test`,
# so renaming it directly would also rename the record inside `test`.
newt = test.scan_info.copy()
newt.name = 'test'

In [None]:
# DataFrame.append was removed in pandas 2.0. Turn the Series into a one-row
# frame (indexed by its name) and concatenate it instead; test_df is not changed.
pd.concat([test_df, newt.to_frame().T])