In [80]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.patches import Circle
import numpy as np
import os

In [81]:
plt.style.use('dark_background') # comment or delete it for light background

In [91]:
# Raw string so the regex `\s` reaches the parser intact instead of being an
# invalid string escape; a regex separator requires the python parsing engine.
mammo = pd.read_table('scan_file_data.txt', delimiter=r'\s', engine='python')

In [92]:
# rename the class column to avoid conflicts with the class keyword in python
mammo.columns = ['refnum', 'bg', 'ab_class', 'severity', 'x', 'y', 'radius']

In [None]:
mammo.refnum.value_counts()

In [None]:
mammo.severity.value_counts(normalize=True)

In [257]:
mammo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 330 entries, 0 to 329
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   refnum    330 non-null    object 
 1   bg        330 non-null    object 
 2   ab_class  330 non-null    object 
 3   severity  123 non-null    object 
 4   x         122 non-null    object 
 5   y         122 non-null    object 
 6   radius    119 non-null    float64
dtypes: float64(1), object(6)
memory usage: 18.2+ KB


In [98]:
from pathlib import Path
import re

INITIALLY REMOVE ONLY THE SCANS WHOSE X ENTRY IS THE INVALID VALUE *NOTE

In [320]:
test_df = mammo.copy()

In [321]:
test_df.drop_duplicates(subset='refnum', keep='first', inplace=True)

In [322]:
test_df.set_index(keys='refnum', drop=True, inplace=True)

In [323]:
test_df.index;

In [324]:
def delete_image(filename: str, directory='./all-mias/'):
    '''Deletes the first `<filename>.pgm` file found under `directory`.

    The directory is searched recursively and candidates are visited in
    sorted path order. Nothing happens when no file matches.

    Parameters
    ----------
    filename : str
        File name without the `.pgm` extension.
    directory : str
        Root folder to search.
    '''
    target = filename + '.pgm'
    for f_path in sorted(Path(directory).glob('**/*.pgm')):
        if f_path.name != target:
            continue
        try:
            os.remove(f_path)
        except FileNotFoundError as fnf:
            # the file disappeared between listing and removal: report it and
            # keep scanning for another file with the same name
            print(f'{fnf}')
        else:
            break
        

In [325]:
def clean_ds_files(df: pd.DataFrame) -> pd.DataFrame:
    '''Returns a copy of `df` without the rows whose x value is '*NOTE'.

    For every removed row the image named after its index label is also
    deleted from disk through `delete_image`. The x and y columns of the
    result are cast to float. The input frame is left untouched.
    '''
    cleaned = df.copy()
    # index labels of the rows carrying the invalid x marker
    invalid_refs = cleaned.index[cleaned.x == '*NOTE']

    for ref in invalid_refs:
        # drop from dataset
        cleaned.drop(ref, inplace=True)
        # delete from directory
        delete_image(ref)

    # make x and y float values
    cleaned.x = cleaned.x.astype(float)
    cleaned.y = cleaned.y.astype(float)

    return cleaned

In [326]:
clean_df = clean_ds_files(test_df)

In [328]:
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 319 entries, mdb001 to mdb322
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   bg        319 non-null    object 
 1   ab_class  319 non-null    object 
 2   severity  112 non-null    object 
 3   x         111 non-null    float64
 4   y         111 non-null    float64
 5   radius    111 non-null    float64
dtypes: float64(3), object(3)
memory usage: 17.4+ KB


In [105]:
# save clean to file
# NOTE(review): this writes `test_df`, not the `clean_df` returned by
# clean_ds_files in the cell above -- confirm which frame is meant to be persisted.
test_df.to_csv('test_df.csv')

In [106]:
type(test_df.loc['mdb005'])

pandas.core.series.Series

In [107]:
def create_scan_filenames_dic(path: str) -> dict:
    '''Creates a dictionary with image filenames.

    Searches `path` recursively for `.pgm` files and maps each file name
    without extension to the file's POSIX-style path. Files are visited in
    sorted path order, so when two files share a name the later one wins.
    '''
    return {
        f_path.stem: f_path.as_posix()
        for f_path in sorted(Path(path).glob('**/*.pgm'))
    }

In [108]:
def get_scan_info(filename: str) -> pd.Series:
    '''Looks up the row of the module-level `test_df` labelled `filename`.

    Returns the row as found by `.loc`. When the label is missing, prints
    'Invalid Index' and returns None.
    '''
    try:
        return test_df.loc[filename]
    except KeyError:
        print('Invalid Index')
    return None

In [109]:
scan_info = get_scan_info('mdb005').copy() # need to ensure we get a deep copy of the Series to avoid changing a value in it and keeping the reference
scan_info

bg              F
ab_class     CIRC
severity        B
x           477.0
y           133.0
radius       30.0
Name: mdb005, dtype: object

In [110]:
from PIL import Image

In [111]:
test_img = Image.open('./all-mias/mdb005.pgm')

In [199]:
class MammoScan:
    '''Pairs a mammogram image with its annotation row and derives subsamples.

    Parameters
    ----------
    scan : image object
        Used through `crop`, `resize`, `rotate` and `transpose` when building
        the transformations, and handed to `imshow` when plotting.
    sc_info : pd.Series
        Annotation row exposing `bg`, `ab_class`, `severity`, `x`, `y` and
        `radius`. It is stored by reference: missing crop values are written
        back into it the first time `transformations` is read, so pass a copy
        when the caller's row must stay untouched.
    '''

    # height used when the scan object does not expose a `height` attribute
    _DEFAULT_HEIGHT = 1024

    def __init__(self, scan, sc_info):
        self._scan = scan
        self._sc_info = sc_info

    @property
    def scan(self):
        '''The image object given at construction.'''
        return self._scan

    @property
    def scan_info(self):
        '''The annotation row given at construction (same object, not a copy).'''
        return self._sc_info

    @property
    def x(self):
        return self._sc_info.x

    @property
    def y(self):
        return self._sc_info.y

    @property
    def radius(self):
        return self._sc_info.radius

    @property
    def ab_class(self):
        return self._sc_info.ab_class

    @property
    def bg(self):
        return self._sc_info.bg

    @property
    def severity(self):
        return self._sc_info.severity

    @property
    def transformations(self):
        '''Dict keyed by angle (0, 90, 180, 270); each value is a dict with
        the keys 'rotated', 'mirr_lr' and 'mirr_tp'.

        Recomputed on every access. The first access may fill missing x, y
        and radius values in `scan_info`.
        '''
        return self.__transform()

    # instance method
    def plot(self):
        '''Shows the scan with a circle of `radius` around the annotation.

        The y axis runs from 0 at the bottom to the image height at the top
        and the circle is drawn at (x, height - y). Prints x, y and radius.
        '''
        img = self.scan
        height = self.__scan_height()

        # Create a figure. Equal aspect so circles look circular
        fig, ax = plt.subplots(1)

        fig.set_size_inches(12, 10)
        ax.set_aspect('equal')

        # Show the image
        ax.imshow(img)
        ax.set_ylim(bottom=0, top=height)

        # create a circle to patch on the image
        x = pd.to_numeric(self.x)
        y = pd.to_numeric(self.y)
        r = pd.to_numeric(self.radius)
        circ = Circle((x, height - y), r, fill=False)
        ax.add_patch(circ)
        print(x, y, r)

    # private method
    def __scan_height(self):
        '''Height of the scan in pixels, or the default when it is unknown.'''
        return getattr(self._scan, 'height', self._DEFAULT_HEIGHT)

    # private method
    def __set_x(self, xValue):
        self._sc_info.x = xValue

    # private method
    def __set_y(self, yValue):
        self._sc_info.y = yValue

    # private method
    def __set_radius(self, rValue):
        self._sc_info.radius = rValue

    # private method
    def __get_crop_coords(self):
        '''Returns a tuple with x, y and r, filling in missing values first.

        A missing radius becomes 48.0. A missing x or y becomes a random
        whole number from 500 to 512 (the upper bound 513 is exclusive).
        The filled values are stored in `scan_info`.
        '''
        if pd.isnull(self.radius):
            self.__set_radius(48.0)
        if pd.isnull(self.x):
            self.__set_x(float(np.random.randint(500, 513)))
        if pd.isnull(self.y):
            self.__set_y(float(np.random.randint(500, 513)))

        return (self.x, self.y, self.radius)

    # private method
    def __transform(self):
        '''Creates a dict with rotated and mirrored versions of the cropped scan.

        The scan is cropped to the square of half-width `radius` around the
        annotation, resized to 48x48, then rotated by 0, 90, 180 and 270
        degrees; every rotation is also mirrored left-right and top-bottom.
        '''
        transformations = dict()
        # get crop values
        x, y, r = self.__get_crop_coords()
        # plot() draws the annotation at height - y, i.e. y is counted from the
        # bottom edge, whereas the crop box is expressed from the top edge;
        # convert so the crop covers the same region the circle marks
        y_top = self.__scan_height() - y
        # crop and resize scan
        cropped_scan = self.scan.crop((x - r, y_top - r, x + r, y_top + r))
        resized_scan = cropped_scan.resize((48, 48))
        # create rotated images
        for angle in [0, 90, 180, 270]:
            rotated = resized_scan.rotate(angle)  # rotated by angle
            mirr_lr = rotated.transpose(Image.FLIP_LEFT_RIGHT)
            mirr_tp = rotated.transpose(Image.FLIP_TOP_BOTTOM)
            transformations[angle] = {
                'rotated': rotated,
                'mirr_lr': mirr_lr,
                'mirr_tp': mirr_tp,
            }

        return transformations

In [181]:
test = MammoScan(test_img, scan_info)

In [None]:
# Build the subsamples of the example scan once; every read of the property
# runs the crop/rotate/mirror pipeline again, so keep the result in a variable.
transformed = test.transformations

for angle, imgs in transformed.items():
    for img in imgs.values():
        print(angle)
        display(img)

# Some transformations are duplicates of each other:
# 90 degrees left-right == 270 degrees top-bottom
# 0 degrees left-right == 180 degrees top-bottom

In [158]:
scans_filenames_dic = create_scan_filenames_dic('./all-mias/')

In [225]:
def save_subsamples(scans_dic: dict, df: pd.DataFrame, save_images: bool = False) -> pd.DataFrame:
    '''Builds the transformed subsamples of every scan and their annotation table.

    Parameters
    ----------
    scans_dic : dict
        Maps a scan name (a label of `df`'s index) to the path of its image.
    df : pd.DataFrame
        Annotation table indexed by scan name.
    save_images : bool
        When True, every subsample is also written to the `subsamples` folder.
        Defaults to False, in which case no image is written.

    Returns
    -------
    pd.DataFrame
        One row per subsample, indexed by the subsample file name and holding
        the annotation values of the scan it was derived from (including any
        crop values filled in by `MammoScan`).

    Raises
    ------
    KeyError
        When a scan name of `scans_dic` is not a label of `df`.
    '''
    # define subsamples folder
    folder = 'subsamples'
    try:
        # create if not yet
        os.makedirs(folder, exist_ok=True)
    except OSError:
        print('An error occurred when searching for the folder')

    # rows are collected here and turned into a frame once at the end
    records = []

    # iterate dictionary of filenames
    for scan_name, scan_path in scans_dic.items():
        # copy the row: MammoScan writes missing crop values back into it
        scan_info = df.loc[scan_name].copy()

        try:
            # the context manager closes the source file once the
            # transformations have been built from it
            with Image.open(scan_path) as scan:
                transf_scans = MammoScan(scan, scan_info).transformations
        except FileNotFoundError as fnf:
            # skip this scan instead of carrying on with a stale image
            print(fnf)
            continue

        # create filenames
        filenames = create_subsample_filename(scan_name, transf_scans)
        # get transformed scans Image objects
        imgs = get_transformed_scans(transf_scans)

        for sub_name, image in zip(filenames, imgs):
            # new observation named after the subsample
            records.append(scan_info.rename(sub_name))
            print(scan_name)
            path = os.path.join('.', folder, sub_name)
            print(path)
            if not save_images:
                continue
            try:
                image.save(path, compress_level=0)
            except ValueError:
                print('Output format could not be determined from the file name.')
            except OSError as ose:
                print('File could not be written.')
                print(ose)

    return pd.DataFrame(records)

In [160]:
def create_subsample_filename(scan_name: str, transf_dic: dict) -> list:
    '''Creates suffix pattern filenames for transformed scans.

    Yields one `<scan_name>_<angle>_<transformation>.png` name per entry of
    the nested dictionary, in the dictionary's iteration order.
    '''
    return [
        f'{scan_name}_{angle}_{tf}.png'
        for angle, transfs in transf_dic.items()
        for tf in transfs.keys()
    ]

In [163]:
def get_transformed_scans(transf_dic: dict):
    '''Flattens the nested transformations dictionary into a list.

    The values of every inner dictionary are returned in iteration order,
    outer dictionary first.
    '''
    return [
        scan
        for _angle, transfs in transf_dic.items()
        for scan in transfs.values()
    ]

In [None]:
# `scans_filenames_dic` is the name -> path mapping returned by
# create_scan_filenames_dic; no variable called `scans_dic` exists.
test_sub = save_subsamples(scans_filenames_dic, test_df)

In [228]:
test_sub

Unnamed: 0,ab_class,bg,radius,severity,x,y
mdb001_0_rotated.png,CIRC,G,197.0,B,535.0,425.0
mdb001_0_mirr_lr.png,CIRC,G,197.0,B,535.0,425.0
mdb001_0_mirr_tp.png,CIRC,G,197.0,B,535.0,425.0
mdb001_90_rotated.png,CIRC,G,197.0,B,535.0,425.0
mdb001_90_mirr_lr.png,CIRC,G,197.0,B,535.0,425.0
mdb001_90_mirr_tp.png,CIRC,G,197.0,B,535.0,425.0
mdb001_180_rotated.png,CIRC,G,197.0,B,535.0,425.0
mdb001_180_mirr_lr.png,CIRC,G,197.0,B,535.0,425.0
mdb001_180_mirr_tp.png,CIRC,G,197.0,B,535.0,425.0
mdb001_270_rotated.png,CIRC,G,197.0,B,535.0,425.0


In [169]:
def plot_scan(scan: MammoScan):
    '''Plots a scan with its annotated region circled.

    Thin wrapper kept for existing callers: the drawing is done by
    `MammoScan.plot`, whose body this function previously duplicated.
    '''
    scan.plot()

In [214]:
test_df

Unnamed: 0_level_0,bg,ab_class,severity,x,y,radius
refnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mdb001,G,CIRC,B,535.0,425.0,197.0
mdb002,G,CIRC,B,522.0,280.0,69.0
mdb003,D,NORM,,,,
mdb004,D,NORM,,,,
mdb005,F,CIRC,B,477.0,133.0,30.0
...,...,...,...,...,...,...
mdb318,D,NORM,,,,
mdb319,D,NORM,,,,
mdb320,D,NORM,,,,
mdb321,D,NORM,,,,


In [215]:
# `scan_info` hands back the Series held by the scan object itself; copy it so
# that renaming the row does not rename the scan's own annotation.
newt = test.scan_info.copy()
newt.name = 'test'

In [216]:
# DataFrame.append was removed in pandas 2.0; turn the Series into a
# one-row frame (its name becomes the row label) and concatenate instead.
pd.concat([test_df, newt.to_frame().T])

Unnamed: 0_level_0,bg,ab_class,severity,x,y,radius
refnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mdb001,G,CIRC,B,535.0,425.0,197.0
mdb002,G,CIRC,B,522.0,280.0,69.0
mdb003,D,NORM,,,,
mdb004,D,NORM,,,,
mdb005,F,CIRC,B,477.0,133.0,30.0
...,...,...,...,...,...,...
mdb319,D,NORM,,,,
mdb320,D,NORM,,,,
mdb321,D,NORM,,,,
mdb322,D,NORM,,,,
