In [1]:
%matplotlib notebook
import pandas as pd
import numpy as np
import pydicom
import skimage
import matplotlib.pyplot as plt

#### First, read in your dataframe of images with bounding box coordinates

In [2]:
bbox = pd.read_csv('bounding_boxes.csv')
bbox

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,583,dicom_00023075_033.dcm,Mass,239.502222,535.077934,72.817778,65.991111
1,584,dicom_00029579_005.dcm,Mass,609.28,189.19349,73.955556,71.68
2,585,dicom_00013659_019.dcm,Mass,559.217778,167.575712,102.4,136.533333


#### For each of the three DICOM files listed in the dataframe, do the following: 
1. Read the DICOM's pixel_array attribute into a dataframe using the pydicom.dcmread function
2. Visualize the image using plt.imshow
3. Plot a histogram of the image pixel intensity values
4. Find the mean and standard deviation of intensity values of the image, and standardize it using the standardization formula:
```test
(X - X_mean)/X_std_dev
```

5. Re-plot a histogram of the normalized intensity values
6. Use the coordinates in the dataframe that tell the starting x & y values, and the width and height of the mass to plot visualize only the mass using plt.imshow
7. Plot a histogram of the normalized intensity values of the mass

### Image 1

### Reading first DICOM file

In [3]:
dcm = pydicom.dcmread('dicom_00023075_033.dcm')
print(dcm)

(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.6.1.4.1.11129.5.5.167144088560981875590694938780706491173530
(0008, 0060) Modality                            CS: 'DX'
(0008, 1030) Study Description                   LO: 'Mass'
(0010, 0020) Patient ID                          LO: '23075'
(0010, 0040) Patient's Sex                       CS: 'M'
(0010, 1010) Patient's Age                       AS: '31'
(0020, 000d) Study Instance UID                  UI: 1.3.6.1.4.1.11129.5.5.129257632452512930332701350119063480053433
(0020, 000e) Series Instance UID                 UI: 1.3.6.1.4.1.11129.5.5.111498372484777560349612235514239542820181
(0028, 0002) Samples per Pixel                   US: 1
(0028, 0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028, 0010) Rows                                US: 1024
(0028, 0011) Columns                             US: 1024
(0028, 0100) Bits Allocated  

In [4]:
plt.imshow(dcm.pixel_array, cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7fb697b02f50>

In [5]:
plt.figure(figsize=(5,5))
plt.hist(dcm.pixel_array.ravel(), bins=256) # ravel return contiguous flattened array

<IPython.core.display.Javascript object>

(array([3.455e+03, 2.969e+03, 2.961e+03, 2.387e+03, 2.990e+03, 3.522e+03,
        2.789e+03, 1.972e+03, 1.539e+03, 1.108e+03, 9.850e+02, 8.390e+02,
        9.200e+02, 8.530e+02, 8.690e+02, 9.220e+02, 9.840e+02, 9.930e+02,
        1.031e+03, 1.071e+03, 1.255e+03, 1.202e+03, 1.335e+03, 1.412e+03,
        1.592e+03, 1.835e+03, 1.862e+03, 2.141e+03, 2.182e+03, 2.248e+03,
        2.357e+03, 2.380e+03, 2.433e+03, 2.570e+03, 2.504e+03, 2.684e+03,
        2.679e+03, 2.814e+03, 2.777e+03, 2.970e+03, 3.013e+03, 3.145e+03,
        0.000e+00, 3.320e+03, 3.488e+03, 3.615e+03, 3.928e+03, 4.042e+03,
        4.177e+03, 4.408e+03, 4.334e+03, 4.363e+03, 4.550e+03, 4.525e+03,
        4.557e+03, 4.777e+03, 4.673e+03, 4.804e+03, 4.987e+03, 4.852e+03,
        4.824e+03, 4.876e+03, 4.877e+03, 4.940e+03, 5.113e+03, 5.074e+03,
        5.173e+03, 5.038e+03, 4.991e+03, 4.918e+03, 4.938e+03, 4.806e+03,
        4.755e+03, 4.764e+03, 4.647e+03, 4.530e+03, 4.712e+03, 4.671e+03,
        4.816e+03, 4.924e+03, 4.898e+0

In [6]:
# calculate mean and standard deviaiton

mean_intensity, std_intensity = np.mean(dcm.pixel_array), np.std(dcm.pixel_array)
print('mean :', mean_intensity, ', std :',std_intensity)

mean : 123.25588417053223 , std : 57.47256019573095


In [7]:
# Normalise the image

new_img = dcm.pixel_array.copy()
new_img = (new_img - mean_intensity)/std_intensity

In [8]:
# Plotting the normalized pixel intensity

plt.figure(figsize=(5,5))
plt.hist(new_img.ravel(), bins=256)

<IPython.core.display.Javascript object>

(array([3.455e+03, 2.969e+03, 2.961e+03, 2.387e+03, 2.990e+03, 3.522e+03,
        2.789e+03, 1.972e+03, 1.539e+03, 1.108e+03, 9.850e+02, 8.390e+02,
        9.200e+02, 8.530e+02, 8.690e+02, 9.220e+02, 9.840e+02, 9.930e+02,
        1.031e+03, 1.071e+03, 1.255e+03, 1.202e+03, 1.335e+03, 1.412e+03,
        1.592e+03, 1.835e+03, 1.862e+03, 2.141e+03, 2.182e+03, 2.248e+03,
        2.357e+03, 2.380e+03, 2.433e+03, 2.570e+03, 2.504e+03, 2.684e+03,
        2.679e+03, 2.814e+03, 2.777e+03, 2.970e+03, 3.013e+03, 3.145e+03,
        0.000e+00, 3.320e+03, 3.488e+03, 3.615e+03, 3.928e+03, 4.042e+03,
        4.177e+03, 4.408e+03, 4.334e+03, 4.363e+03, 4.550e+03, 4.525e+03,
        4.557e+03, 4.777e+03, 4.673e+03, 4.804e+03, 4.987e+03, 4.852e+03,
        4.824e+03, 4.876e+03, 4.877e+03, 4.940e+03, 5.113e+03, 5.074e+03,
        5.173e+03, 5.038e+03, 4.991e+03, 4.918e+03, 4.938e+03, 4.806e+03,
        4.755e+03, 4.764e+03, 4.647e+03, 4.530e+03, 4.712e+03, 4.671e+03,
        4.816e+03, 4.924e+03, 4.898e+0

In [9]:
bbox

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,583,dicom_00023075_033.dcm,Mass,239.502222,535.077934,72.817778,65.991111
1,584,dicom_00029579_005.dcm,Mass,609.28,189.19349,73.955556,71.68
2,585,dicom_00013659_019.dcm,Mass,559.217778,167.575712,102.4,136.533333


In [10]:
# Visualising the suspecious mass

plt.imshow(dcm.pixel_array[535:(535+66),240:(240+73)], cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7fb6968d7590>

In [11]:
# Plotting histogram of normalised pixel intensities for mass 

plt.figure(figsize=(5,5))
plt.hist(new_img[535:(535+66),240:(240+73)].ravel(), bins=256)

<IPython.core.display.Javascript object>

(array([  1.,   0.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.,
          0.,   2.,   0.,   2.,   0.,   2.,   0.,   3.,   0.,   9.,   0.,
         10.,   0.,   8.,   0.,   0.,   6.,   0.,   8.,   0.,   8.,   0.,
         14.,   0.,  16.,   0.,  15.,   0.,  16.,   0.,  21.,   0.,  13.,
          0.,   7.,   0.,  21.,   0.,  11.,   0.,   0.,  21.,   0.,   7.,
          0.,  15.,   0.,  19.,   0.,  16.,   0.,  13.,   0.,  19.,   0.,
         19.,   0.,  17.,   0.,   3.,   0.,  11.,   0.,  13.,   0.,   0.,
         14.,   0.,  13.,   0.,  12.,   0.,   7.,   0.,  17.,   0.,   9.,
          0.,   7.,   0.,  13.,   0.,  18.,   0.,  12.,   0.,  13.,   0.,
          7.,   0.,  10.,   0.,   0.,  23.,   0.,  16.,   0.,  31.,   0.,
         31.,   0.,  25.,   0.,  26.,   0.,  26.,   0.,  30.,   0.,  34.,
          0.,  30.,   0.,  30.,   0.,  40.,   0.,   0.,  51.,   0.,  69.,
          0.,  81.,   0.,  95.,   0., 108.,   0., 121.,   0., 102.,   0.,
        130.,   0., 128.,   0., 141., 

### Image 2

In [12]:
bbox

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,583,dicom_00023075_033.dcm,Mass,239.502222,535.077934,72.817778,65.991111
1,584,dicom_00029579_005.dcm,Mass,609.28,189.19349,73.955556,71.68
2,585,dicom_00013659_019.dcm,Mass,559.217778,167.575712,102.4,136.533333


In [13]:
dcm2 = pydicom.dcmread('dicom_00029579_005.dcm')

In [14]:
plt.imshow(dcm2.pixel_array, cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7fb697a155d0>

In [15]:
plt.figure(figsize=(5,5))
plt.hist(dcm2.pixel_array.ravel(), bins=256)

<IPython.core.display.Javascript object>

(array([7.5257e+04, 3.2400e+03, 2.0070e+03, 1.6990e+03, 1.3230e+03,
        1.1500e+03, 1.0230e+03, 8.8300e+02, 8.8200e+02, 8.3800e+02,
        7.9000e+02, 6.8100e+02, 6.6600e+02, 6.4200e+02, 5.5100e+02,
        5.8200e+02, 5.9100e+02, 5.2500e+02, 5.5200e+02, 5.7700e+02,
        5.2600e+02, 5.6300e+02, 5.5100e+02, 5.4700e+02, 5.9500e+02,
        5.9300e+02, 6.0300e+02, 6.3800e+02, 6.5600e+02, 7.2000e+02,
        7.1500e+02, 7.8000e+02, 8.3400e+02, 9.1000e+02, 9.7000e+02,
        9.4900e+02, 1.0210e+03, 1.0620e+03, 1.1390e+03, 1.1460e+03,
        1.2310e+03, 1.2570e+03, 1.3500e+03, 1.2810e+03, 1.3580e+03,
        1.4710e+03, 1.4820e+03, 1.4660e+03, 1.5910e+03, 1.6740e+03,
        1.8090e+03, 1.7390e+03, 1.8070e+03, 1.8360e+03, 1.9610e+03,
        2.0430e+03, 1.9570e+03, 2.0190e+03, 1.9730e+03, 2.0210e+03,
        2.2260e+03, 2.4030e+03, 2.4010e+03, 2.6180e+03, 2.8080e+03,
        2.9310e+03, 3.3110e+03, 3.6500e+03, 3.8790e+03, 4.1520e+03,
        4.2900e+03, 4.5400e+03, 4.8760e+03, 5.25

In [17]:
mean2_intensity, std2_intensity = np.mean(dcm.pixel_array), np.std(dcm.pixel_array)
print('mean :',mean2_intensity,', std :',std2_intensity)

mean : 123.25588417053223 , std : 57.47256019573095


In [19]:
new_img2 = dcm2.pixel_array.copy()
new_img2 = (new_img - mean2_intensity)/std2_intensity

In [20]:
plt.figure(figsize=(5,5))
plt.hist(new_img2.ravel(), bins=256)

<IPython.core.display.Javascript object>

(array([3.455e+03, 2.969e+03, 2.961e+03, 2.387e+03, 2.990e+03, 3.522e+03,
        2.789e+03, 1.972e+03, 1.539e+03, 1.108e+03, 9.850e+02, 8.390e+02,
        9.200e+02, 8.530e+02, 8.690e+02, 9.220e+02, 9.840e+02, 9.930e+02,
        1.031e+03, 1.071e+03, 1.255e+03, 1.202e+03, 1.335e+03, 1.412e+03,
        1.592e+03, 1.835e+03, 1.862e+03, 2.141e+03, 2.182e+03, 2.248e+03,
        2.357e+03, 2.380e+03, 2.433e+03, 2.570e+03, 2.504e+03, 2.684e+03,
        2.679e+03, 2.814e+03, 2.777e+03, 2.970e+03, 3.013e+03, 3.145e+03,
        0.000e+00, 3.320e+03, 3.488e+03, 3.615e+03, 3.928e+03, 4.042e+03,
        4.177e+03, 4.408e+03, 4.334e+03, 4.363e+03, 4.550e+03, 4.525e+03,
        4.557e+03, 4.777e+03, 4.673e+03, 4.804e+03, 4.987e+03, 4.852e+03,
        4.824e+03, 4.876e+03, 4.877e+03, 4.940e+03, 5.113e+03, 5.074e+03,
        5.173e+03, 5.038e+03, 4.991e+03, 4.918e+03, 4.938e+03, 4.806e+03,
        4.755e+03, 4.764e+03, 4.647e+03, 4.530e+03, 4.712e+03, 4.671e+03,
        4.816e+03, 4.924e+03, 4.898e+0

In [21]:
bbox

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,583,dicom_00023075_033.dcm,Mass,239.502222,535.077934,72.817778,65.991111
1,584,dicom_00029579_005.dcm,Mass,609.28,189.19349,73.955556,71.68
2,585,dicom_00013659_019.dcm,Mass,559.217778,167.575712,102.4,136.533333


In [22]:
plt.imshow(dcm2.pixel_array[189:(189+72),609:(609+74)], cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7fb697323510>

In [31]:
plt.figure(figsize=(5,5))
plt.hist(new_img2[189:(189+72),609:(609+74)].ravel(), bins=256, color='green')

<IPython.core.display.Javascript object>

(array([  1.,   0.,   1.,   0.,   0.,   7.,   0.,   0.,   7.,   0.,   0.,
         17.,   0.,   0.,  17.,   0.,   0.,  11.,   0.,   0.,  31.,   0.,
          0.,  33.,   0.,   0.,  25.,   0.,   0.,  34.,   0.,   0.,  30.,
          0.,  40.,   0.,   0.,  50.,   0.,   0.,  49.,   0.,   0.,  63.,
          0.,   0.,  82.,   0.,   0.,  98.,   0.,   0., 112.,   0.,   0.,
        101.,   0.,   0., 106.,   0.,   0., 116.,   0.,   0., 127.,   0.,
        116.,   0.,   0., 126.,   0.,   0., 127.,   0.,   0., 120.,   0.,
          0., 113.,   0.,   0., 105.,   0.,   0.,  98.,   0.,   0.,  89.,
          0.,   0., 103.,   0.,   0., 109.,   0.,   0., 114.,   0., 119.,
          0.,   0., 133.,   0.,   0., 156.,   0.,   0., 154.,   0.,   0.,
        178.,   0.,   0., 185.,   0.,   0., 173.,   0.,   0., 187.,   0.,
          0., 152.,   0.,   0., 122.,   0., 105.,   0.,   0., 108.,   0.,
          0.,  85.,   0.,   0.,  82.,   0.,   0.,  67.,   0.,   0.,  82.,
          0.,   0.,  79.,   0.,   0., 

### Image 3

In [43]:
bbox

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,583,dicom_00023075_033.dcm,Mass,239.502222,535.077934,72.817778,65.991111
1,584,dicom_00029579_005.dcm,Mass,609.28,189.19349,73.955556,71.68
2,585,dicom_00013659_019.dcm,Mass,559.217778,167.575712,102.4,136.533333


In [44]:
dcm3 = pydicom.dcmread('dicom_00013659_019.dcm')

In [45]:
plt.imshow(dcm3.pixel_array, cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7fb6925c9810>

In [46]:
plt.figure(figsize=(5,5))
plt.hist(dcm3.pixel_array.ravel(), bins=256)

<IPython.core.display.Javascript object>

(array([2.6510e+03, 3.9000e+01, 1.9000e+01, 2.6000e+01, 1.9000e+01,
        7.0000e+01, 8.7000e+01, 1.2400e+02, 2.2100e+02, 1.1200e+02,
        1.8000e+02, 7.1200e+02, 9.5900e+02, 2.1810e+03, 2.2080e+03,
        2.3530e+03, 2.6470e+03, 3.4630e+03, 3.3090e+03, 1.4310e+03,
        9.7600e+02, 5.9000e+02, 4.4700e+02, 3.5900e+02, 3.4700e+02,
        3.5800e+02, 2.8100e+02, 2.9100e+02, 2.8700e+02, 2.5400e+02,
        2.9600e+02, 3.0000e+02, 3.0500e+02, 2.9800e+02, 2.9500e+02,
        3.4600e+02, 3.0700e+02, 3.4300e+02, 3.0400e+02, 3.4700e+02,
        3.8900e+02, 3.8900e+02, 3.7400e+02, 4.6300e+02, 4.6300e+02,
        4.5100e+02, 5.1500e+02, 4.8500e+02, 5.1800e+02, 5.4600e+02,
        4.9900e+02, 5.0200e+02, 5.7300e+02, 5.5600e+02, 6.2100e+02,
        6.2400e+02, 6.5500e+02, 6.0600e+02, 6.3500e+02, 6.8200e+02,
        7.0600e+02, 7.5000e+02, 7.5800e+02, 7.5000e+02, 7.3300e+02,
        7.7700e+02, 7.8600e+02, 8.0300e+02, 7.5200e+02, 8.0800e+02,
        8.3000e+02, 8.6300e+02, 8.9800e+02, 8.80

In [47]:
mean3_intensity, std3_intensity = np.mean(dcm3.pixel_array), np.std(dcm3.pixel_array)
print('mean :',mean3_intensity,', std :', std3_intensity)

mean : 155.60211944580078 , std : 43.776203799182035


In [48]:
img3 = dcm3.pixel_array.copy()
img3 = (img3 - mean3_intensity)/std3_intensity

In [49]:
plt.figure(figsize=(5,5))
plt.hist(img3.ravel(), bins=256)

<IPython.core.display.Javascript object>

(array([2.6510e+03, 3.9000e+01, 1.9000e+01, 2.6000e+01, 1.9000e+01,
        7.0000e+01, 8.7000e+01, 1.2400e+02, 2.2100e+02, 1.1200e+02,
        1.8000e+02, 7.1200e+02, 9.5900e+02, 2.1810e+03, 2.2080e+03,
        2.3530e+03, 2.6470e+03, 3.4630e+03, 3.3090e+03, 1.4310e+03,
        9.7600e+02, 5.9000e+02, 4.4700e+02, 3.5900e+02, 3.4700e+02,
        3.5800e+02, 2.8100e+02, 2.9100e+02, 2.8700e+02, 2.5400e+02,
        2.9600e+02, 3.0000e+02, 3.0500e+02, 2.9800e+02, 2.9500e+02,
        3.4600e+02, 3.0700e+02, 3.4300e+02, 3.0400e+02, 3.4700e+02,
        3.8900e+02, 3.8900e+02, 3.7400e+02, 4.6300e+02, 4.6300e+02,
        4.5100e+02, 5.1500e+02, 4.8500e+02, 5.1800e+02, 5.4600e+02,
        4.9900e+02, 5.0200e+02, 5.7300e+02, 5.5600e+02, 6.2100e+02,
        6.2400e+02, 6.5500e+02, 6.0600e+02, 6.3500e+02, 6.8200e+02,
        7.0600e+02, 7.5000e+02, 7.5800e+02, 7.5000e+02, 7.3300e+02,
        7.7700e+02, 7.8600e+02, 8.0300e+02, 7.5200e+02, 8.0800e+02,
        8.3000e+02, 8.6300e+02, 8.9800e+02, 8.80

In [50]:
bbox

Unnamed: 0.1,Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,583,dicom_00023075_033.dcm,Mass,239.502222,535.077934,72.817778,65.991111
1,584,dicom_00029579_005.dcm,Mass,609.28,189.19349,73.955556,71.68
2,585,dicom_00013659_019.dcm,Mass,559.217778,167.575712,102.4,136.533333


In [51]:
plt.imshow(dcm3.pixel_array[168:(168+137),559:(559+102)], cmap='gray')

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7fb691ee2f50>

In [54]:
plt.figure(figsize=(5,5))
plt.hist(img3[168:(168+137),559:(559+102)].ravel(), bins=256, color='brown')

<IPython.core.display.Javascript object>

(array([  1.,   0.,   0.,   9.,   0.,   0.,   0.,   9.,   0.,   0.,   0.,
         34.,   0.,   0.,   0.,  58.,   0.,   0.,   0.,  61.,   0.,   0.,
          0.,  47.,   0.,   0.,   0.,  26.,   0.,   0.,   0.,  44.,   0.,
          0.,  46.,   0.,   0.,   0.,  45.,   0.,   0.,   0.,  40.,   0.,
          0.,   0.,  35.,   0.,   0.,   0.,  51.,   0.,   0.,   0.,  36.,
          0.,   0.,   0.,  34.,   0.,   0.,   0.,  35.,   0.,   0.,  36.,
          0.,   0.,   0.,  43.,   0.,   0.,   0.,  43.,   0.,   0.,   0.,
         40.,   0.,   0.,   0.,  42.,   0.,   0.,   0.,  28.,   0.,   0.,
          0.,  39.,   0.,   0.,   0.,  34.,   0.,   0.,  47.,   0.,   0.,
          0.,  69.,   0.,   0.,   0.,  72.,   0.,   0.,   0.,  75.,   0.,
          0.,   0., 100.,   0.,   0.,   0.,  84.,   0.,   0.,   0.,  94.,
          0.,   0.,   0.,  64.,   0.,   0.,   0.,  75.,   0.,   0.,  78.,
          0.,   0.,   0.,  91.,   0.,   0.,   0., 171.,   0.,   0.,   0.,
        200.,   0.,   0.,   0., 290., 

#### What do we understand from plotting histograms of image pixels intensity and mass pixels intensity? 

It tell us that the intensity values of the mass are higher than the image mean, but mostly fall within a single standard deviation of the image intensity values. This tell us that using some sort of thresholding mechanism to identify tumors in an image would probably _not_ be appropriate, because the mass's intensity values are not _that_ different from the rest of the image. 