# Lung Segmentation data preparation

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so the project folder used below is reachable from Colab.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [None]:
# Make the project folder on the mounted Drive the current working directory.
os.chdir('/content/drive/My Drive/Colab Notebooks/Lung_disease_mini_project')

In [None]:
# Show the current working directory to confirm the chdir above took effect (IPython automagic).
pwd

'/content/drive/My Drive/Colab Notebooks/Lung_disease_mini_project'

In [None]:
# Project root on the mounted Drive; load_data globs the CXR_png/ and mask/ sub-folders beneath it.
path = '/content/drive/My Drive/Colab Notebooks/Lung_disease_mini_project/'

# Loading and Resizing data

In [None]:
from natsort import natsorted

def load_data(path, image_size=(256, 256), random_state=1):
  """Load the chest X-ray images and their masks, resize them, and split them three ways.

  PNG files are collected from ``<path>/CXR_png`` (images) and ``<path>/mask``
  (masks), each list is put in natural sort order, and the i-th image is paired
  with the i-th mask.
  NOTE(review): pairing is purely positional; it presumes both folders sort into
  the same case order -- confirm against the dataset's file naming.

  The pairs are split 70/30 into train/rest, and the rest is split 67/33 into
  validation/test. Images and masks go through the *same* ``train_test_split``
  call so a pair can never end up in different subsets.

  Args:
    path: Directory that contains the ``CXR_png`` and ``mask`` folders.
    image_size: ``(width, height)`` passed to ``cv2.resize``. Defaults to 256x256.
    random_state: Seed used for both splits. Defaults to 1.

  Returns:
    ``(x_train, y_train), (x_valid, y_valid), (x_test, y_test)`` where every
    element is a NumPy array stacking the resized grayscale images of that subset.

  Raises:
    ValueError: If no images are found, if the number of images and masks
      differs, or if a file cannot be decoded by OpenCV.
  """
  images = natsorted(glob(os.path.join(path, "CXR_png", "*.png")))
  masks = natsorted(glob(os.path.join(path, "mask", "*.png")))

  if not images:
    raise ValueError(f"No PNG images found in {os.path.join(path, 'CXR_png')}")
  if len(images) != len(masks):
    # Splitting lists of different lengths independently would silently misalign pairs.
    raise ValueError(
        f"Found {len(images)} images but {len(masks)} masks; "
        "every image needs exactly one mask"
    )

  # One call per stage splits images and masks with the same permutation.
  train_x, rest_x, train_y, rest_y = train_test_split(
      images, masks, test_size=0.3, random_state=random_state)
  valid_x, test_x, valid_y, test_y = train_test_split(
      rest_x, rest_y, test_size=0.33, random_state=random_state)

  #print(len(train_x), len(train_y))
  #print(len(valid_x), len(valid_y))

  return (
      (_load_grayscale_stack(train_x, image_size), _load_grayscale_stack(train_y, image_size)),
      (_load_grayscale_stack(valid_x, image_size), _load_grayscale_stack(valid_y, image_size)),
      (_load_grayscale_stack(test_x, image_size), _load_grayscale_stack(test_y, image_size)),
  )


def _load_grayscale_stack(file_paths, size):
  """Read each file as single-channel grayscale, resize it to `size`, and stack the results."""
  frames = []
  for file_path in file_paths:
    frame = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    if frame is None:
      # cv2.imread reports an unreadable file by returning None rather than raising.
      raise ValueError(f"Could not read image file: {file_path}")
    # NOTE(review): INTER_AREA is kept for masks as well to preserve the existing
    # output; it blends neighbouring pixels, so resized masks may contain values
    # other than the original two levels -- confirm downstream code thresholds them.
    frames.append(cv2.resize(frame, size, interpolation=cv2.INTER_AREA))
  return np.array(frames)

In [None]:
# Load, resize and split the dataset into train / validation / test (image, mask) array pairs.
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(path)

In [None]:
# Training images as returned by load_data: (samples, height, width), no channel axis yet.
train_x.shape

(396, 256, 256)

In [None]:
# Validation images as returned by load_data: (samples, height, width), no channel axis yet.
valid_x.shape

(113, 256, 256)

In [None]:
# Test images as returned by load_data: (samples, height, width), no channel axis yet.
test_x.shape

(57, 256, 256)

In [None]:
# Append a trailing channel axis: (samples, height, width) -> (samples, height, width, 1).
# The ndim guard makes this cell safe to re-run: arrays that already carry the
# extra axis are left untouched instead of growing a second one.
train_x, train_y, valid_x, valid_y, test_x, test_y = (
  np.expand_dims(split, axis=-1) if split.ndim == 3 else split
  for split in (train_x, train_y, valid_x, valid_y, test_x, test_y)
)

In [None]:
# Report the shape of every split, images first and masks second, to confirm the channel axis.
for split in (train_x, train_y, valid_x, valid_y, test_x, test_y):
  print(split.shape)

(396, 256, 256, 1)
(396, 256, 256, 1)
(113, 256, 256, 1)
(113, 256, 256, 1)
(57, 256, 256, 1)
(57, 256, 256, 1)


# Shuffling data

In [None]:
from sklearn.utils import shuffle

# Shuffle each split with a fixed seed (the same one load_data uses) so the arrays
# saved below are identical on every run. Passing images and masks to the same
# call applies one permutation to both, keeping every pair aligned.
train_x, train_y = shuffle(train_x, train_y, random_state=1)
valid_x, valid_y = shuffle(valid_x, valid_y, random_state=1)
test_x, test_y = shuffle(test_x, test_y, random_state=1)

# Saving and reloading shuffled numpy data

In [None]:
# Write every split into the project folder held in `path` instead of repeating
# the absolute path six times; np.save appends the ".npy" extension itself.
for split_name, split_array in (
  ('train_x', train_x),
  ('train_y', train_y),
  ('valid_x', valid_x),
  ('valid_y', valid_y),
  ('test_x', test_x),
  ('test_y', test_y),
):
  np.save(os.path.join(path, split_name), split_array)

In [None]:
# Read the saved arrays back from the project folder held in `path` so the
# files on disk can be checked independently of the in-memory arrays.
train_x1, train_y1, valid_x1, valid_y1, test_x1, test_y1 = (
  np.load(os.path.join(path, f'{split_name}.npy'))
  for split_name in ('train_x', 'train_y', 'valid_x', 'valid_y', 'test_x', 'test_y')
)

# Checking the shapes of the reloaded numpy data

In [None]:
# Report the shape of every reloaded split; these should match the shapes printed before saving.
for reloaded in (train_x1, train_y1, valid_x1, valid_y1, test_x1, test_y1):
  print(reloaded.shape)

(396, 256, 256, 1)
(396, 256, 256, 1)
(113, 256, 256, 1)
(113, 256, 256, 1)
(57, 256, 256, 1)
(57, 256, 256, 1)
