In [2]:
import os
import numpy as np
import pandas as pd
import SimpleITK as sitk
import matplotlib.pyplot as plt

In [3]:
def plot_hists(images1, images2=None):
    plt.hist(images1.ravel(), bins=100, density=True, color='b', alpha=1 if images2 is None else 0.5)
    if images2 is not None:
        plt.hist(images2.ravel(), bins=100, density=True, alpha=0.5, color='orange')
    plt.show()

In [4]:
def window_transform(ct_array, windowWidth, windowCenter, normal=False):
	"""
    return: trucated image according to window center and window width
    and normalized to [0,1]
    """
	minWindow = float(windowCenter) - 0.5*float(windowWidth)
	# print(minWindow)
	newimg = (ct_array - minWindow) / float(windowWidth)
	newimg[newimg < 0] = 0
	newimg[newimg > 1] = 1
	if not normal:
		newimg = (newimg * 255).astype('uint8')
	return newimg

In [21]:
# 肺窗调整到[-1250, 250]

data_path = "/zhangtong/org_dataset/MosMed/"
ct_path = os.path.join(data_path, "ct_scans")
infection_path = os.path.join(data_path, "infection_masks")

In [22]:
data_dict = {}
image_save_path = "/zhangtong/dealed_dataset/mosmed_m/ct_scans/"
infection_mask_path = "/zhangtong/dealed_dataset/mosmed_m/infection_mask/"

for file in os.listdir(ct_path):
    if file == "ReadMe.txt":
        continue
    filename = file.split(".")
    ct = sitk.ReadImage(os.path.join(ct_path, file))
    ct_array = sitk.GetArrayFromImage(ct)
    ct_array = window_transform(ct_array * 1.0, 1500, -500)
    print(infection_mask_path)
    infection_mask = sitk.ReadImage(os.path.join(infection_path, "{}_mask.nii.gz".format(filename[0])))
    infection_mask_arr = sitk.GetArrayFromImage(infection_mask)
#     print(filename)
    print("start process {}".format(filename))
    data_list = []
    for i in range(ct_array.shape[0]):
        if np.max(infection_mask_arr[i]) > 0:
            image_path = os.path.join(image_save_path, "{}_{}.npy".format(filename[0], i))
            infected_path = os.path.join(infection_mask_path, "{}_{}.npy".format(filename[0], i))
#             print(image_path)
#             print(infection_path)
            np.save(image_path, ct_array[i])
            np.save(infected_path, infection_mask_arr[i])
            
            data_list.append({
                "image_path": image_path,
                "infection_mask": infected_path,
            })
        else:
            continue
    data_dict[filename[0]] = data_list
#     break

/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0295', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0297', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0280', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0272', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0265', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0271', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0281', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0303', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0257', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['study_0282', 'nii', 'gz']
/zhangtong/dealed_dataset/mosmed_m/infection_mask/
start process ['stu

In [23]:
import json
with open("mosmed_data.json", 'w', encoding='utf-8') as f:
    f.write(json.dumps(data_dict))

In [24]:
key_list = list(data_dict.keys())

In [25]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(key_list, test_size=0.2, random_state=101)
train, valid = train_test_split(train, test_size=0.1, random_state=1)
print(train)
print(valid)
print(test)

['study_0271', 'study_0293', 'study_0268', 'study_0286', 'study_0280', 'study_0267', 'study_0275', 'study_0261', 'study_0265', 'study_0281', 'study_0282', 'study_0295', 'study_0270', 'study_0263', 'study_0296', 'study_0288', 'study_0257', 'study_0290', 'study_0255', 'study_0260', 'study_0283', 'study_0302', 'study_0276', 'study_0259', 'study_0300', 'study_0273', 'study_0256', 'study_0298', 'study_0277', 'study_0269', 'study_0272', 'study_0266', 'study_0289', 'study_0264', 'study_0278', 'study_0274']
['study_0303', 'study_0279', 'study_0292', 'study_0294']
['study_0299', 'study_0262', 'study_0285', 'study_0291', 'study_0258', 'study_0297', 'study_0301', 'study_0284', 'study_0304', 'study_0287']


In [27]:
train_data = []
for item in train:
    train_data += data_dict[item]
with open("mosmed_train_m.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(train_data))

In [28]:
valid_data = []
for item in valid:
    valid_data += data_dict[item]
with open("mosmed_valid_m.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(valid_data))

In [29]:
test_data = []
for item in test:
    test_data += data_dict[item]
with open("mosmed_test_m.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(test_data))