In [33]:
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mode
import random
from scipy.ndimage import zoom, rotate
import os
import pickle
from tqdm import tqdm

In [34]:
def _normalize_scan(scan, mask, n_std=3.5):
    """Asymmetrically standardise the masked voxels and rescale to [0, 1].

    The masked values are centred on their mean. Values below and above the
    mean are then divided by their own root-mean-square deviation from that
    mean, clipped to ``[-n_std, n_std]`` and finally min-max rescaled.

    Parameters
    ----------
    scan : numpy.ndarray
        Volume of raw values; entries outside ``mask`` are ignored.
    mask : numpy.ndarray of bool
        Same shape as ``scan``; True where a voxel carries real data.
    n_std : float, optional
        Clipping threshold, expressed in one-sided standard deviations.

    Returns
    -------
    numpy.ndarray
        New float array shaped like ``scan``. Masked voxels lie in [0, 1];
        voxels outside the mask are 0 (i.e. the minimum of the masked values
        after rescaling).
    """
    normalized = np.zeros(scan.shape, dtype=float)
    values = scan[mask].astype(float)
    if values.size == 0:
        return normalized

    centred = values - values.mean()

    # Scale each side of the mean by its own RMS deviation. The assignment
    # goes through a single boolean index on `centred`; chaining two indexing
    # operations (`a[m1][m2] /= s`) would only modify a temporary copy.
    for side in (centred < 0, centred > 0):
        if side.any():
            # Strictly non-zero entries only, so the divisor cannot be zero.
            centred[side] /= np.sqrt(np.mean(np.square(centred[side])))

    # Values are now in standard-deviation units, so the threshold is n_std itself.
    centred = np.clip(centred, -n_std, n_std)

    low = centred.min()
    span = centred.max() - low
    if span > 0:
        normalized[mask] = (centred - low) / span
    # span == 0 means every masked voxel is identical: leave the volume at 0
    # instead of producing NaNs from 0 / 0.
    return normalized


def pickle_dataset(zoom_factor=1):
    """Convert the per-participant ``.mat`` files into pickled 3-D scans.

    For each of the 9 participants, ``mats/data-science-P{k}.mat``
    (k = 1..9) is loaded. Each of the first 359 trials is turned into a
    (z, y, x) volume, normalised with ``_normalize_scan``, zero-padded by 5
    voxels on every side, optionally upsampled, and randomly rotated by a few
    degrees about each axis pair. The resulting list of ``(scan, label)``
    tuples is written to ``pickles/{pi}_zoom-{zoom_factor}.pkl``, where ``pi``
    is the 0-based participant index.

    Parameters
    ----------
    zoom_factor : int or float, optional
        Linear-interpolation upsampling factor; only applied when > 1.

    Notes
    -----
    * Rotation angles come from the global NumPy random state, so the output
      is not reproducible unless the caller seeds ``np.random`` first.
    * NOTE(review): only trials 0..358 are processed. If the files hold 360
      trials the last one is skipped - confirm whether that is intended.
    """
    os.makedirs("pickles", exist_ok=True)

    for pi in range(9):
        print(pi)

        mat_i = loadmat(f"mats/data-science-P{pi+1}.mat")
        coord_to_col = mat_i["meta"][0][0][8]
        data = mat_i["data"]
        info = mat_i["info"][0]

        # Reorder the (x, y, z) lookup table to (z, y, x) and make it 0-based.
        # The cast to a signed integer keeps `0 - 1` from wrapping around when
        # the table is stored with an unsigned dtype.
        col_index = np.transpose(coord_to_col, (2, 1, 0)).astype(np.intp) - 1
        # Assumes a 0 entry in coord_to_col marks a coordinate with no voxel,
        # which is what the 1-based `- 1` lookup implies - TODO confirm
        # against the dataset documentation.
        mask = col_index >= 0

        pi_set = []
        for i in tqdm(range(359)):
            datum = np.ravel(data[i][0][0])

            # One fancy-index lookup fills the whole volume. Background
            # coordinates (index -1) pick up datum[-1], but those entries are
            # discarded by the mask during normalisation.
            scan = _normalize_scan(datum[col_index], mask)
            scan = np.pad(scan, 5)

            if zoom_factor > 1:
                scan = zoom(scan, zoom_factor, order=1)

            # Small random rotations about each pair of axes. `high` is
            # exclusive, so the angles are integers in [-5, 4].
            angles = np.random.randint(low=-5, high=5, size=(3, ))
            scan = rotate(scan, angles[0], (0, 1), reshape=False)
            scan = rotate(scan, angles[1], (1, 2), reshape=False)
            scan = rotate(scan, angles[2], (2, 0), reshape=False)

            pi_set.append((scan, info[i][2][0]))

        # Key the file on the participant index. Using the trial index `i`
        # here would give every participant the same file name.
        with open(f"pickles/{pi}_zoom-{zoom_factor}.pkl", "wb") as f:
            pickle.dump(pi_set, f)
        
        
            # w = 6
            # fig, ax = plt.subplots(w, w, constrained_layout=True)
            # fig.dpi = 100
            # bg_color = (225 / 255, 216 / 255, 226 / 255)
            # fig.set_facecolor(bg_color)
            
            # for j in range(w * w):
            #     ax[(j - j % w) // w, j % w].imshow(scan[j % 33], vmin=0, vmax=1, cmap="twilight")
            #     ax[(j - j % w) // w, j % w].set_xticks([])
            #     ax[(j - j % w) // w, j % w].set_yticks([])
            #     plt.setp(ax[(j - j % w) // w, j % w].spines.values(), color=bg_color)
            # plt.show()

# Build the pickles at native resolution (zoom_factor=1 is the default, so no upsampling).
pickle_dataset(zoom_factor=1)

0


100%|██████████| 359/359 [01:04<00:00,  5.54it/s]


1


100%|██████████| 359/359 [01:03<00:00,  5.64it/s]


KeyboardInterrupt: 