# notepad

## Imports

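The data will be processed using pandas and NumPy, together with the project's `brainspin` helper modules (`file_handler`, `mold`, `carve`).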

In [4]:
import os       # using operating system dependent functionality (folders)
import glob
import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import copy     # Can Copy and Deepcopy files so original file is untouched.
from ipywidgets import IntSlider, Output
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
#import SimpleITK as sitk
import skimage
import hashlib
import sys
sys.path.insert(0, '../') # path to functions
from brainspin import file_handler as fh # 
from brainspin import mold #
from brainspin import carve
from brainspin.file_handler import Config

In [22]:

def hash_folder(origin_folder1, file_extension, made, force=False):
    """Hashing function to be used by command line.

    Hashes the matching files with ``hash_rash`` and writes the resulting
    table to ``hash_output.csv`` inside ``made``. An existing
    ``hash_output.csv`` is left untouched unless ``force`` is true.

    :param origin_folder1: The string of the folder with files to hash
    :type origin_folder1: str
    :param file_extension: File extension
    :type file_extension: str
    :param made: file directory where csv with hashes will be put
    :type made: str
    :param force: overwrite an already existing ``hash_output.csv``
    :type force: bool

    :returns: nothing; the result is the csv written to disk
    :rtype: None
    """
    filepath = os.path.join(made, 'hash_output.csv')
    # The hashing runs before the existence check on purpose: hash_rash
    # also writes its own 'out.csv', and callers got that file on every
    # call, whether or not hash_output.csv was already present.
    df = hash_rash(origin_folder1, file_extension)
    if not force and os.path.isfile(filepath):
        return

    # dirname is '' when ``made`` is '' (i.e. the current directory), and
    # os.makedirs('') raises, so only create a folder when one is named.
    out_dir = os.path.dirname(filepath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    df.to_csv(filepath)

# def save_preprocessed(array, out_fname, force):
#     """
#     This function is written to be called by the cli module.
#     It stores arrays in a directory.
#     """
#     if not force:
#         if os.path.isfile(out_fname):
#             return
#     try:
#         os.makedirs(os.path.dirname(out_fname))
#     except FileExistsError:
#         pass
#     np.save(out_fname, array, allow_pickle=False)


def hash_rash(origin_folder1, file_extension):
    """Hashing function to check files are not corrupted or to assure
    files are changed.

    Every file below ``origin_folder1`` (searched recursively) whose name
    ends in ``.<file_extension>`` is hashed with SHA-256. The table is
    also written to ``out.csv`` in the current working directory.

    :param origin_folder1: The string of the folder with files to hash
    :type origin_folder1: str
    :param file_extension: File extension, with or without a leading dot
    :type file_extension: str

    :returns: Dataframe with hashes for what is in folder; columns are
        ``file_name`` and ``hash``, one row per file, sorted by path.
        Empty (but with both columns) when nothing matches.
    :rtype: ~pandas.DataFrame
    """
    # Accept both 'gz' and '.gz'; the dot is added by the pattern itself.
    pattern = os.path.join(
        origin_folder1,
        '**/*.' + file_extension.lstrip('.'),
    )
    # Sort so the row order is reproducible between runs, and keep only
    # regular files: a directory whose name matches cannot be opened.
    file_names = sorted(
        path
        for path in glob.glob(pattern, recursive=True)
        if os.path.isfile(path)
    )

    BUF_SIZE = 65536  # read in 64 KiB chunks so large files are not loaded whole
    hash_list = []
    for path in file_names:
        sha256 = hashlib.sha256()
        with open(path, 'rb') as f:
            # iter() with a sentinel stops at the empty read that marks EOF.
            for chunk in iter(lambda: f.read(BUF_SIZE), b''):
                sha256.update(chunk)
        hash_list.append(sha256.hexdigest())

    # Building from a dict keeps both columns even when no file matched;
    # renaming the columns of a column-less frame would raise ValueError.
    df = pd.DataFrame({'file_name': file_names, 'hash': hash_list})
    # Kept for backward compatibility: the table is always dumped to
    # 'out.csv' in the working directory as well as being returned.
    df.to_csv('out.csv')

    return df


In [23]:
# Hash every .gz file below ../not_pushed and store the table in
# hopi/hash_output.csv (skipped when that file already exists, because
# force defaults to False). hash_folder returns nothing, so `output` is None.
output = hash_folder('../not_pushed', 'gz', 'hopi')

In [24]:
# Read back 'out.csv', the copy of the hash table that hash_rash writes to
# the working directory, and show it as the cell output.
pd.read_csv('out.csv')

Unnamed: 0.1,Unnamed: 0,file_name,hash
0,0,../not_pushed\data_anonymized\EPAD\040EPAD0000...,1057b8d371d5816349ebef5d56a10fe28332dfccdaae47...
1,1,../not_pushed\data_anonymized\EPAD\040EPAD0000...,1312dfb271ebad0ad2ccbed73eb26a0d3c4e323efb4a9a...
2,2,../not_pushed\data_anonymized\EPAD\040EPAD0000...,ef1be36f689e5710ca5c2a7f91251a335c363a42c8d38b...
3,3,../not_pushed\data_anonymized\HCP\HCA6061757\T...,25cc6080428b99883557f28c4ba0c31eac5eafb1bed6c5...
4,4,../not_pushed\data_anonymized\HCP\HCA6061757\T...,ba2b90d8203e5918117906208944e6778e3f1290cbf549...
5,5,../not_pushed\data_anonymized\Insight46\sub-19...,288278eab96a1b1a8f7a76380d2ab4a32e2cdd93cc685f...
6,6,../not_pushed\data_anonymized\Insight46\sub-19...,d3f6f6ff76b9f8cd292c4ff14b6e1bdd552baceb976c80...
7,7,../not_pushed\data_anonymized\Insight46\sub-19...,647db82b345573f4d6123a11aab0033edd61e267607a21...
8,8,../not_pushed\data_anonymized\Insight46\sub-19...,5a696e998b28cc31eda6b7dab8816e7746bad923f93ccd...
9,9,../not_pushed\data_anonymized\Insight46\sub-19...,f1531b68cb3c5f8038978ea7a90e62f276e0952b88226a...


In [None]:
# Print the path of every .tsv file found anywhere below the data folder.
file_directory = '../not_pushed'
file_directory_list = glob.glob(os.path.join(file_directory, '**/*.tsv'), recursive=True)
for file in file_directory_list:
    print(file)

In [None]:
# non_suspects1 = glob.glob(os.path.join('../not_pushed','**/*.tsv' ))
# print(non_suspects1)
# `files` was never assigned at notebook scope, so this cell raised a
# NameError. Build the candidate list here: every path below the data
# folder, which the loop then narrows down to the .tsv files.
files = glob.glob(os.path.join('../not_pushed', '**/*'), recursive=True)
for file in files:
    if file.endswith('.tsv'):
        print(file)

## Load files

Use the directory paths stored in the config to locate the different datasets.

In [None]:
# Config is imported from brainspin.file_handler at the top of the notebook.
# NOTE(review): get_directory presumably returns the path stored under the
# 'root_mri_directory' key - confirm against brainspin.file_handler.
config = Config()
root_mri_directory = config.get_directory('root_mri_directory')

In [None]:
# Collect every .tsv file below the root MRI directory, at any depth
# ('**' together with recursive=True also descends into subfolders).
tsv_pattern = os.path.join(root_mri_directory, '**/*.tsv')
tsv_files = glob.glob(tsv_pattern, recursive=True)


In [None]:
# Show the list of matched .tsv paths as the cell output.
tsv_files

In [None]:
# Load the first matched .tsv as a tab-separated table.
# Raises IndexError when no .tsv file was found (tsv_files is empty).
dataframe_example = pd.read_csv(tsv_files[0], sep='\t')

In [None]:
# Show the loaded example table as the cell output.
dataframe_example

In [None]:
# Collect every .gz file below the root MRI directory, at any depth.
# NOTE(review): these are presumably the compressed image volumes - confirm.
mri_pattern = os.path.join(root_mri_directory, '**/*.gz')
gz_files = glob.glob(mri_pattern, recursive=True)

In [None]:
# Show the list of matched .gz paths as the cell output.
gz_files

In [None]:
# here we can use something not in the base environment just to check that these files exist correctly

# SimpleITK is imported in this cell because the import at the top of the
# notebook is commented out; without it every `sitk.` call below raises a
# NameError.
import SimpleITK as sitk

# Path to one MRI brain image: the first .gz file that was found.
# Raises IndexError when gz_files is empty.
t1_fn = gz_files[0]

# Read the image containing the volume with SimpleITK:
sitk_t1 = sitk.ReadImage(t1_fn)

# and access the numpy array:
t1 = sitk.GetArrayFromImage(sitk_t1)

# now display it

In [None]:
# Show the dimensions of the image array as the cell output.
t1.shape

In [None]:
# import plotly
# import plotly.express as px


# fig = px.imshow(
#     t1,
#     facet_col=1,
#     animation_frame=0,
#     binary_string=True,
#     binary_format="jpg",
# )
# fig.layout.annotations[0]["text"] = "Something0"
# fig.layout.annotations[1]["text"] = "Something2"
# plotly.io.show(fig)

In [None]:
# Display the SimpleITK image read in the earlier cell.
# NOTE(review): per the SimpleITK documentation, Show hands the image to an
# external viewer program, so one has to be installed - confirm locally.
sitk.Show(sitk_t1, debugOn=True)

In [None]:
# 3-D scatter plot of every non-zero element of the image array.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
data = t1
# nonzero() returns one index array per axis; unpacking into three names
# only works when the array has exactly three dimensions.
z, x, y = data.nonzero()
# Colour each point by its index along the first axis (z).
ax.scatter(x, y, z, c=z, alpha=1)
plt.show()