In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import pickle
import os.path as path
plt.rcParams['figure.figsize'] = (10.0, 8.0)

'\nInterpolating to a constant wavenumber step (lowest as possible)\nMake all images the same wavelength.\n'

In [2]:
filename_lst = []
file_location = '../data/Raman_Mouse/'
for i, file in enumerate(glob.glob(f'{file_location}raw/*')):
    filename_lst.append(file.split('/')[-1].split('.')[0] + '.npy')
    print(i, filename_lst[-1])

    if path.exists(f'{file_location}{file.split("/")[-1].split(".")[0]}.npy') and i != 0:
        continue
    
    df = pd.read_csv(file, delimiter='\t', skipinitialspace=True, header=0, skiprows=[])
    data = df.to_numpy()[:,:4]

    if i == 0:
        X = np.array(sorted(list(np.unique(data[:,0]))))
        Y = np.array(sorted(list(np.unique(data[:,1]))))
        X_0 = X - X[0]
        Y_0 = Y - Y[0]
        Wavelength = sorted(list(np.unique(data[:,2])))
        np.save(f'{file_location}X_coordinates', X) 
        np.save(f'{file_location}Y_coordinates', Y)
    else:
        try:
            X = np.array(sorted(list(np.unique(data[:,0]))))
            Y = np.array(sorted(list(np.unique(data[:,1]))))
            x_0 = X - X[0]
            y_0 = Y - Y[0]
            assert (X_0 == x_0).all(), f"X is not equal for file {file}"
            assert (Y_0 == y_0).all(), f"Y is not equal for file {file}"
        except AssertionError:
            print("X difference:", X_0 - x_0)
            print("Y difference:", Y_0 - y_0)
        except AttributeError:
            print("Corrupted File")
            continue

    if i==0 and path.exists(f'{file_location}{file.split("/")[-1].split(".")[0]}.npy'):
        continue

    wavelength = sorted(list(np.unique(data[:,2])))
    np.save(f'{file_location}{file.split("/")[-1].split(".")[0]}_wavelength', wavelength) 
    
    # detected if a point is missing (the value is than -100)
    img = np.zeros((len(X), len(Y), len(wavelength)), dtype=np.float64) - 100

    for d in data:
        i = list(X).index(d[0])
        j = list(Y).index(d[1])
        w = wavelength.index(d[2])
        img[i,j,w] = d[3]
    np.save(f'{file_location}{file.split("/")[-1].split(".")[0]}', img) 

np.save(f'{file_location}FileNames', filename_lst)

0 raw\Kidney_map_150z25_60s_#35.npy
1 raw\Kidney_map_150z25_60s_#36.npy
2 raw\Kidney_map_150z25_60s_#37.npy
3 raw\Kidney_map_150z25_60s_#38.npy
4 raw\Kidney_map_150z25_60s_#39.npy
5 raw\Kidney_map_150z25_60s_#40.npy
6 raw\Kidney_map_150z25_60s_#41.npy
7 raw\Kidney_map_150z25_60s_#42.npy
8 raw\Kidney_map_150z25_60s_#43.npy
9 raw\Kidney_map_150z25_60s_#44.npy
10 raw\Kidney_map_150z25_60s_#45.npy
11 raw\Kidney_map_150z25_60s_#46.npy
12 raw\Kidney_map_150z25_60s_#47.npy
13 raw\Kidney_map_150z25_60s_#48.npy
14 raw\Kidney_map_150z25_60s_#49.npy
15 raw\Kidney_map_150z25_60s_#50.npy
16 raw\Kidney_map_150z25_60s_#51.npy
17 raw\Liver_map_150x25_60s_#10.npy
18 raw\Liver_map_150x25_60s_#13.npy
19 raw\Liver_map_150x25_60s_#7.npy
20 raw\Liver_map_150x25_60s_#9.npy
21 raw\Liver_map_150z25_60s_#01.npy
22 raw\Liver_map_150z25_60s_#03.npy
23 raw\Liver_map_150z25_60s_#05.npy
24 raw\Liver_map_150z25_60s_#11.npy
25 raw\Liver_map_150z25_60s_#12.npy
26 raw\Liver_map_150z25_60s_#14.npy
27 raw\Liver_map_150z25

In [4]:
filename_lst = np.load(f"{file_location}FileNames.npy")
df = pd.read_csv('../data/Raman_Mouse/Sample_labels.csv', delimiter=',', skipinitialspace=True, header=0, skiprows=[])
# only need which sample, gender, tissue type and age.
data = df.to_numpy()[:,[0,2,4,6]]

In [6]:
tissue_dict = {'Liver' : [1,0,0], 'Muscle' : [0,1,0], 'Kidney' : [0,0,1]}
labels = {}
for file in filename_lst:
    try:
        specimen_number = int(file.split("#")[1][:2])
    except ValueError:
        specimen_number = int(file.split("#")[1][0])
    index = list(data[:,0]).index(specimen_number)
    V_M = int(data[index, 1] == 'M')
    L_M_K = tissue_dict[data[index, 2]] 
    
    labels[f"{file.split('.')[0]}.npy"] = (V_M, *L_M_K, data[index, 3])

with open('../data/Raman_Mouse/Sample_labels.pickle', 'wb') as f:
    pickle.dump(labels, f)